From 1520c43db59686bcda443be6f3130dfc64652da7 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Fri, 26 Apr 2024 13:34:18 -0700 Subject: [PATCH 01/22] Add lindi read helper functions --- src/nwb_benchmarks/core/_streaming.py | 35 +++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/nwb_benchmarks/core/_streaming.py b/src/nwb_benchmarks/core/_streaming.py index f22b7ed..b7274e9 100644 --- a/src/nwb_benchmarks/core/_streaming.py +++ b/src/nwb_benchmarks/core/_streaming.py @@ -1,3 +1,4 @@ +import json import tempfile import time import warnings @@ -5,6 +6,7 @@ import fsspec import h5py +import lindi import pynwb import remfile from fsspec.asyn import reset_lock @@ -179,3 +181,36 @@ def read_hdf5_nwbfile_ros3(s3_url: str, retry: bool = True) -> Tuple[pynwb.NWBFi retries = None nwbfile = io.read() return (nwbfile, io, retries) + + +def create_lindi_reference_file_system(s3_url: str, outfile_path: str): + """ + Create a lindi reference file system JSON cache file for a given HDF5 file on S3 (or locally) + + The output_file path should end in the '.lindi.json' extension + """ + # Create a read-only Zarr store as a wrapper for the h5 file + store = lindi.LindiH5ZarrStore.from_file(s3_url) + # Generate a reference file system + rfs = store.to_reference_file_system() + # Save it to a file for later use + with open(outfile_path, "w") as f: + json.dump(rfs, f, indent=2) + + +def read_hdf5_lindi(s3_url: str) -> lindi.LindiH5pyFile: + """Open an HDF5 file from an S3 URL using Lindi.""" + # TODO: Example URL of a remote .nwb.lindi.json file that we can use for initial test setup + # url = 'https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json' + # Load the h5py-like client for the reference file system + client = lindi.LindiH5pyFile.from_reference_file_system(s3_url) + return client + + +def read_hdf5_nwbfile_lindi(s3_url: str) -> Tuple[pynwb.NWBFile, pynwb.NWBHDF5IO, lindi.LindiH5pyFile]: + """Read an HDF5 NWB file from an S3 URL using the ROS3 driver from h5py.""" + client = read_hdf5_lindi(s3_url=s3_url) + # Open using pynwb + io = pynwb.NWBHDF5IO(file=client, mode="r") + nwbfile = io.read() + return (nwbfile, io, client) From 359e69f10c182a2e57428286290bbe943e67d32c Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Fri, 26 Apr 2024 13:35:42 -0700 Subject: [PATCH 02/22] Add module docstring --- src/nwb_benchmarks/core/_streaming.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/nwb_benchmarks/core/_streaming.py b/src/nwb_benchmarks/core/_streaming.py index b7274e9..3f251bb 100644 --- a/src/nwb_benchmarks/core/_streaming.py +++ b/src/nwb_benchmarks/core/_streaming.py @@ -1,3 +1,8 @@ +""" +Module with helper functions for streaming read access to remote files using various different methods, e.g, +fsspec, remfile, ros3, lindi +""" + import json import tempfile import time From 9d252f615e0e40340c78011f9425b5aa121a66e3 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Fri, 26 Apr 2024 22:39:20 -0700 Subject: [PATCH 03/22] Add LINDI time_remote_file_reading benchmarks for NWB files --- .../benchmarks/time_remote_file_reading.py | 108 ++++++++++++++++++ src/nwb_benchmarks/core/__init__.py | 6 + src/nwb_benchmarks/core/_streaming.py | 19 ++- 3 files changed, 127 insertions(+), 6 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py index c06198b..6ed22fa 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py @@ -1,11 +1,16 @@ """Basic benchmarks for timing streaming access of NWB files and their contents.""" +import os + from nwb_benchmarks.core import ( + create_lindi_reference_file_system, get_s3_url, read_hdf5_fsspec_no_cache, read_hdf5_fsspec_with_cache, + read_hdf5_lindi, read_hdf5_nwbfile_fsspec_no_cache, read_hdf5_nwbfile_fsspec_with_cache, + read_hdf5_nwbfile_lindi, read_hdf5_nwbfile_remfile, read_hdf5_nwbfile_remfile_with_cache, read_hdf5_nwbfile_ros3, @@ -24,6 +29,18 @@ "https://dandiarchive.s3.amazonaws.com/ros3test.nwb", # The original small test NWB file ] +# Parameters for LINDI when HDF5 files are remote without using an existing LINDI JSON reference file system on +# the remote server (i.e., we create the LINDI JSON file for these in these tests) +lindi_hdf5_param_names = param_names +lindi_hdf5_params = params + +# Parameters for LINDI pointing to a remote LINDI reference file system JSON file. I.e., here we do not +# to create the JSON but can load it directly from the remote store +lindi_remote_rfs_param_names = param_names +lindi_remote_rfs_params = [ + "https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json" +] + class DirectFileReadBenchmark: """ @@ -95,3 +112,94 @@ def time_read_hdf5_nwbfile_remfile_with_cache(self, s3_url: str): def time_read_hdf5_nwbfile_ros3(self, s3_url: str): self.nwbfile, self.io, _ = read_hdf5_nwbfile_ros3(s3_url=s3_url, retry=False) + + +class LindiFileReadLocalReferenceFileSystemBenchmark: + """ + Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi + filesystem is available locally. + """ + + rounds = 1 + repeat = 3 + param_names = lindi_hdf5_param_names + params = lindi_hdf5_params + + def setup(self, s3_url: str): + """Create the local JSON LINDI reference filesystem if it does not exist""" + self.lindi_file = os.path.basename(s3_url) + ".lindi.json" + if not os.path.exists(self.lindi_file): + create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) + + def time_read_lindi_nwbfile(self, s3_url: str): + """Read the NWB file with pynwb using LINDI with the local reference filesystem JSON""" + self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=self.lindi_file) + + def time_read_lindi_jsonrfs(self, s3_url: str): + """Read the NWB file with LINDI directly using the local reference filesystem JSON""" + self.client = read_hdf5_lindi(rfs=self.lindi_file) + + +class NWBLindiFileCreateLocalReferenceFileSystemBenchmark: + """ + Time the creation of a local Lindi JSON reference filesystem for a remote NWB file + as well as reading the NWB file with PyNWB when the local reference filesystem does not + yet exist. + """ + + rounds = 1 + repeat = 3 + param_names = lindi_hdf5_param_names + params = lindi_hdf5_params + + def setup(self, s3_url: str): + """Clear the LINDI JSON if it still exists""" + self.teardown(s3_url=s3_url) + + def teardown(self, s3_url: str): + """Clear the LINDI JSON if it still exists""" + if os.path.exists(self.lindi_file): + os.remove(self.lindi_file) + + def time_create_lindi_referernce_file_system(self, s3_url: str): + """Create a local Lindi JSON reference filesystem from a remote HDF5 file""" + self.lindi_file = os.path.basename(s3_url) + ".lindi.json" + create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) + + def time_create_lindi_referernce_file_system_and_read_nwbfile(self, s3_url: str): + """ + Create a local Lindi JSON reference filesystem from a remote HDF5 file + and then read the NWB file with PyNWB using LINDI with the local JSON + """ + self.lindi_file = os.path.basename(s3_url) + ".lindi.json" + create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) + self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=self.lindi_file) + + def time_create_lindi_referernce_file_system_and_read_jsonrfs(self, s3_url: str): + """ + Create a local Lindi JSON reference filesystem from a remote HDF5 file and + then read the HDF5 file with LINDI using the local JSON. + """ + self.lindi_file = os.path.basename(s3_url) + ".lindi.json" + create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) + self.client = read_hdf5_lindi(rfs=self.lindi_file) + + +class NWBLindiFileReadRemoteReferenceFileSystemBenchmark: + """ + Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi + filesystem is available locally. + """ + + rounds = 1 + repeat = 3 + param_names = lindi_remote_rfs_param_names + params = lindi_remote_rfs_params + + def time_read_lindi_nwbfile(self, s3_url: str): + """Read a remote NWB file with PyNWB using the remote LINDI JSON reference filesystem""" + self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=s3_url) + + def time_read_lindi_jsonrfs(self, s3_url: str): + """Read a remote HDF5 file with LINDI using the remote LINDI JSON reference filesystem""" + self.client = read_hdf5_lindi(rfs=self.lindi_file) diff --git a/src/nwb_benchmarks/core/__init__.py b/src/nwb_benchmarks/core/__init__.py index 64a44de..42c1db1 100644 --- a/src/nwb_benchmarks/core/__init__.py +++ b/src/nwb_benchmarks/core/__init__.py @@ -7,10 +7,13 @@ from ._network_tracker import network_activity_tracker from ._nwb_helpers import get_object_by_name from ._streaming import ( + create_lindi_reference_file_system, read_hdf5_fsspec_no_cache, read_hdf5_fsspec_with_cache, + read_hdf5_lindi, read_hdf5_nwbfile_fsspec_no_cache, read_hdf5_nwbfile_fsspec_with_cache, + read_hdf5_nwbfile_lindi, read_hdf5_nwbfile_remfile, read_hdf5_nwbfile_remfile_with_cache, read_hdf5_nwbfile_ros3, @@ -35,6 +38,9 @@ "read_hdf5_remfile_with_cache", "read_hdf5_nwbfile_remfile", "read_hdf5_nwbfile_remfile_with_cache", + "create_lindi_reference_file_system", + "read_hdf5_lindi", + "read_hdf5_nwbfile_lindi", "get_s3_url", "get_object_by_name", "robust_ros3_read", diff --git a/src/nwb_benchmarks/core/_streaming.py b/src/nwb_benchmarks/core/_streaming.py index 3f251bb..0d2b018 100644 --- a/src/nwb_benchmarks/core/_streaming.py +++ b/src/nwb_benchmarks/core/_streaming.py @@ -203,18 +203,25 @@ def create_lindi_reference_file_system(s3_url: str, outfile_path: str): json.dump(rfs, f, indent=2) -def read_hdf5_lindi(s3_url: str) -> lindi.LindiH5pyFile: - """Open an HDF5 file from an S3 URL using Lindi.""" +def read_hdf5_lindi(rfs: Union[dict, str]) -> lindi.LindiH5pyFile: + """Open an HDF5 file from an S3 URL using Lindi. + + :param rfs: The LINDI reference file system file. This can be a dictionary or a URL or path to a .lindi.json file. + """ # TODO: Example URL of a remote .nwb.lindi.json file that we can use for initial test setup # url = 'https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json' # Load the h5py-like client for the reference file system - client = lindi.LindiH5pyFile.from_reference_file_system(s3_url) + client = lindi.LindiH5pyFile.from_reference_file_system(rfs) return client -def read_hdf5_nwbfile_lindi(s3_url: str) -> Tuple[pynwb.NWBFile, pynwb.NWBHDF5IO, lindi.LindiH5pyFile]: - """Read an HDF5 NWB file from an S3 URL using the ROS3 driver from h5py.""" - client = read_hdf5_lindi(s3_url=s3_url) +def read_hdf5_nwbfile_lindi(rfs: Union[dict, str]) -> Tuple[pynwb.NWBFile, pynwb.NWBHDF5IO, lindi.LindiH5pyFile]: + """ + Read an HDF5 NWB file from an S3 URL using the ROS3 driver from h5py. + + :param rfs: The LINDI reference file system file. This can be a dictionary or a URL or path to a .lindi.json file. + """ + client = read_hdf5_lindi(s3_url=rfs) # Open using pynwb io = pynwb.NWBHDF5IO(file=client, mode="r") nwbfile = io.read() From 19f9d7462c52352b5307b6b5a9c4ec67c3af26d1 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Fri, 26 Apr 2024 22:48:57 -0700 Subject: [PATCH 04/22] Add LINDI time_remote_slicing benchmark --- .../benchmarks/time_remote_slicing.py | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py index ad1f5d4..02e3686 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py @@ -6,13 +6,12 @@ from nwb_benchmarks.core import ( get_object_by_name, get_s3_url, - read_hdf5_fsspec_with_cache, read_hdf5_nwbfile_fsspec_no_cache, read_hdf5_nwbfile_fsspec_with_cache, + read_hdf5_nwbfile_lindi, read_hdf5_nwbfile_remfile, read_hdf5_nwbfile_remfile_with_cache, read_hdf5_nwbfile_ros3, - read_hdf5_remfile_with_cache, ) # TODO: add the others @@ -30,6 +29,15 @@ ) +# Parameters for LINDI pointing to a remote LINDI reference file system JSON file +lindi_remote_rfs_param_names = param_names +lindi_remote_rfs_params = [ + ["https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json"], + ["accelerometer"], + [(slice(0, 30_000), slice(0, 3))], +] + + class FsspecNoCacheContinuousSliceBenchmark: rounds = 1 repeat = 3 @@ -118,3 +126,24 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): def time_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): """Note: store as self._temp to avoid tracking garbage collection as well.""" self.data_to_slice[slice_range] + + +class NWBLindiFileReadRemoteReferenceFileSystemBenchmark: + """ + Time the read of a data slice from a remote NWB file with pynwb using lindi with a remote JSON reference + filesystem available. + """ + + rounds = 1 + repeat = 3 + param_names = lindi_remote_rfs_param_names + params = lindi_remote_rfs_params + + def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): + self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=s3_url) + self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) + self.data_to_slice = self.neurodata_object.data + + def time_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): + """Note: store as self._temp to avoid tracking garbage collection as well.""" + self.data_to_slice[slice_range] From 134b385a25ae03a32b263a5ab21293569085e15c Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Fri, 26 Apr 2024 22:57:04 -0700 Subject: [PATCH 05/22] Add LINDI network_tracking_remote_slicing benchmark --- .../network_tracking_remote_slicing.py | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py index 7bd2ec3..475badd 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py @@ -9,6 +9,7 @@ network_activity_tracker, read_hdf5_nwbfile_fsspec_no_cache, read_hdf5_nwbfile_fsspec_with_cache, + read_hdf5_nwbfile_lindi, read_hdf5_nwbfile_remfile, read_hdf5_nwbfile_remfile_with_cache, read_hdf5_nwbfile_ros3, @@ -27,6 +28,13 @@ [(slice(0, 30_000), slice(0, 384))], # ~23 MB ) +# Parameters for LINDI pointing to a remote LINDI reference file system JSON file. I.e., here we do not +# to create the JSON but can load it directly from the remote store +lindi_remote_rfs_param_names = param_names +lindi_remote_rfs_params = [ + "https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json" +] + class FsspecNoCacheContinuousSliceBenchmark: param_names = param_names @@ -109,6 +117,30 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): def track_network_activity_during_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: - self._temp, retries = robust_ros3_read(command=self.data_to_slice.__getitem__, command_args=(slice_range,)) + self._temp, self.retries = robust_ros3_read( + command=self.data_to_slice.__getitem__, command_args=(slice_range,) + ) network_tracker.asv_network_statistics.update(retries=retries) return network_tracker.asv_network_statistics + + +class LindiFileReadRemoteReferenceFileSystemContinuousSliceBenchmark: + """ + Time the read of a data slice from a remote NWB file with pynwb using lindi with a remote JSON reference + filesystem available. + """ + + rounds = 1 + repeat = 3 + param_names = lindi_remote_rfs_param_names + params = lindi_remote_rfs_params + + def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): + self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=s3_url) + self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) + self.data_to_slice = self.neurodata_object.data + + def track_network_activity_during_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): + with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: + self._temp = self.data_to_slice[slice_range] + return network_tracker.asv_network_statistics From c49149555f7da4755fd2b3cf974487a9d25d0127 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Fri, 26 Apr 2024 23:07:01 -0700 Subject: [PATCH 06/22] Add LINDI network_tracking_file_reading benchmark --- .../network_tracking_remote_file_reading.py | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py index f48b25f..c3edc5c 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py @@ -1,13 +1,18 @@ """Basic benchmarks for profiling network statistics for streaming access to NWB files and their contents.""" +import os + from nwb_benchmarks import TSHARK_PATH from nwb_benchmarks.core import ( + create_lindi_reference_file_system, get_s3_url, network_activity_tracker, read_hdf5_fsspec_no_cache, read_hdf5_fsspec_with_cache, + read_hdf5_lindi, read_hdf5_nwbfile_fsspec_no_cache, read_hdf5_nwbfile_fsspec_with_cache, + read_hdf5_nwbfile_lindi, read_hdf5_nwbfile_remfile, read_hdf5_nwbfile_remfile_with_cache, read_hdf5_nwbfile_ros3, @@ -26,6 +31,18 @@ "https://dandiarchive.s3.amazonaws.com/ros3test.nwb", # The original small test NWB file ] +# Parameters for LINDI when HDF5 files are remote without using an existing LINDI JSON reference file system on +# the remote server (i.e., we create the LINDI JSON file for these in these tests) +lindi_hdf5_param_names = param_names +lindi_hdf5_params = params + +# Parameters for LINDI pointing to a remote LINDI reference file system JSON file. I.e., here we do not +# to create the JSON but can load it directly from the remote store +lindi_remote_rfs_param_names = param_names +lindi_remote_rfs_params = [ + "https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json" +] + class FsspecNoCacheDirectFileReadBenchmark: param_names = param_names @@ -143,3 +160,97 @@ def track_network_activity_during_read(self, s3_url: str): self.nwbfile, self.io, retries = read_hdf5_nwbfile_ros3(s3_url=s3_url) network_tracker.asv_network_statistics.update(retries=retries) return network_tracker.asv_network_statistics + + +class LindiFileReadLocalReferenceFileSystemBenchmark: + """ + Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi + filesystem is available locally. + """ + + rounds = 1 + repeat = 3 + param_names = lindi_hdf5_param_names + params = lindi_hdf5_params + + def setup(self, s3_url: str): + """Create the local JSON LINDI reference filesystem if it does not exist""" + self.lindi_file = os.path.basename(s3_url) + ".lindi.json" + if not os.path.exists(self.lindi_file): + create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) + + def track_network_activity_during_read_lindi_nwbfile(self, s3_url: str): + """Read the NWB file with pynwb using LINDI with the local reference filesystem JSON""" + with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: + self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=self.lindi_file) + return network_tracker.asv_network_statistics + + def track_network_activity_during_read_lindi_jsonrfs(self, s3_url: str): + """Read the NWB file with LINDI directly using the local reference filesystem JSON""" + with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: + self.client = read_hdf5_lindi(rfs=self.lindi_file) + return network_tracker.asv_network_statistics + + +class NWBLindiFileCreateLocalReferenceFileSystemBenchmark: + """ + Time the creation of a local Lindi JSON reference filesystem for a remote NWB file + as well as reading the NWB file with PyNWB when the local reference filesystem does not + yet exist. + """ + + rounds = 1 + repeat = 3 + param_names = lindi_hdf5_param_names + params = lindi_hdf5_params + + def setup(self, s3_url: str): + """Clear the LINDI JSON if it still exists""" + self.teardown(s3_url=s3_url) + + def teardown(self, s3_url: str): + """Clear the LINDI JSON if it still exists""" + if os.path.exists(self.lindi_file): + os.remove(self.lindi_file) + + def track_network_activity_create_lindi_referernce_file_system(self, s3_url: str): + """Create a local Lindi JSON reference filesystem from a remote HDF5 file""" + with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: + self.lindi_file = os.path.basename(s3_url) + ".lindi.json" + create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) + return network_tracker.asv_network_statistics + + def track_network_activity_create_lindi_referernce_file_system_and_read_nwbfile(self, s3_url: str): + """ + Create a local Lindi JSON reference filesystem from a remote HDF5 file + and then read the NWB file with PyNWB using LINDI with the local JSON + """ + with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: + self.lindi_file = os.path.basename(s3_url) + ".lindi.json" + create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) + self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=self.lindi_file) + return network_tracker.asv_network_statistics + + +class NWBLindiFileReadRemoteReferenceFileSystemBenchmark: + """ + Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi + filesystem is available locally. + """ + + rounds = 1 + repeat = 3 + param_names = lindi_remote_rfs_param_names + params = lindi_remote_rfs_params + + def track_network_activity_time_read_lindi_nwbfile(self, s3_url: str): + """Read a remote NWB file with PyNWB using the remote LINDI JSON reference filesystem""" + with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: + self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=s3_url) + return network_tracker.asv_network_statistics + + def track_network_activity_time_read_lindi_jsonrfs(self, s3_url: str): + """Read a remote HDF5 file with LINDI using the remote LINDI JSON reference filesystem""" + with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: + self.client = read_hdf5_lindi(rfs=self.lindi_file) + return network_tracker.asv_network_statistics From 2ae82a1775183a61dc5f108797cd43073dc50649 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Sun, 28 Apr 2024 13:55:37 -0700 Subject: [PATCH 07/22] Update to new BaseBenchmark class --- .../network_tracking_remote_file_reading.py | 28 +++++++++---------- .../network_tracking_remote_slicing.py | 17 +++++------ .../benchmarks/time_remote_file_reading.py | 28 +++++++++---------- .../benchmarks/time_remote_slicing.py | 19 +++++++------ 4 files changed, 45 insertions(+), 47 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py index 8c20346..7b5cb8d 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py @@ -38,15 +38,16 @@ # Parameters for LINDI when HDF5 files are remote without using an existing LINDI JSON reference file system on # the remote server (i.e., we create the LINDI JSON file for these in these tests) -lindi_hdf5_param_names = param_names -lindi_hdf5_params = params +lindi_hdf5_parameter_cases = parameter_cases # Parameters for LINDI pointing to a remote LINDI reference file system JSON file. I.e., here we do not # to create the JSON but can load it directly from the remote store -lindi_remote_rfs_param_names = param_names -lindi_remote_rfs_params = [ - "https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json" -] +lindi_remote_rfs_parameter_cases = dict( + # TODO: Just an example case for testing. Replace with real test case + BaseExample=dict( + s3_url="https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json", + ), +) class FsspecNoCacheDirectFileReadBenchmark(BaseBenchmark): @@ -157,7 +158,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics -class LindiFileReadLocalReferenceFileSystemBenchmark: +class LindiFileReadLocalReferenceFileSystemBenchmark(BaseBenchmark): """ Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi filesystem is available locally. @@ -165,8 +166,7 @@ class LindiFileReadLocalReferenceFileSystemBenchmark: rounds = 1 repeat = 3 - param_names = lindi_hdf5_param_names - params = lindi_hdf5_params + parameter_cases = lindi_hdf5_parameter_cases def setup(self, s3_url: str): """Create the local JSON LINDI reference filesystem if it does not exist""" @@ -187,7 +187,7 @@ def track_network_activity_during_read_lindi_jsonrfs(self, s3_url: str): return network_tracker.asv_network_statistics -class NWBLindiFileCreateLocalReferenceFileSystemBenchmark: +class NWBLindiFileCreateLocalReferenceFileSystemBenchmark(BaseBenchmark): """ Time the creation of a local Lindi JSON reference filesystem for a remote NWB file as well as reading the NWB file with PyNWB when the local reference filesystem does not @@ -196,8 +196,7 @@ class NWBLindiFileCreateLocalReferenceFileSystemBenchmark: rounds = 1 repeat = 3 - param_names = lindi_hdf5_param_names - params = lindi_hdf5_params + parameter_cases = lindi_hdf5_parameter_cases def setup(self, s3_url: str): """Clear the LINDI JSON if it still exists""" @@ -227,7 +226,7 @@ def track_network_activity_create_lindi_referernce_file_system_and_read_nwbfile( return network_tracker.asv_network_statistics -class NWBLindiFileReadRemoteReferenceFileSystemBenchmark: +class NWBLindiFileReadRemoteReferenceFileSystemBenchmark(BaseBenchmark): """ Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi filesystem is available locally. @@ -235,8 +234,7 @@ class NWBLindiFileReadRemoteReferenceFileSystemBenchmark: rounds = 1 repeat = 3 - param_names = lindi_remote_rfs_param_names - params = lindi_remote_rfs_params + parameter_cases = lindi_remote_rfs_parameter_cases def track_network_activity_time_read_lindi_nwbfile(self, s3_url: str): """Read a remote NWB file with PyNWB using the remote LINDI JSON reference filesystem""" diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py index c68951a..c61c3bc 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py @@ -30,10 +30,12 @@ # Parameters for LINDI pointing to a remote LINDI reference file system JSON file. I.e., here we do not # to create the JSON but can load it directly from the remote store -lindi_remote_rfs_param_names = param_names -lindi_remote_rfs_params = [ - "https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json" -] +lindi_remote_rfs_parameter_cases = dict( + # TODO: Just an example case for testing. Replace with real test case + BaseExample=dict( + s3_url="https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json", + ), +) class FsspecNoCacheContinuousSliceBenchmark(BaseBenchmark): @@ -115,11 +117,11 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli self._temp, self.retries = robust_ros3_read( command=self.data_to_slice.__getitem__, command_args=(slice_range,) ) - network_tracker.asv_network_statistics.update(retries=retries) + network_tracker.asv_network_statistics.update(retries=self.retries) return network_tracker.asv_network_statistics -class LindiFileReadRemoteReferenceFileSystemContinuousSliceBenchmark: +class LindiFileReadRemoteReferenceFileSystemContinuousSliceBenchmark(BaseBenchmark): """ Time the read of a data slice from a remote NWB file with pynwb using lindi with a remote JSON reference filesystem available. @@ -127,8 +129,7 @@ class LindiFileReadRemoteReferenceFileSystemContinuousSliceBenchmark: rounds = 1 repeat = 3 - param_names = lindi_remote_rfs_param_names - params = lindi_remote_rfs_params + parameter_cases = lindi_remote_rfs_parameter_cases def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=s3_url) diff --git a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py index 58091ca..d3c6ab2 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py @@ -36,15 +36,16 @@ # Parameters for LINDI when HDF5 files are remote without using an existing LINDI JSON reference file system on # the remote server (i.e., we create the LINDI JSON file for these in these tests) -lindi_hdf5_param_names = param_names -lindi_hdf5_params = params +lindi_hdf5_parameter_cases = parameter_cases # Parameters for LINDI pointing to a remote LINDI reference file system JSON file. I.e., here we do not # to create the JSON but can load it directly from the remote store -lindi_remote_rfs_param_names = param_names -lindi_remote_rfs_params = [ - "https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json" -] +lindi_remote_rfs_parameter_cases = dict( + # TODO: Just an example case for testing. Replace with real test case + BaseExample=dict( + s3_url="https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json", + ), +) class DirectFileReadBenchmark(BaseBenchmark): @@ -117,7 +118,7 @@ def time_read_hdf5_nwbfile_ros3(self, s3_url: str): self.nwbfile, self.io, _ = read_hdf5_nwbfile_ros3(s3_url=s3_url, retry=False) -class LindiFileReadLocalReferenceFileSystemBenchmark: +class LindiFileReadLocalReferenceFileSystemBenchmark(BaseBenchmark): """ Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi filesystem is available locally. @@ -125,8 +126,7 @@ class LindiFileReadLocalReferenceFileSystemBenchmark: rounds = 1 repeat = 3 - param_names = lindi_hdf5_param_names - params = lindi_hdf5_params + parameter_cases = lindi_hdf5_parameter_cases def setup(self, s3_url: str): """Create the local JSON LINDI reference filesystem if it does not exist""" @@ -143,7 +143,7 @@ def time_read_lindi_jsonrfs(self, s3_url: str): self.client = read_hdf5_lindi(rfs=self.lindi_file) -class NWBLindiFileCreateLocalReferenceFileSystemBenchmark: +class NWBLindiFileCreateLocalReferenceFileSystemBenchmark(BaseBenchmark): """ Time the creation of a local Lindi JSON reference filesystem for a remote NWB file as well as reading the NWB file with PyNWB when the local reference filesystem does not @@ -152,8 +152,7 @@ class NWBLindiFileCreateLocalReferenceFileSystemBenchmark: rounds = 1 repeat = 3 - param_names = lindi_hdf5_param_names - params = lindi_hdf5_params + parameter_cases = lindi_hdf5_parameter_cases def setup(self, s3_url: str): """Clear the LINDI JSON if it still exists""" @@ -188,7 +187,7 @@ def time_create_lindi_referernce_file_system_and_read_jsonrfs(self, s3_url: str) self.client = read_hdf5_lindi(rfs=self.lindi_file) -class NWBLindiFileReadRemoteReferenceFileSystemBenchmark: +class NWBLindiFileReadRemoteReferenceFileSystemBenchmark(BaseBenchmark): """ Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi filesystem is available locally. @@ -196,8 +195,7 @@ class NWBLindiFileReadRemoteReferenceFileSystemBenchmark: rounds = 1 repeat = 3 - param_names = lindi_remote_rfs_param_names - params = lindi_remote_rfs_params + parameter_cases = lindi_remote_rfs_parameter_cases def time_read_lindi_nwbfile(self, s3_url: str): """Read a remote NWB file with PyNWB using the remote LINDI JSON reference filesystem""" diff --git a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py index 002f117..278ea56 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py @@ -28,12 +28,14 @@ # Parameters for LINDI pointing to a remote LINDI reference file system JSON file -lindi_remote_rfs_param_names = param_names -lindi_remote_rfs_params = [ - ["https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json"], - ["accelerometer"], - [(slice(0, 30_000), slice(0, 3))], -] +lindi_remote_rfs_parameter_cases = dict( + # TODO: Just an example case for testing. Replace with real test case + BaseExample=dict( + s3_url="https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json", + object_name="accelerometer", + slice_range=(slice(0, 30_000), slice(0, 3)), + ), +) class FsspecNoCacheContinuousSliceBenchmark(BaseBenchmark): @@ -121,7 +123,7 @@ def time_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.data_to_slice[slice_range] -class NWBLindiFileReadRemoteReferenceFileSystemBenchmark: +class NWBLindiFileReadRemoteReferenceFileSystemBenchmark(BaseBenchmark): """ Time the read of a data slice from a remote NWB file with pynwb using lindi with a remote JSON reference filesystem available. @@ -129,8 +131,7 @@ class NWBLindiFileReadRemoteReferenceFileSystemBenchmark: rounds = 1 repeat = 3 - param_names = lindi_remote_rfs_param_names - params = lindi_remote_rfs_params + parameter_cases = lindi_remote_rfs_parameter_cases def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=s3_url) From 17e332c5d03df9531e0dd3a71896d88db978f4fd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 17:51:57 +0000 Subject: [PATCH 08/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../network_tracking_remote_file_reading.py | 10 ++++++---- .../benchmarks/time_remote_file_reading.py | 11 +++++++---- src/nwb_benchmarks/benchmarks/time_remote_slicing.py | 1 - src/nwb_benchmarks/core/_streaming.py | 2 +- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py index 1ebf770..1d88b7a 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py @@ -52,7 +52,7 @@ ), ) - + zarr_parameter_cases = dict( AIBSTestCase=dict( s3_url=( @@ -174,9 +174,10 @@ def track_network_activity_during_read(self, s3_url: str): class LindiFileReadLocalReferenceFileSystemBenchmark(BaseBenchmark): """ - Time the read of the Lindi HDF5 files with and without `pynwb` assuming that a local + Time the read of the Lindi HDF5 files with and without `pynwb` assuming that a local copy of the lindi filesystem is available locally. """ + rounds = 1 repeat = 3 parameter_cases = lindi_hdf5_parameter_cases @@ -244,6 +245,7 @@ class NWBLindiFileReadRemoteReferenceFileSystemBenchmark(BaseBenchmark): Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi filesystem is available locally. """ + rounds = 1 repeat = 3 parameter_cases = lindi_remote_rfs_parameter_cases @@ -259,8 +261,8 @@ def track_network_activity_time_read_lindi_jsonrfs(self, s3_url: str): with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: self.client = read_hdf5_lindi(rfs=self.lindi_file) return network_tracker.asv_network_statistics - - + + class ZarrDirectFileReadBenchmark(BaseBenchmark): parameter_cases = zarr_parameter_cases diff --git a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py index bff0724..fa96437 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py @@ -136,6 +136,7 @@ class LindiFileReadLocalReferenceFileSystemBenchmark(BaseBenchmark): Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi filesystem is available locally. """ + rounds = 1 repeat = 3 parameter_cases = lindi_hdf5_parameter_cases @@ -161,6 +162,7 @@ class NWBLindiFileCreateLocalReferenceFileSystemBenchmark(BaseBenchmark): as well as reading the NWB file with PyNWB when the local reference filesystem does not yet exist. """ + rounds = 1 repeat = 3 parameter_cases = lindi_hdf5_parameter_cases @@ -196,13 +198,14 @@ def time_create_lindi_referernce_file_system_and_read_jsonrfs(self, s3_url: str) self.lindi_file = os.path.basename(s3_url) + ".lindi.json" create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) self.client = read_hdf5_lindi(rfs=self.lindi_file) - - + + class NWBLindiFileReadRemoteReferenceFileSystemBenchmark(BaseBenchmark): """ Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi - filesystem is available locally. + filesystem is available locally. """ + rounds = 1 repeat = 3 parameter_cases = lindi_remote_rfs_parameter_cases @@ -214,7 +217,7 @@ def time_read_lindi_nwbfile(self, s3_url: str): def time_read_lindi_jsonrfs(self, s3_url: str): """Read a remote HDF5 file with LINDI using the remote LINDI JSON reference filesystem""" self.client = read_hdf5_lindi(rfs=self.lindi_file) - + class DirectZarrFileReadBenchmark(BaseBenchmark): """ diff --git a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py index 9427cbb..678734f 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py @@ -182,4 +182,3 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): def track_network_activity_during_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self._temp = self.data_to_slice[slice_range] - diff --git a/src/nwb_benchmarks/core/_streaming.py b/src/nwb_benchmarks/core/_streaming.py index 893e6a0..845fdcc 100644 --- a/src/nwb_benchmarks/core/_streaming.py +++ b/src/nwb_benchmarks/core/_streaming.py @@ -229,7 +229,7 @@ def read_hdf5_nwbfile_lindi(rfs: Union[dict, str]) -> Tuple[pynwb.NWBFile, pynwb nwbfile = io.read() return (nwbfile, io, client) - + def read_zarr(s3_url: str, open_without_consolidated_metadata: bool = False) -> zarr.Group: """ Open a Zarr file from an S3 URL using the built-in fsspec support in Zarr. From d2ab0d377b9a9d230112342c50e60143d8ba2751 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Mon, 29 Apr 2024 11:12:50 -0700 Subject: [PATCH 09/22] Update docstrings --- .../benchmarks/time_remote_file_reading.py | 9 ++++++-- .../benchmarks/time_remote_slicing.py | 22 ++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py index fa96437..cd40eca 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py @@ -36,6 +36,7 @@ ClassicRos3TestCase=dict(s3_url="https://dandiarchive.s3.amazonaws.com/ros3test.nwb"), ) + # Parameters for LINDI when HDF5 files are remote without using an existing LINDI JSON reference file system on # the remote server (i.e., we create the LINDI JSON file for these in these tests) lindi_hdf5_parameter_cases = parameter_cases @@ -230,10 +231,12 @@ class DirectZarrFileReadBenchmark(BaseBenchmark): repeat = 3 parameter_cases = zarr_parameter_cases - def time_read_zarr_nwbfile(self, s3_url: str): + def time_read_zarr(self, s3_url: str): + """Read a Zarr file using consolidated metadata (if available)""" self.zarr_file = read_zarr(s3_url=s3_url, open_without_consolidated_metadata=False) - def time_read_zarr_nwbfile_force_no_consolidated(self, s3_url: str): + def time_read_zarr_force_no_consolidated(self, s3_url: str): + """Read a Zarr file without using consolidated metadata""" self.zarr_file = read_zarr(s3_url=s3_url, open_without_consolidated_metadata=True) @@ -249,7 +252,9 @@ class NWBZarrFileReadBenchmark(BaseBenchmark): parameter_cases = zarr_parameter_cases def time_read_zarr_nwbfile(self, s3_url: str): + """Read NWB file with PyNWB using Zarr with consolidated metadata. (if available)""" self.nwbfile, self.io = read_zarr_nwbfile(s3_url=s3_url, mode="r") def time_read_zarr_nwbfile_force_no_consolidated(self, s3_url: str): + """Read NWB file with PyNWB using Zarr without consolidated metadata.""" self.nwbfile, self.io = read_zarr_nwbfile(s3_url=s3_url, mode="r-") diff --git a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py index 678734f..4e318c8 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py @@ -62,7 +62,7 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.data_to_slice = self.neurodata_object.data def time_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): - """Note: store as self._temp to avoid tracking garbage collection as well.""" + """Track network activity for slicing into a h5py.Dataset with Fsspec""" self._temp = self.data_to_slice[slice_range] @@ -82,7 +82,7 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.data_to_slice = self.neurodata_object.data def time_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): - """Note: store as self._temp to avoid tracking garbage collection as well.""" + """Track network activity for slicing into a h5py.Dataset with Fsspec""" self._temp = self.data_to_slice[slice_range] @@ -97,7 +97,7 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.data_to_slice = self.neurodata_object.data def time_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): - """Note: store as self._temp to avoid tracking garbage collection as well.""" + """Track network activity for slicing into a h5py.Dataset with RemFile""" self._temp = self.data_to_slice[slice_range] @@ -117,7 +117,7 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.data_to_slice = self.neurodata_object.data def time_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): - """Note: store as self._temp to avoid tracking garbage collection as well.""" + """Track network activity for slicing into a h5py.Dataset with RemFile""" self._temp = self.data_to_slice[slice_range] @@ -132,7 +132,7 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.data_to_slice = self.neurodata_object.data def time_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): - """Note: store as self._temp to avoid tracking garbage collection as well.""" + """Track network activity for slicing into a h5py.Dataset with Ros3""" self._temp = self.data_to_slice[slice_range] @@ -152,11 +152,15 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.data_to_slice = self.neurodata_object.data def time_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): - """Note: store as self._temp to avoid tracking garbage collection as well.""" + """Track network activity for slicing into a LindiDataset""" self._temp = self.data_to_slice[slice_range] class ZarrContinuousSliceBenchmark(BaseBenchmark): + """ + Benchmark network activity for slicing into a Zarr dataset using consolidated metadata (if available) + """ + rounds = 1 repeat = 3 parameter_cases = zarr_parameter_cases @@ -167,10 +171,15 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.data_to_slice = self.neurodata_object.data def track_network_activity_during_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): + """Track network activity for slicing into a Zarr dataset""" self._temp = self.data_to_slice[slice_range] class ZarrForceNoConsolidatedContinuousSliceBenchmark(BaseBenchmark): + """ + Benchmark network activity for slicing into a Zarr dataset without using consolidated metadata + """ + rounds = 1 repeat = 3 parameter_cases = zarr_parameter_cases @@ -181,4 +190,5 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.data_to_slice = self.neurodata_object.data def track_network_activity_during_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): + """Track network activity for slicing into a Zarr dataset""" self._temp = self.data_to_slice[slice_range] From 935f9dbe339ab3557aa14a0e876567e64b045efc Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Mon, 29 Apr 2024 15:03:37 -0700 Subject: [PATCH 10/22] Fix usage of bad variables --- .../benchmarks/network_tracking_remote_file_reading.py | 2 +- src/nwb_benchmarks/benchmarks/time_remote_file_reading.py | 2 +- src/nwb_benchmarks/core/_streaming.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py index 1d88b7a..818e7bd 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py @@ -259,7 +259,7 @@ def track_network_activity_time_read_lindi_nwbfile(self, s3_url: str): def track_network_activity_time_read_lindi_jsonrfs(self, s3_url: str): """Read a remote HDF5 file with LINDI using the remote LINDI JSON reference filesystem""" with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: - self.client = read_hdf5_lindi(rfs=self.lindi_file) + self.client = read_hdf5_lindi(rfs=s3_url) return network_tracker.asv_network_statistics diff --git a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py index cd40eca..3ab3499 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py @@ -217,7 +217,7 @@ def time_read_lindi_nwbfile(self, s3_url: str): def time_read_lindi_jsonrfs(self, s3_url: str): """Read a remote HDF5 file with LINDI using the remote LINDI JSON reference filesystem""" - self.client = read_hdf5_lindi(rfs=self.lindi_file) + self.client = read_hdf5_lindi(rfs=s3_url) class DirectZarrFileReadBenchmark(BaseBenchmark): diff --git a/src/nwb_benchmarks/core/_streaming.py b/src/nwb_benchmarks/core/_streaming.py index 845fdcc..714f0df 100644 --- a/src/nwb_benchmarks/core/_streaming.py +++ b/src/nwb_benchmarks/core/_streaming.py @@ -223,7 +223,7 @@ def read_hdf5_nwbfile_lindi(rfs: Union[dict, str]) -> Tuple[pynwb.NWBFile, pynwb :param rfs: The LINDI reference file system file. This can be a dictionary or a URL or path to a .lindi.json file. """ - client = read_hdf5_lindi(s3_url=rfs) + client = read_hdf5_lindi(rfs=rfs) # Open using pynwb io = pynwb.NWBHDF5IO(file=client, mode="r") nwbfile = io.read() From 2c4325e73cf1e2d35a8c6e5542b72184c43cac68 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Mon, 29 Apr 2024 15:08:14 -0700 Subject: [PATCH 11/22] Fix NWBLindiFileCreateLocalReferenceFileSystemBenchmark --- .../benchmarks/network_tracking_remote_file_reading.py | 3 +-- src/nwb_benchmarks/benchmarks/time_remote_file_reading.py | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py index 818e7bd..ad4cfa5 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py @@ -214,6 +214,7 @@ class NWBLindiFileCreateLocalReferenceFileSystemBenchmark(BaseBenchmark): def setup(self, s3_url: str): """Clear the LINDI JSON if it still exists""" + self.lindi_file = os.path.basename(s3_url) + ".lindi.json" self.teardown(s3_url=s3_url) def teardown(self, s3_url: str): @@ -224,7 +225,6 @@ def teardown(self, s3_url: str): def track_network_activity_create_lindi_referernce_file_system(self, s3_url: str): """Create a local Lindi JSON reference filesystem from a remote HDF5 file""" with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: - self.lindi_file = os.path.basename(s3_url) + ".lindi.json" create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) return network_tracker.asv_network_statistics @@ -234,7 +234,6 @@ def track_network_activity_create_lindi_referernce_file_system_and_read_nwbfile( and then read the NWB file with PyNWB using LINDI with the local JSON """ with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker: - self.lindi_file = os.path.basename(s3_url) + ".lindi.json" create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=self.lindi_file) return network_tracker.asv_network_statistics diff --git a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py index 3ab3499..e73f52c 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py @@ -170,6 +170,7 @@ class NWBLindiFileCreateLocalReferenceFileSystemBenchmark(BaseBenchmark): def setup(self, s3_url: str): """Clear the LINDI JSON if it still exists""" + self.lindi_file = os.path.basename(s3_url) + ".lindi.json" self.teardown(s3_url=s3_url) def teardown(self, s3_url: str): @@ -179,7 +180,6 @@ def teardown(self, s3_url: str): def time_create_lindi_referernce_file_system(self, s3_url: str): """Create a local Lindi JSON reference filesystem from a remote HDF5 file""" - self.lindi_file = os.path.basename(s3_url) + ".lindi.json" create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) def time_create_lindi_referernce_file_system_and_read_nwbfile(self, s3_url: str): @@ -187,7 +187,6 @@ def time_create_lindi_referernce_file_system_and_read_nwbfile(self, s3_url: str) Create a local Lindi JSON reference filesystem from a remote HDF5 file and then read the NWB file with PyNWB using LINDI with the local JSON """ - self.lindi_file = os.path.basename(s3_url) + ".lindi.json" create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=self.lindi_file) @@ -196,7 +195,6 @@ def time_create_lindi_referernce_file_system_and_read_jsonrfs(self, s3_url: str) Create a local Lindi JSON reference filesystem from a remote HDF5 file and then read the HDF5 file with LINDI using the local JSON. """ - self.lindi_file = os.path.basename(s3_url) + ".lindi.json" create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file) self.client = read_hdf5_lindi(rfs=self.lindi_file) From 89c46a43e97876c8882a06ead519f6af28dfceba Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Mon, 29 Apr 2024 15:35:30 -0700 Subject: [PATCH 12/22] Fix parameter setting for remote slicing network tracking --- .../benchmarks/network_tracking_remote_slicing.py | 4 ++-- src/nwb_benchmarks/benchmarks/time_remote_slicing.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py index 5eebdea..dbb3e6d 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py @@ -35,6 +35,8 @@ # TODO: Just an example case for testing. Replace with real test case BaseExample=dict( s3_url="https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json", + object_name="accelerometer", + slice_range=(slice(0, 30_000), slice(0, 3)), ), ) @@ -141,8 +143,6 @@ class LindiFileReadRemoteReferenceFileSystemContinuousSliceBenchmark(BaseBenchma filesystem available. """ - rounds = 1 - repeat = 3 parameter_cases = lindi_remote_rfs_parameter_cases def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): diff --git a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py index 4e318c8..a285f31 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py @@ -136,7 +136,7 @@ def time_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self._temp = self.data_to_slice[slice_range] -class NWBLindiFileReadRemoteReferenceFileSystemBenchmark(BaseBenchmark): +class LindiFileReadRemoteReferenceFileSystemContinuousSliceBenchmark(BaseBenchmark): """ Time the read of a data slice from a remote NWB file with pynwb using lindi with a remote JSON reference filesystem available. From 058fd1f7807451b7fc88d8f930595503a441fe5a Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Mon, 29 Apr 2024 17:31:58 -0700 Subject: [PATCH 13/22] use write_reference_file_system function Co-authored-by: Ryan Ly --- src/nwb_benchmarks/core/_streaming.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/nwb_benchmarks/core/_streaming.py b/src/nwb_benchmarks/core/_streaming.py index 714f0df..1b8c2c8 100644 --- a/src/nwb_benchmarks/core/_streaming.py +++ b/src/nwb_benchmarks/core/_streaming.py @@ -198,11 +198,8 @@ def create_lindi_reference_file_system(s3_url: str, outfile_path: str): """ # Create a read-only Zarr store as a wrapper for the h5 file store = lindi.LindiH5ZarrStore.from_file(s3_url) - # Generate a reference file system - rfs = store.to_reference_file_system() - # Save it to a file for later use - with open(outfile_path, "w") as f: - json.dump(rfs, f, indent=2) + # Generate a reference file system and write it to a file + store.write_reference_file_system(outfile_path) def read_hdf5_lindi(rfs: Union[dict, str]) -> lindi.LindiH5pyFile: From d3f93b86e03318c90bfc2f4f9ff748cc5032de53 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Mon, 29 Apr 2024 17:32:26 -0700 Subject: [PATCH 14/22] Update src/nwb_benchmarks/core/_streaming.py Co-authored-by: Ryan Ly --- src/nwb_benchmarks/core/_streaming.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nwb_benchmarks/core/_streaming.py b/src/nwb_benchmarks/core/_streaming.py index 1b8c2c8..c0a6e60 100644 --- a/src/nwb_benchmarks/core/_streaming.py +++ b/src/nwb_benchmarks/core/_streaming.py @@ -216,7 +216,7 @@ def read_hdf5_lindi(rfs: Union[dict, str]) -> lindi.LindiH5pyFile: def read_hdf5_nwbfile_lindi(rfs: Union[dict, str]) -> Tuple[pynwb.NWBFile, pynwb.NWBHDF5IO, lindi.LindiH5pyFile]: """ - Read an HDF5 NWB file from an S3 URL using the ROS3 driver from h5py. + Read an HDF5 NWB file from an S3 URL using LINDI. :param rfs: The LINDI reference file system file. This can be a dictionary or a URL or path to a .lindi.json file. """ From 8e73b9fcfe6be375e9cd617b5afc2de903250582 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Mon, 29 Apr 2024 17:33:23 -0700 Subject: [PATCH 15/22] Update src/nwb_benchmarks/core/_streaming.py --- src/nwb_benchmarks/core/_streaming.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/nwb_benchmarks/core/_streaming.py b/src/nwb_benchmarks/core/_streaming.py index c0a6e60..e6376c9 100644 --- a/src/nwb_benchmarks/core/_streaming.py +++ b/src/nwb_benchmarks/core/_streaming.py @@ -207,8 +207,6 @@ def read_hdf5_lindi(rfs: Union[dict, str]) -> lindi.LindiH5pyFile: :param rfs: The LINDI reference file system file. This can be a dictionary or a URL or path to a .lindi.json file. """ - # TODO: Example URL of a remote .nwb.lindi.json file that we can use for initial test setup - # url = 'https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json' # Load the h5py-like client for the reference file system client = lindi.LindiH5pyFile.from_reference_file_system(rfs) return client From e75eed289d03aa349dfd86327242532e44a435fb Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 30 Apr 2024 00:33:53 -0400 Subject: [PATCH 16/22] fixes to setup and skip networks --- .../network_tracking_remote_file_reading.py | 23 +++++++++++++++++-- .../network_tracking_remote_slicing.py | 10 ++++++++ src/nwb_benchmarks/command_line_interface.py | 14 ++++++++--- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py index ad4cfa5..62dee3f 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py @@ -2,6 +2,8 @@ import os +from asv_runner.benchmarks.mark import skip_benchmark_if + from nwb_benchmarks import TSHARK_PATH from nwb_benchmarks.core import ( BaseBenchmark, @@ -64,6 +66,7 @@ ) +@skip_benchmark_if(TSHARK_PATH is None) class FsspecNoCacheDirectFileReadBenchmark(BaseBenchmark): parameter_cases = parameter_cases @@ -73,6 +76,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class FsspecWithCacheDirectFileReadBenchmark(BaseBenchmark): parameter_cases = parameter_cases @@ -85,6 +89,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class RemfileDirectFileReadBenchmark(BaseBenchmark): parameter_cases = parameter_cases @@ -94,6 +99,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class RemfileDirectFileReadBenchmarkWithCache(BaseBenchmark): parameter_cases = parameter_cases @@ -106,6 +112,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class Ros3DirectFileReadBenchmark(BaseBenchmark): parameter_cases = parameter_cases @@ -116,6 +123,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class FsspecNoCacheNWBFileReadBenchmark(BaseBenchmark): parameter_cases = parameter_cases @@ -125,6 +133,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class FsspecWithCacheNWBFileReadBenchmark(BaseBenchmark): parameter_cases = parameter_cases @@ -139,6 +148,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class RemfileNWBFileReadBenchmark(BaseBenchmark): parameter_cases = parameter_cases @@ -148,6 +158,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class RemfileNWBFileReadBenchmarkWithCache(BaseBenchmark): parameter_cases = parameter_cases @@ -162,6 +173,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class Ros3NWBFileReadBenchmark(BaseBenchmark): parameter_cases = parameter_cases @@ -172,6 +184,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class LindiFileReadLocalReferenceFileSystemBenchmark(BaseBenchmark): """ Time the read of the Lindi HDF5 files with and without `pynwb` assuming that a local @@ -201,6 +214,7 @@ def track_network_activity_during_read_lindi_jsonrfs(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class NWBLindiFileCreateLocalReferenceFileSystemBenchmark(BaseBenchmark): """ Time the creation of a local Lindi JSON reference filesystem for a remote NWB file @@ -215,9 +229,9 @@ class NWBLindiFileCreateLocalReferenceFileSystemBenchmark(BaseBenchmark): def setup(self, s3_url: str): """Clear the LINDI JSON if it still exists""" self.lindi_file = os.path.basename(s3_url) + ".lindi.json" - self.teardown(s3_url=s3_url) + self.teardown() - def teardown(self, s3_url: str): + def teardown(self): """Clear the LINDI JSON if it still exists""" if os.path.exists(self.lindi_file): os.remove(self.lindi_file) @@ -239,6 +253,7 @@ def track_network_activity_create_lindi_referernce_file_system_and_read_nwbfile( return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class NWBLindiFileReadRemoteReferenceFileSystemBenchmark(BaseBenchmark): """ Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi @@ -262,6 +277,7 @@ def track_network_activity_time_read_lindi_jsonrfs(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class ZarrDirectFileReadBenchmark(BaseBenchmark): parameter_cases = zarr_parameter_cases @@ -271,6 +287,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class ZarrForceNoConsolidatedDirectFileReadBenchmark(BaseBenchmark): parameter_cases = zarr_parameter_cases @@ -280,6 +297,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class ZarrNWBFileReadBenchmark(BaseBenchmark): parameter_cases = zarr_parameter_cases @@ -289,6 +307,7 @@ def track_network_activity_during_read(self, s3_url: str): return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class ZarrForceNoConsolidatedNWBFileReadBenchmark(BaseBenchmark): parameter_cases = zarr_parameter_cases diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py index dbb3e6d..3a2c439 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py @@ -2,6 +2,8 @@ from typing import Tuple +from asv_runner.benchmarks.mark import skip_benchmark_if + from nwb_benchmarks import TSHARK_PATH from nwb_benchmarks.core import ( BaseBenchmark, @@ -54,6 +56,7 @@ ) +@skip_benchmark_if(TSHARK_PATH is None) class FsspecNoCacheContinuousSliceBenchmark(BaseBenchmark): parameter_cases = parameter_cases @@ -68,6 +71,7 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class FsspecWithCacheContinuousSliceBenchmark(BaseBenchmark): parameter_cases = parameter_cases @@ -87,6 +91,7 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class RemfileContinuousSliceBenchmark(BaseBenchmark): parameter_cases = parameter_cases @@ -101,6 +106,7 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class RemfileContinuousSliceBenchmarkWithCache(BaseBenchmark): parameter_cases = parameter_cases @@ -120,6 +126,7 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class Ros3ContinuousSliceBenchmark(BaseBenchmark): parameter_cases = parameter_cases @@ -137,6 +144,7 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class LindiFileReadRemoteReferenceFileSystemContinuousSliceBenchmark(BaseBenchmark): """ Time the read of a data slice from a remote NWB file with pynwb using lindi with a remote JSON reference @@ -156,6 +164,7 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class ZarrContinuousSliceBenchmark(BaseBenchmark): parameter_cases = zarr_parameter_cases @@ -170,6 +179,7 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli return network_tracker.asv_network_statistics +@skip_benchmark_if(TSHARK_PATH is None) class ZarrForceNoConsolidatedContinuousSliceBenchmark(BaseBenchmark): parameter_cases = zarr_parameter_cases diff --git a/src/nwb_benchmarks/command_line_interface.py b/src/nwb_benchmarks/command_line_interface.py index acbe844..335993f 100644 --- a/src/nwb_benchmarks/command_line_interface.py +++ b/src/nwb_benchmarks/command_line_interface.py @@ -26,8 +26,8 @@ def main() -> None: default_asv_machine_file_path = pathlib.Path.home() / ".asv-machine.json" if command == "run": - process = subprocess.Popen(["asv", "machine", "--yes"], stdout=subprocess.PIPE) - process.wait() + aws_machine_process = subprocess.Popen(["asv", "machine", "--yes"], stdout=subprocess.PIPE) + aws_machine_process.wait() customize_asv_machine_file(file_path=default_asv_machine_file_path) commit_hash = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode("ascii").strip() @@ -35,8 +35,16 @@ def main() -> None: # Save latest environment list from conda (most thorough) # subprocess tends to have issues inheriting `conda` entrypoint asv_root = pathlib.Path(__file__).parent.parent.parent / ".asv" + asv_root.mkdir(exist_ok=True) + raw_environment_info_file_path = asv_root / ".raw_environment_info.txt" - os.system(f"conda list > {raw_environment_info_file_path}") + environment_info_process = subprocess.Popen(["conda", "list", ">", raw_environment_info_file_path], + stdout=subprocess.PIPE, + shell=True) + environment_info_process.wait() + + if not raw_environment_info_file_path.exists(): + raise FileNotFoundError(f"Unable to create environment file at {raw_environment_info_file_path}!") # Deploy ASV cmd = [ From 3a51f44ed1780011d54c09a48b22751460d73a75 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 30 Apr 2024 04:34:03 +0000 Subject: [PATCH 17/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/nwb_benchmarks/command_line_interface.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nwb_benchmarks/command_line_interface.py b/src/nwb_benchmarks/command_line_interface.py index 335993f..030c7b7 100644 --- a/src/nwb_benchmarks/command_line_interface.py +++ b/src/nwb_benchmarks/command_line_interface.py @@ -38,9 +38,9 @@ def main() -> None: asv_root.mkdir(exist_ok=True) raw_environment_info_file_path = asv_root / ".raw_environment_info.txt" - environment_info_process = subprocess.Popen(["conda", "list", ">", raw_environment_info_file_path], - stdout=subprocess.PIPE, - shell=True) + environment_info_process = subprocess.Popen( + ["conda", "list", ">", raw_environment_info_file_path], stdout=subprocess.PIPE, shell=True + ) environment_info_process.wait() if not raw_environment_info_file_path.exists(): From 534572d79a1c187f252209108f2c119334a33e07 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Mon, 29 Apr 2024 22:14:19 -0700 Subject: [PATCH 18/22] Fix bug in CLI --- src/nwb_benchmarks/command_line_interface.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/nwb_benchmarks/command_line_interface.py b/src/nwb_benchmarks/command_line_interface.py index 030c7b7..f1deb58 100644 --- a/src/nwb_benchmarks/command_line_interface.py +++ b/src/nwb_benchmarks/command_line_interface.py @@ -38,9 +38,8 @@ def main() -> None: asv_root.mkdir(exist_ok=True) raw_environment_info_file_path = asv_root / ".raw_environment_info.txt" - environment_info_process = subprocess.Popen( - ["conda", "list", ">", raw_environment_info_file_path], stdout=subprocess.PIPE, shell=True - ) + outfile = open(raw_environment_info_file_path, "w") + environment_info_process = subprocess.Popen(["conda list"], stdout=outfile, shell=True) environment_info_process.wait() if not raw_environment_info_file_path.exists(): From 4f4c54bd8f514c719f73820c71c3b21d2aa021bf Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Mon, 29 Apr 2024 22:15:22 -0700 Subject: [PATCH 19/22] Update src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py Co-authored-by: Ryan Ly --- .../benchmarks/network_tracking_remote_file_reading.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py index 62dee3f..b2505ee 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py @@ -256,8 +256,8 @@ def track_network_activity_create_lindi_referernce_file_system_and_read_nwbfile( @skip_benchmark_if(TSHARK_PATH is None) class NWBLindiFileReadRemoteReferenceFileSystemBenchmark(BaseBenchmark): """ - Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi - filesystem is available locally. + Time the read of a remote NWB file with pynwb using lindi with a remote JSON reference + filesystem available. """ rounds = 1 From 051c69c27c03e21a1cb223834da9f412f1f4996f Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Mon, 29 Apr 2024 22:15:39 -0700 Subject: [PATCH 20/22] Update src/nwb_benchmarks/benchmarks/time_remote_file_reading.py Co-authored-by: Ryan Ly --- src/nwb_benchmarks/benchmarks/time_remote_file_reading.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py index e73f52c..26ea8fa 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_file_reading.py @@ -201,8 +201,8 @@ def time_create_lindi_referernce_file_system_and_read_jsonrfs(self, s3_url: str) class NWBLindiFileReadRemoteReferenceFileSystemBenchmark(BaseBenchmark): """ - Time the read of the Lindi HDF5 files with `pynwb` assuming that a local copy of the lindi - filesystem is available locally. + Time the read of a remote NWB file with pynwb using lindi with a remote JSON reference + filesystem available. """ rounds = 1 From 32449fa91ffdfd644ea382e100d243b6a2879ba8 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Mon, 29 Apr 2024 23:11:24 -0700 Subject: [PATCH 21/22] Add missing s3url on teardown --- .../benchmarks/network_tracking_remote_file_reading.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py index b2505ee..3d3e425 100644 --- a/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py +++ b/src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py @@ -229,9 +229,9 @@ class NWBLindiFileCreateLocalReferenceFileSystemBenchmark(BaseBenchmark): def setup(self, s3_url: str): """Clear the LINDI JSON if it still exists""" self.lindi_file = os.path.basename(s3_url) + ".lindi.json" - self.teardown() + self.teardown(s3_url=s3_url) - def teardown(self): + def teardown(self, s3_url: str): """Clear the LINDI JSON if it still exists""" if os.path.exists(self.lindi_file): os.remove(self.lindi_file) From 01451413cfdc030e798338cdd0f3dc5327d70405 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Tue, 30 Apr 2024 11:30:51 -0400 Subject: [PATCH 22/22] swap names --- src/nwb_benchmarks/benchmarks/time_remote_slicing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py index a285f31..f9a17fb 100644 --- a/src/nwb_benchmarks/benchmarks/time_remote_slicing.py +++ b/src/nwb_benchmarks/benchmarks/time_remote_slicing.py @@ -170,7 +170,7 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data - def track_network_activity_during_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): + def time_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): """Track network activity for slicing into a Zarr dataset""" self._temp = self.data_to_slice[slice_range] @@ -189,6 +189,6 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name) self.data_to_slice = self.neurodata_object.data - def track_network_activity_during_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): + def time_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]): """Track network activity for slicing into a Zarr dataset""" self._temp = self.data_to_slice[slice_range]