Skip to content

Commit

Permalink
Merge pull request #53 from NeurodataWithoutBorders/add/lindi
Browse files Browse the repository at this point in the history
Add benchmarks using lindi
  • Loading branch information
CodyCBakerPhD authored Apr 30, 2024
2 parents b016d10 + 0145141 commit fb464f1
Show file tree
Hide file tree
Showing 7 changed files with 393 additions and 15 deletions.
129 changes: 129 additions & 0 deletions src/nwb_benchmarks/benchmarks/network_tracking_remote_file_reading.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
"""Basic benchmarks for profiling network statistics for streaming access to NWB files and their contents."""

import os

from asv_runner.benchmarks.mark import skip_benchmark_if

from nwb_benchmarks import TSHARK_PATH
from nwb_benchmarks.core import (
BaseBenchmark,
create_lindi_reference_file_system,
get_s3_url,
network_activity_tracker,
read_hdf5_fsspec_no_cache,
read_hdf5_fsspec_with_cache,
read_hdf5_lindi,
read_hdf5_nwbfile_fsspec_no_cache,
read_hdf5_nwbfile_fsspec_with_cache,
read_hdf5_nwbfile_lindi,
read_hdf5_nwbfile_remfile,
read_hdf5_nwbfile_remfile_with_cache,
read_hdf5_nwbfile_ros3,
Expand All @@ -33,6 +40,21 @@
ClassicRos3TestCase=dict(s3_url="https://dandiarchive.s3.amazonaws.com/ros3test.nwb"),
)


# Parameters for LINDI when HDF5 files are remote without using an existing LINDI JSON reference file system on
# the remote server (i.e., we create the LINDI JSON file for these in these tests)
lindi_hdf5_parameter_cases = parameter_cases

# Parameters for LINDI pointing to a remote LINDI reference file system JSON file. I.e., here we do not
# to create the JSON but can load it directly from the remote store
lindi_remote_rfs_parameter_cases = dict(
# TODO: Just an example case for testing. Replace with real test case
BaseExample=dict(
s3_url="https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json",
),
)


zarr_parameter_cases = dict(
AIBSTestCase=dict(
s3_url=(
Expand All @@ -44,6 +66,7 @@
)


@skip_benchmark_if(TSHARK_PATH is None)
class FsspecNoCacheDirectFileReadBenchmark(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -53,6 +76,7 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class FsspecWithCacheDirectFileReadBenchmark(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -65,6 +89,7 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class RemfileDirectFileReadBenchmark(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -74,6 +99,7 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class RemfileDirectFileReadBenchmarkWithCache(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -86,6 +112,7 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class Ros3DirectFileReadBenchmark(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -96,6 +123,7 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class FsspecNoCacheNWBFileReadBenchmark(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -105,6 +133,7 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class FsspecWithCacheNWBFileReadBenchmark(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -119,6 +148,7 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class RemfileNWBFileReadBenchmark(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -128,6 +158,7 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class RemfileNWBFileReadBenchmarkWithCache(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -142,6 +173,7 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class Ros3NWBFileReadBenchmark(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -152,6 +184,100 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class LindiFileReadLocalReferenceFileSystemBenchmark(BaseBenchmark):
"""
Time the read of the Lindi HDF5 files with and without `pynwb` assuming that a local
copy of the lindi filesystem is available locally.
"""

rounds = 1
repeat = 3
parameter_cases = lindi_hdf5_parameter_cases

def setup(self, s3_url: str):
"""Create the local JSON LINDI reference filesystem if it does not exist"""
self.lindi_file = os.path.basename(s3_url) + ".lindi.json"
if not os.path.exists(self.lindi_file):
create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file)

def track_network_activity_during_read_lindi_nwbfile(self, s3_url: str):
"""Read the NWB file with pynwb using LINDI with the local reference filesystem JSON"""
with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker:
self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=self.lindi_file)
return network_tracker.asv_network_statistics

def track_network_activity_during_read_lindi_jsonrfs(self, s3_url: str):
"""Read the NWB file with LINDI directly using the local reference filesystem JSON"""
with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker:
self.client = read_hdf5_lindi(rfs=self.lindi_file)
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class NWBLindiFileCreateLocalReferenceFileSystemBenchmark(BaseBenchmark):
"""
Time the creation of a local Lindi JSON reference filesystem for a remote NWB file
as well as reading the NWB file with PyNWB when the local reference filesystem does not
yet exist.
"""

rounds = 1
repeat = 3
parameter_cases = lindi_hdf5_parameter_cases

def setup(self, s3_url: str):
"""Clear the LINDI JSON if it still exists"""
self.lindi_file = os.path.basename(s3_url) + ".lindi.json"
self.teardown(s3_url=s3_url)

def teardown(self, s3_url: str):
"""Clear the LINDI JSON if it still exists"""
if os.path.exists(self.lindi_file):
os.remove(self.lindi_file)

def track_network_activity_create_lindi_referernce_file_system(self, s3_url: str):
"""Create a local Lindi JSON reference filesystem from a remote HDF5 file"""
with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker:
create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file)
return network_tracker.asv_network_statistics

def track_network_activity_create_lindi_referernce_file_system_and_read_nwbfile(self, s3_url: str):
"""
Create a local Lindi JSON reference filesystem from a remote HDF5 file
and then read the NWB file with PyNWB using LINDI with the local JSON
"""
with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker:
create_lindi_reference_file_system(s3_url=s3_url, outfile_path=self.lindi_file)
self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=self.lindi_file)
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class NWBLindiFileReadRemoteReferenceFileSystemBenchmark(BaseBenchmark):
"""
Time the read of a remote NWB file with pynwb using lindi with a remote JSON reference
filesystem available.
"""

rounds = 1
repeat = 3
parameter_cases = lindi_remote_rfs_parameter_cases

def track_network_activity_time_read_lindi_nwbfile(self, s3_url: str):
"""Read a remote NWB file with PyNWB using the remote LINDI JSON reference filesystem"""
with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker:
self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=s3_url)
return network_tracker.asv_network_statistics

def track_network_activity_time_read_lindi_jsonrfs(self, s3_url: str):
"""Read a remote HDF5 file with LINDI using the remote LINDI JSON reference filesystem"""
with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker:
self.client = read_hdf5_lindi(rfs=s3_url)
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class ZarrDirectFileReadBenchmark(BaseBenchmark):
parameter_cases = zarr_parameter_cases

Expand All @@ -161,6 +287,7 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class ZarrForceNoConsolidatedDirectFileReadBenchmark(BaseBenchmark):
parameter_cases = zarr_parameter_cases

Expand All @@ -170,6 +297,7 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class ZarrNWBFileReadBenchmark(BaseBenchmark):
parameter_cases = zarr_parameter_cases

Expand All @@ -179,6 +307,7 @@ def track_network_activity_during_read(self, s3_url: str):
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class ZarrForceNoConsolidatedNWBFileReadBenchmark(BaseBenchmark):
parameter_cases = zarr_parameter_cases

Expand Down
47 changes: 45 additions & 2 deletions src/nwb_benchmarks/benchmarks/network_tracking_remote_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from typing import Tuple

from asv_runner.benchmarks.mark import skip_benchmark_if

from nwb_benchmarks import TSHARK_PATH
from nwb_benchmarks.core import (
BaseBenchmark,
Expand All @@ -10,6 +12,7 @@
network_activity_tracker,
read_hdf5_nwbfile_fsspec_no_cache,
read_hdf5_nwbfile_fsspec_with_cache,
read_hdf5_nwbfile_lindi,
read_hdf5_nwbfile_remfile,
read_hdf5_nwbfile_remfile_with_cache,
read_hdf5_nwbfile_ros3,
Expand All @@ -28,6 +31,17 @@
)
)

# Parameters for LINDI pointing to a remote LINDI reference file system JSON file. I.e., here we do not
# to create the JSON but can load it directly from the remote store
lindi_remote_rfs_parameter_cases = dict(
# TODO: Just an example case for testing. Replace with real test case
BaseExample=dict(
s3_url="https://kerchunk.neurosift.org/dandi/dandisets/000939/assets/11f512ba-5bcf-4230-a8cb-dc8d36db38cb/zarr.json",
object_name="accelerometer",
slice_range=(slice(0, 30_000), slice(0, 3)),
),
)


zarr_parameter_cases = dict(
AIBSTestCase=dict(
Expand All @@ -42,6 +56,7 @@
)


@skip_benchmark_if(TSHARK_PATH is None)
class FsspecNoCacheContinuousSliceBenchmark(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -56,6 +71,7 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class FsspecWithCacheContinuousSliceBenchmark(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -75,6 +91,7 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class RemfileContinuousSliceBenchmark(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -89,6 +106,7 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class RemfileContinuousSliceBenchmarkWithCache(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -108,6 +126,7 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class Ros3ContinuousSliceBenchmark(BaseBenchmark):
parameter_cases = parameter_cases

Expand All @@ -118,11 +137,34 @@ def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]):

def track_network_activity_during_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]):
with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker:
self._temp, retries = robust_ros3_read(command=self.data_to_slice.__getitem__, command_args=(slice_range,))
network_tracker.asv_network_statistics.update(retries=retries)
self._temp, self.retries = robust_ros3_read(
command=self.data_to_slice.__getitem__, command_args=(slice_range,)
)
network_tracker.asv_network_statistics.update(retries=self.retries)
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class LindiFileReadRemoteReferenceFileSystemContinuousSliceBenchmark(BaseBenchmark):
"""
Time the read of a data slice from a remote NWB file with pynwb using lindi with a remote JSON reference
filesystem available.
"""

parameter_cases = lindi_remote_rfs_parameter_cases

def setup(self, s3_url: str, object_name: str, slice_range: Tuple[slice]):
self.nwbfile, self.io, self.client = read_hdf5_nwbfile_lindi(rfs=s3_url)
self.neurodata_object = get_object_by_name(nwbfile=self.nwbfile, object_name=object_name)
self.data_to_slice = self.neurodata_object.data

def track_network_activity_during_slice(self, s3_url: str, object_name: str, slice_range: Tuple[slice]):
with network_activity_tracker(tshark_path=TSHARK_PATH) as network_tracker:
self._temp = self.data_to_slice[slice_range]
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class ZarrContinuousSliceBenchmark(BaseBenchmark):
parameter_cases = zarr_parameter_cases

Expand All @@ -137,6 +179,7 @@ def track_network_activity_during_slice(self, s3_url: str, object_name: str, sli
return network_tracker.asv_network_statistics


@skip_benchmark_if(TSHARK_PATH is None)
class ZarrForceNoConsolidatedContinuousSliceBenchmark(BaseBenchmark):
parameter_cases = zarr_parameter_cases

Expand Down
Loading

0 comments on commit fb464f1

Please sign in to comment.