From 48d6aa7fb1168bfcb4db3256a3a1936aaf037c4d Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Fri, 2 Feb 2024 12:09:51 -0600
Subject: [PATCH 1/5] Revert "feat(utils): decorator for results memoization for expensive function calls"

This reverts commit a2429940928f027a36b88095568b04849df6f649.
---
 element_interface/utils.py | 69 +--------------------------------------
 1 file changed, 1 insertion(+), 68 deletions(-)

diff --git a/element_interface/utils.py b/element_interface/utils.py
index c3832f4..14d4eee 100644
--- a/element_interface/utils.py
+++ b/element_interface/utils.py
@@ -5,9 +5,7 @@
 import pathlib
 import sys
 import uuid
-import json
-import pickle
-from datetime import datetime
+
 from datajoint.utils import to_camel_case
 
 logger = logging.getLogger("datajoint")
@@ -189,68 +187,3 @@ def __exit__(self, *args):
         logger.setLevel(self.prev_log_level)
         sys.stdout.close()
         sys.stdout = self._original_stdout
-
-
-def memoized_result(parameters: dict, output_directory: str):
-    """
-    This is a decorator factory designed to cache the results of a function based on its input parameters and the state of the output directory.
-    If the function is called with the same parameters and the output files in the directory remain unchanged,
-    it returns the cached results; otherwise, it executes the function and caches the new results along with metadata.
-    Conditions for robust usage:
-    - the "output_directory" is to store exclusively the resulting files generated by this function call only, not a shared space with other functions/processes
-    - the "parameters" passed to the decorator captures the true and uniqueness of the arguments to be used in the decorated function call
-    Args:
-        parameters: parameters that would identify a unique function call
-        output_directory: directory location for the output files
-
-    Returns: a decorator to enable a function call to memoize/cached the resulting files
-    """
-
-    def decorator(func):
-        def wrapped(*args, **kwargs):
-            output_dir = _to_Path(output_directory)
-            input_hash = dict_to_uuid(parameters)
-            input_hash_fp = output_dir / f".{input_hash}.json"
-            # check if results already exist (from previous identical run)
-            output_dir_files_hash = dict_to_uuid(
-                {
-                    f.relative_to(output_dir).as_posix(): f.stat().st_size
-                    for f in output_dir.rglob("*")
-                    if f.name != f".{input_hash}.json"
-                }
-            )
-            if input_hash_fp.exists():
-                with open(input_hash_fp, "r") as f:
-                    meta = json.load(f)
-                if str(output_dir_files_hash) == meta["output_dir_files_hash"]:
-                    logger.info(f"Existing results found, skip '{func.__name__}'")
-                    with open(output_dir / f".{input_hash}_results.pickle", "rb") as f:
-                        results = pickle.load(f)
-                    return results
-            # no results - trigger the run
-            logger.info(f"No existing results found, calling '{func.__name__}'")
-            start_time = datetime.utcnow()
-            results = func(*args, **kwargs)
-
-            with open(output_dir / f".{input_hash}_results.pickle", "wb") as f:
-                pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL)
-
-            meta = {
-                "output_dir_files_hash": dict_to_uuid(
-                    {
-                        f.relative_to(output_dir).as_posix(): f.stat().st_size
-                        for f in output_dir.rglob("*")
-                        if f.name != f".{input_hash}.json"
-                    }
-                ),
-                "start_time": start_time,
-                "completion_time": datetime.utcnow(),
-            }
-            with open(input_hash_fp, "w") as f:
-                json.dump(meta, f, default=str)
-
-            return results
-
-        return wrapped
-
-    return decorator
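The decorator removed above decides whether a previous run can be reused by fingerprinting the output directory: it hashes a mapping of each file's relative path to its size, skipping its own metadata file. A minimal standalone sketch of that check follows; it assumes dict_to_uuid is importable from element_interface.utils (the patched module uses it unqualified), and the helper name output_dir_fingerprint is ours for illustration, not part of the patch.

    # Sketch of the directory-state fingerprint used by the (reverted) decorator.
    # Assumes dict_to_uuid from element_interface.utils hashes a dict into a UUID;
    # output_dir_fingerprint is a hypothetical helper name, not in the patched code.
    import pathlib

    from element_interface.utils import dict_to_uuid

    def output_dir_fingerprint(output_directory: str, input_hash) -> str:
        output_dir = pathlib.Path(output_directory)
        return str(
            dict_to_uuid(
                {
                    # map each file's relative path to its size, ignoring the
                    # decorator's own ".<input_hash>.json" metadata file
                    f.relative_to(output_dir).as_posix(): f.stat().st_size
                    for f in output_dir.rglob("*")
                    if f.name != f".{input_hash}.json"
                }
            )
        )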
From 11a1a0f42838d335f291419bf9816b253b0337e7 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Wed, 20 Mar 2024 12:57:44 -0500
Subject: [PATCH 2/5] Revert "Merge branch 'main' into dev_memoized_results"

This reverts commit 012e83857c13a96d36109faa16c46aebdbddc9f0, reversing
changes made to b3f68292b7c7f46563d1b20525a21401044cf2e4.
---
 element_interface/utils.py | 69 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/element_interface/utils.py b/element_interface/utils.py
index 14d4eee..c3832f4 100644
--- a/element_interface/utils.py
+++ b/element_interface/utils.py
@@ -5,7 +5,9 @@
 import pathlib
 import sys
 import uuid
-
+import json
+import pickle
+from datetime import datetime
 from datajoint.utils import to_camel_case
 
 logger = logging.getLogger("datajoint")
@@ -187,3 +189,68 @@ def __exit__(self, *args):
         logger.setLevel(self.prev_log_level)
         sys.stdout.close()
         sys.stdout = self._original_stdout
+
+
+def memoized_result(parameters: dict, output_directory: str):
+    """
+    This is a decorator factory designed to cache the results of a function based on its input parameters and the state of the output directory.
+    If the function is called with the same parameters and the output files in the directory remain unchanged,
+    it returns the cached results; otherwise, it executes the function and caches the new results along with metadata.
+    Conditions for robust usage:
+    - the "output_directory" is to store exclusively the resulting files generated by this function call only, not a shared space with other functions/processes
+    - the "parameters" passed to the decorator captures the true and uniqueness of the arguments to be used in the decorated function call
+    Args:
+        parameters: parameters that would identify a unique function call
+        output_directory: directory location for the output files
+
+    Returns: a decorator to enable a function call to memoize/cached the resulting files
+    """
+
+    def decorator(func):
+        def wrapped(*args, **kwargs):
+            output_dir = _to_Path(output_directory)
+            input_hash = dict_to_uuid(parameters)
+            input_hash_fp = output_dir / f".{input_hash}.json"
+            # check if results already exist (from previous identical run)
+            output_dir_files_hash = dict_to_uuid(
+                {
+                    f.relative_to(output_dir).as_posix(): f.stat().st_size
+                    for f in output_dir.rglob("*")
+                    if f.name != f".{input_hash}.json"
+                }
+            )
+            if input_hash_fp.exists():
+                with open(input_hash_fp, "r") as f:
+                    meta = json.load(f)
+                if str(output_dir_files_hash) == meta["output_dir_files_hash"]:
+                    logger.info(f"Existing results found, skip '{func.__name__}'")
+                    with open(output_dir / f".{input_hash}_results.pickle", "rb") as f:
+                        results = pickle.load(f)
+                    return results
+            # no results - trigger the run
+            logger.info(f"No existing results found, calling '{func.__name__}'")
+            start_time = datetime.utcnow()
+            results = func(*args, **kwargs)
+
+            with open(output_dir / f".{input_hash}_results.pickle", "wb") as f:
+                pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+            meta = {
+                "output_dir_files_hash": dict_to_uuid(
+                    {
+                        f.relative_to(output_dir).as_posix(): f.stat().st_size
+                        for f in output_dir.rglob("*")
+                        if f.name != f".{input_hash}.json"
+                    }
+                ),
+                "start_time": start_time,
+                "completion_time": datetime.utcnow(),
+            }
+            with open(input_hash_fp, "w") as f:
+                json.dump(meta, f, default=str)
+
+            return results
+
+        return wrapped
+
+    return decorator
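With memoized_result restored, a typical call pattern looks like the sketch below. The decorator behavior is as added above; the function run_processing, its parameter values, and the output path are made-up placeholders, and the output directory must already exist because the decorator writes its cache files there.

    # Hypothetical usage of the restored memoized_result decorator;
    # run_processing, the parameter values, and the path are illustrative only.
    import pathlib

    from element_interface.utils import memoized_result

    params = {"method": "suite2p", "tau": 1.25}
    output_dir = "/tmp/processing_output"
    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    @memoized_result(params, output_dir)
    def run_processing():
        # ... expensive computation that writes its outputs into output_dir ...
        return {"status": "complete"}

    results = run_processing()  # first call: runs, pickles results, writes metadata
    results = run_processing()  # same params, unchanged files: loads the cached results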
From f4d84780ceb11bfbb3ec5dc53ec5331c1a9be037 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Wed, 20 Mar 2024 15:16:23 -0500
Subject: [PATCH 3/5] format: minor reformatting of the docstring

---
 element_interface/utils.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/element_interface/utils.py b/element_interface/utils.py
index c3832f4..14da994 100644
--- a/element_interface/utils.py
+++ b/element_interface/utils.py
@@ -196,14 +196,16 @@ def memoized_result(parameters: dict, output_directory: str):
     This is a decorator factory designed to cache the results of a function based on its input parameters and the state of the output directory.
     If the function is called with the same parameters and the output files in the directory remain unchanged,
     it returns the cached results; otherwise, it executes the function and caches the new results along with metadata.
-    Conditions for robust usage:
-    - the "output_directory" is to store exclusively the resulting files generated by this function call only, not a shared space with other functions/processes
-    - the "parameters" passed to the decorator captures the true and uniqueness of the arguments to be used in the decorated function call
+
     Args:
         parameters: parameters that would identify a unique function call
         output_directory: directory location for the output files
 
     Returns: a decorator to enable a function call to memoize/cached the resulting files
+
+    Conditions for robust usage:
+    - the "output_directory" is to store exclusively the resulting files generated by this function call only, not a shared space with other functions/processes
+    - the "parameters" passed to the decorator captures the true and uniqueness of the arguments to be used in the decorated function call
     """
 
     def decorator(func):
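The "Conditions for robust usage" reorganized above have a concrete effect: the fingerprint covers every file in output_directory, so any other writer touching that directory changes the hash and forces a rerun. Continuing the made-up run_processing sketch shown after PATCH 2/5:

    # Continuing the hypothetical run_processing example: dropping an unrelated
    # file into output_dir changes the directory fingerprint, so the next call
    # recomputes instead of loading the cache.
    (pathlib.Path(output_dir) / "unrelated.txt").write_text("scratch")
    results = run_processing()  # fingerprint mismatch -> function runs again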
From 20cf21ea86b00b9b7e31aeca0469e2ff26e37dce Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Wed, 22 May 2024 14:07:55 -0500
Subject: [PATCH 4/5] fix(suite2p_loader): allow loading suite2p results without ROI detection or trace extraction

---
 element_interface/suite2p_loader.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/element_interface/suite2p_loader.py b/element_interface/suite2p_loader.py
index 07dbff1..2e6884b 100644
--- a/element_interface/suite2p_loader.py
+++ b/element_interface/suite2p_loader.py
@@ -138,15 +138,6 @@ def __init__(self, suite2p_plane_dir: str):
             )
         self.creation_time = datetime.fromtimestamp(ops_fp.stat().st_ctime)
-        iscell_fp = self.fpath / "iscell.npy"
-        if not iscell_fp.exists():
-            raise FileNotFoundError(
-                'No "iscell.npy" found. Invalid suite2p plane folder: {}'.format(
-                    self.fpath
-                )
-            )
-        self.curation_time = datetime.fromtimestamp(iscell_fp.stat().st_ctime)
-
 
         # -- Initialize attributes --
         for s2p_type in _suite2p_ftypes:
             setattr(self, "_{}".format(s2p_type), None)
@@ -160,6 +151,11 @@ def __init__(self, suite2p_plane_dir: str):
 
     # -- load core files --
 
+    @property
+    def curation_time(self):
+        print("DeprecationWarning: 'curation_time' is deprecated, set to be the same as 'creation time', no longer reliable.")
+        return self.creation_time
+
     @property
     def ops(self):
         if self._ops is None:

From f96d8be9309647a6f19a5378d9c59cdea88a9943 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen
Date: Tue, 28 May 2024 12:11:08 -0500
Subject: [PATCH 5/5] update: minor var name change

---
 element_interface/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/element_interface/utils.py b/element_interface/utils.py
index 14da994..2fc8ca9 100644
--- a/element_interface/utils.py
+++ b/element_interface/utils.py
@@ -191,14 +191,14 @@ def __exit__(self, *args):
         sys.stdout = self._original_stdout
 
 
-def memoized_result(parameters: dict, output_directory: str):
+def memoized_result(uniqueness_dict: dict, output_directory: str):
     """
     This is a decorator factory designed to cache the results of a function based on its input parameters and the state of the output directory.
     If the function is called with the same parameters and the output files in the directory remain unchanged,
     it returns the cached results; otherwise, it executes the function and caches the new results along with metadata.
 
     Args:
-        parameters: parameters that would identify a unique function call
+        uniqueness_dict: a dictionary that would identify a unique function call
         output_directory: directory location for the output files
 
     Returns: a decorator to enable a function call to memoize/cached the resulting files
@@ -211,7 +211,7 @@ def memoized_result(parameters: dict, output_directory: str):
     def decorator(func):
         def wrapped(*args, **kwargs):
             output_dir = _to_Path(output_directory)
-            input_hash = dict_to_uuid(parameters)
+            input_hash = dict_to_uuid(uniqueness_dict)
             input_hash_fp = output_dir / f".{input_hash}.json"
             # check if results already exist (from previous identical run)
             output_dir_files_hash = dict_to_uuid(