From c37cfe61880b707b8cfb3bf4d5013ce25c1b3c4a Mon Sep 17 00:00:00 2001 From: sgref Date: Tue, 5 Apr 2022 15:53:10 +0200 Subject: [PATCH 1/6] Don't drop paths containing globs --- adlfs/spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index 9e904078..f798c8f6 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1511,7 +1511,7 @@ async def _expand_path(self, path, recursive=False, maxdepth=None, **kwargs): ) # Sets whether to return the parent dir if isinstance(path, list): - path = [f"{p.strip('/')}" for p in path if not p.endswith("*")] + path = [f"{p.strip('/')}" if not p.endswith("*") else p for p in path] else: if not path.endswith("*"): path = f"{path.strip('/')}" From 505495017aa44fe70a4484dbbcb930f91c56f1f0 Mon Sep 17 00:00:00 2001 From: sgref Date: Wed, 6 Apr 2022 10:38:13 +0200 Subject: [PATCH 2/6] Fix typo --- adlfs/spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index f798c8f6..2316e3c3 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1036,7 +1036,7 @@ async def _find(self, path, withdirs=False, prefix="", with_parent=False, **kwar return {name: files[name] for name in names} async def _glob_find(self, path, maxdepth=None, withdirs=False, **kwargs): - """List all files below path in a recusrsive manner. + """List all files below path in a recursive manner. Like posix ``find`` command without conditions Parameters ---------- From 35697b4f186ede46079980e1fb4e646ded8a8e52 Mon Sep 17 00:00:00 2001 From: sgref Date: Wed, 6 Apr 2022 10:38:59 +0200 Subject: [PATCH 3/6] Fix _glob_find returns nothing if called with a glob path --- adlfs/spec.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/adlfs/spec.py b/adlfs/spec.py index 2316e3c3..dc95660c 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1049,6 +1049,9 @@ async def _glob_find(self, path, maxdepth=None, withdirs=False, **kwargs): kwargs are passed to ``ls``. 
""" # TODO: allow equivalent of -name parameter + + path = path.rstrip('*') + path = path.rstrip('/') path = self._strip_protocol(path) out = dict() detail = kwargs.pop("detail", False) From 79beefcff73541868833b602fc0d72fdebdf4a7b Mon Sep 17 00:00:00 2001 From: sgref Date: Wed, 6 Apr 2022 10:40:58 +0200 Subject: [PATCH 4/6] Fix recursive _expand_path when called with a glob --- adlfs/spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index dc95660c..f2e6b410 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1530,7 +1530,7 @@ async def _expand_path(self, path, recursive=False, maxdepth=None, **kwargs): bit = set(await self._glob(p)) out |= bit if recursive: - bit2 = set(await self._expand_path(p)) + bit2 = set(await self._glob_find(p, withdirs=True)) out |= bit2 continue elif recursive: From d7d5a167ab9da7335e212d3b37db94dbd44c014b Mon Sep 17 00:00:00 2001 From: sgref Date: Wed, 6 Apr 2022 10:41:19 +0200 Subject: [PATCH 5/6] Add test for the current behaviour of expand_path --- adlfs/tests/test_spec.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index 99bf8760..126bad18 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -1348,3 +1348,41 @@ def test_find_with_prefix(storage): assert test_1s == [test_bucket_name + "/prefixes/test_1"] + [ test_bucket_name + f"/prefixes/test_{cursor}" for cursor in range(10, 20) ] + + +def test_expand_path(storage): + test_bucket = "data" + test_dir = f"{test_bucket}/testexpandpath" + sub_dir_1 = f"{test_dir}/subdir1" + sub_dir_2 = f"{sub_dir_1}/subdir2" + test_blobs = [ + f"{test_dir}/blob1", + f"{test_dir}/blob2", + f"{test_dir}/subdir1/blob3", + f"{test_dir}/subdir1/blob4", + f"{test_dir}/subdir1/subdir2/blob5", + ] + + expected_dirs_w_trailing_slash = test_blobs.copy() + expected_dirs_w_trailing_slash.append(test_dir) + 
expected_dirs_w_trailing_slash.append(sub_dir_1 + "/") + expected_dirs_w_trailing_slash.append(sub_dir_2 + "/") + + expected_dirs_wo_trailing_slash = test_blobs.copy() + expected_dirs_wo_trailing_slash.append(sub_dir_1) + expected_dirs_wo_trailing_slash.append(sub_dir_2) + + fs = AzureBlobFileSystem( + account_name=storage.account_name, connection_string=CONN_STR + ) + for blob in test_blobs: + fs.touch(blob) + + result_without_slash = fs.expand_path(test_dir, recursive=True) + assert sorted(result_without_slash) == sorted(expected_dirs_w_trailing_slash) + + result_with_slash = fs.expand_path(test_dir + "/", recursive=True) + assert sorted(result_with_slash) == sorted(expected_dirs_w_trailing_slash) + + result_glob = fs.expand_path(test_dir + "/*", recursive=True) + assert sorted(result_glob) == sorted(expected_dirs_wo_trailing_slash) From dc2c6670280c4a8a0eab076f5d5365f0d812c839 Mon Sep 17 00:00:00 2001 From: sgref Date: Wed, 6 Apr 2022 18:54:45 +0200 Subject: [PATCH 6/6] Fix upload if path is a dir (would wait endlessly) --- adlfs/spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adlfs/spec.py b/adlfs/spec.py index f2e6b410..ff3e3f53 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1570,7 +1570,7 @@ async def _put_file( container_name, path = self.split_path(rpath, delimiter=delimiter) if os.path.isdir(lpath): - self.makedirs(rpath, exist_ok=True) + return else: try: with open(lpath, "rb") as f1: