diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 96f8b16..9d78b21 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -18,6 +18,22 @@ Possible types of changes are: +Unreleased +---------- +Fixed +''''' +- Infinite loop in ``GCSFS.fix_storage()`` when the bucket contains blobs or + directories (with or without empty blob) on the same hierarchy level as + ``root_path`` and whose name also starts with the ``root_path`` directory + name. For example, having ``root_path = "gs://mybucket/abc"`` and these + blobs in the bucket: + + .. code-block:: + + gs://mybucket/abc/file.txt + gs://mybucket/abc123/file.txt + + 1.5.1 - 03.06.2022 ------------------ Fixed diff --git a/fs_gcsfs/_gcsfs.py b/fs_gcsfs/_gcsfs.py index 6c869bf..baac7a2 100644 --- a/fs_gcsfs/_gcsfs.py +++ b/fs_gcsfs/_gcsfs.py @@ -481,7 +481,7 @@ def fix_storage(self) -> None: # TODO test As GCS is no real file system but only a key-value store, there is also no concept of folders. S3FS and GCSFS overcome this limitation by adding empty files with the name "/" every time a directory is created, see https://fs-gcsfs.readthedocs.io/en/latest/#limitations. """ - names = [blob.name for blob in self.bucket.list_blobs(prefix=self.root_path)] + names = [blob.name for blob in self.client.list_blobs(self.bucket, prefix=forcedir(self.root_path))] marked_dirs = set() all_dirs = set() diff --git a/fs_gcsfs/tests/test_gcsfs.py b/fs_gcsfs/tests/test_gcsfs.py index c51dd77..77e91e8 100644 --- a/fs_gcsfs/tests/test_gcsfs.py +++ b/fs_gcsfs/tests/test_gcsfs.py @@ -117,15 +117,29 @@ def test_create_property_does_not_create_file_if_emptyish_root_path(root_path, c def test_fix_storage_adds_binary_blobs_with_empty_string_as_directory_marker(bucket, gcsfs): # Creating a 'nested' hierarchy of blobs without directory marker + content = b"Is this a test? It has to be. Otherwise I can't go on." for path in ["foo/test", "foo/bar/test", "foo/baz/test", "foo/bar/egg/test"]: key = gcsfs._path_to_key(path) blob = bucket.blob(key) - blob.upload_from_string(b"Is this a test? It has to be. Otherwise I can't go on.") + blob.upload_from_string(content) + + # Create a blob inside a directory that is sibling to `root_path` and whose path happens to be the same as the + # `root_path`, but with an added suffix. That directory should be left untouched, as it is not contained under + # `root_path`, despite sharing the same path prefix. + sibling_dir = gcsfs.root_path.rstrip("/") + "_suffix" + file_in_sibling_dir = f"{sibling_dir}/test" + bucket.blob(file_in_sibling_dir).upload_from_string(content) + gcsfs.fix_storage() + # Assert that empty blobs were created for directories in the hierarchy under `root_path` for path in ["", "foo", "foo/bar", "foo/baz", "foo/bar/egg"]: assert gcsfs.isdir(path) + # Assert that the sibling directory was left untouched + assert not bucket.blob(sibling_dir).exists() + assert bucket.blob(file_in_sibling_dir).exists() + def test_fix_storage_does_not_overwrite_existing_directory_markers_with_custom_content(bucket, gcsfs): for path in ["foo/test"]: