From e9d7427857b0bae9faec82740aa3c8f0ffdf5ed2 Mon Sep 17 00:00:00 2001 From: Chris Holdgraf Date: Wed, 13 May 2020 14:05:34 -0700 Subject: [PATCH 1/4] skipping artifact paths --- jupyter_cache/cache/main.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/jupyter_cache/cache/main.py b/jupyter_cache/cache/main.py index deb9b83..61be127 100644 --- a/jupyter_cache/cache/main.py +++ b/jupyter_cache/cache/main.py @@ -23,19 +23,32 @@ CACHE_LIMIT_KEY = "cache_limit" DEFAULT_CACHE_LIMIT = 1000 +ARTIFACT_SKIP_PATTERNS = ["__pycache__"] class NbArtifacts(NbArtifactsAbstract): """Container for artefacts of a notebook execution.""" - def __init__(self, paths: List[str], in_folder, check_existence=True): + def __init__( + self, + paths: List[str], + in_folder, + check_existence=True, + skip_patterns=ARTIFACT_SKIP_PATTERNS, + ): """Initiate NbArtifacts :param paths: list of paths :param check_existence: check the paths exist :param in_folder: The folder that all paths should be in (or subfolder). 
+ :param skip_patterns: Exclude paths that contain one of these patterns :raises IOError: if check_existence and file does not exist """ + + def path_not_in_skip_artifacts(path): + return all(path != pattern for pattern in skip_patterns) + + paths = list(filter(path_not_in_skip_artifacts, paths)) self.paths = [Path(p).absolute() for p in paths] self.in_folder = Path(in_folder).absolute() to_relative_paths(self.paths, self.in_folder, check_existence=check_existence) From 0a56dd1d2f5901b7d7c9845895f90567e4c4a4b3 Mon Sep 17 00:00:00 2001 From: Aakash Gupta Date: Sat, 13 Jun 2020 01:12:28 +1000 Subject: [PATCH 2/4] added a test and some changes in artifacts skip test --- jupyter_cache/cache/main.py | 15 ++++++++++++++- tests/test_cache.py | 26 ++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/jupyter_cache/cache/main.py b/jupyter_cache/cache/main.py index deb9b83..6f086cc 100644 --- a/jupyter_cache/cache/main.py +++ b/jupyter_cache/cache/main.py @@ -23,19 +23,32 @@ CACHE_LIMIT_KEY = "cache_limit" DEFAULT_CACHE_LIMIT = 1000 +ARTIFACT_SKIP_PATTERNS = ["__pycache__"] class NbArtifacts(NbArtifactsAbstract): """Container for artefacts of a notebook execution.""" - def __init__(self, paths: List[str], in_folder, check_existence=True): + def __init__( + self, + paths: List[str], + in_folder, + check_existence=True, + skip_patterns=ARTIFACT_SKIP_PATTERNS, + ): """Initiate NbArtifacts :param paths: list of paths :param check_existence: check the paths exist :param in_folder: The folder that all paths should be in (or subfolder). 
+ :param skip_patterns: Exclude paths that contain one of these patterns :raises IOError: if check_existence and file does not exist """ + + def path_not_in_skip_artifacts(path): + return all(pattern not in str(path) for pattern in skip_patterns) + + paths = list(filter(path_not_in_skip_artifacts, paths)) self.paths = [Path(p).absolute() for p in paths] self.in_folder = Path(in_folder).absolute() to_relative_paths(self.paths, self.in_folder, check_existence=check_existence) diff --git a/tests/test_cache.py b/tests/test_cache.py index d9d289b..d2eefbb 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -146,6 +146,32 @@ def test_artifacts(tmp_path): assert path.joinpath("artifact_folder").exists() +def test_artifacts_skip_patterns(tmp_path): + cache = JupyterCacheBase(str(tmp_path)) + with pytest.raises(IOError): + cache.cache_notebook_file( + path=os.path.join(NB_PATH, "basic.ipynb"), + uri="basic.ipynb", + artifacts=(os.path.join(NB_PATH),), + check_validity=False, + ) + cache.cache_notebook_file( + path=os.path.join(NB_PATH, "basic.ipynb"), + uri="basic.ipynb", + artifacts=( + os.path.join(NB_PATH, "artifact_folder", "artifact.txt"), + os.path.join(NB_PATH, "artifact_folder", "__pycache__"), + ), + check_validity=False, + ) + + # __pycache__ is ignored and not saved as artifact in cache + bundle = cache.get_cache_bundle(1) + assert {str(p) for p in bundle.artifacts.relative_paths} == { + "artifact_folder/artifact.txt" + } + + # jupyter_client/session.py:371: DeprecationWarning: # Session._key_changed is deprecated in traitlets: use @observe and @unobserve instead @pytest.mark.filterwarnings("ignore") From a18dccaf0def871677a78ca00c5a011fd6224a00 Mon Sep 17 00:00:00 2001 From: Aakash Gupta Date: Sat, 13 Jun 2020 01:21:38 +1000 Subject: [PATCH 3/4] modified .gitignore to allow __pycache__ in artifact_folder --- .gitignore | 1 + tests/notebooks/artifact_folder/__pycache__/empty.txt | 0 2 files changed, 1 insertion(+) create mode 100644 
tests/notebooks/artifact_folder/__pycache__/empty.txt diff --git a/.gitignore b/.gitignore index f92b4a7..c9674ec 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ coverage.xml *.py,cover .hypothesis/ .pytest_cache/ +!tests/notebooks/artifact_folder/__pycache__/ # Translations *.mo diff --git a/tests/notebooks/artifact_folder/__pycache__/empty.txt b/tests/notebooks/artifact_folder/__pycache__/empty.txt new file mode 100644 index 0000000..e69de29 From c2e50740d2feb57d022ed26c5d00119c999fc93c Mon Sep 17 00:00:00 2001 From: Chris Holdgraf Date: Fri, 12 Jun 2020 08:44:23 -0700 Subject: [PATCH 4/4] Update jupyter_cache/cache/main.py --- jupyter_cache/cache/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/jupyter_cache/cache/main.py b/jupyter_cache/cache/main.py index 6f086cc..b4cccfc 100644 --- a/jupyter_cache/cache/main.py +++ b/jupyter_cache/cache/main.py @@ -46,6 +46,7 @@ def __init__( """ def path_not_in_skip_artifacts(path): + # TODO: This should probably be a more-specific regex in the future return all(pattern not in str(path) for pattern in skip_patterns) paths = list(filter(path_not_in_skip_artifacts, paths))