diff --git a/.gitignore b/.gitignore
index f92b4a7..c9674ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,6 +50,7 @@ coverage.xml
 *.py,cover
 .hypothesis/
 .pytest_cache/
+!tests/notebooks/artifact_folder/__pycache__/
 
 # Translations
 *.mo
diff --git a/jupyter_cache/cache/main.py b/jupyter_cache/cache/main.py
index deb9b83..b4cccfc 100644
--- a/jupyter_cache/cache/main.py
+++ b/jupyter_cache/cache/main.py
@@ -23,19 +23,33 @@ CACHE_LIMIT_KEY = "cache_limit"
 DEFAULT_CACHE_LIMIT = 1000
 
 
+ARTIFACT_SKIP_PATTERNS = ["__pycache__"]
 
 class NbArtifacts(NbArtifactsAbstract):
     """Container for artefacts of a notebook execution."""
 
-    def __init__(self, paths: List[str], in_folder, check_existence=True):
+    def __init__(
+        self,
+        paths: List[str],
+        in_folder,
+        check_existence=True,
+        skip_patterns=ARTIFACT_SKIP_PATTERNS,
+    ):
         """Initiate NbArtifacts
 
         :param paths: list of paths
         :param check_existence: check the paths exist
         :param in_folder: The folder that all paths should be in (or subfolder).
+        :param skip_patterns: Exclude paths that contain one of these patterns
 
         :raises IOError: if check_existence and file does not exist
         """
+
+        def path_not_in_skip_artifacts(path):
+            # TODO: This should probably be a more-specific regex in the future
+            return all(pattern not in str(path) for pattern in skip_patterns)
+
+        paths = list(filter(path_not_in_skip_artifacts, paths))
         self.paths = [Path(p).absolute() for p in paths]
         self.in_folder = Path(in_folder).absolute()
         to_relative_paths(self.paths, self.in_folder, check_existence=check_existence)
diff --git a/tests/notebooks/artifact_folder/__pycache__/empty.txt b/tests/notebooks/artifact_folder/__pycache__/empty.txt
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_cache.py b/tests/test_cache.py
index d9d289b..d2eefbb 100644
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -146,6 +146,32 @@ def test_artifacts(tmp_path):
     assert path.joinpath("artifact_folder").exists()
 
 
+def test_artifacts_skip_patterns(tmp_path):
+    cache = JupyterCacheBase(str(tmp_path))
+    with pytest.raises(IOError):
+        cache.cache_notebook_file(
+            path=os.path.join(NB_PATH, "basic.ipynb"),
+            uri="basic.ipynb",
+            artifacts=(os.path.join(NB_PATH),),
+            check_validity=False,
+        )
+    cache.cache_notebook_file(
+        path=os.path.join(NB_PATH, "basic.ipynb"),
+        uri="basic.ipynb",
+        artifacts=(
+            os.path.join(NB_PATH, "artifact_folder", "artifact.txt"),
+            os.path.join(NB_PATH, "artifact_folder", "__pycache__"),
+        ),
+        check_validity=False,
+    )
+
+    # __pycache__ is ignored and not saved as artifact in cache
+    bundle = cache.get_cache_bundle(1)
+    assert {str(p) for p in bundle.artifacts.relative_paths} == {
+        "artifact_folder/artifact.txt"
+    }
+
+
 # jupyter_client/session.py:371: DeprecationWarning:
 # Session._key_changed is deprecated in traitlets: use @observe and @unobserve instead
 @pytest.mark.filterwarnings("ignore")