From bd322177f69509f3800976c46a179fd7cce5de73 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 29 Feb 2020 13:10:14 +1100 Subject: [PATCH 1/8] Rename `commit` -> `cache` Also rename user-facing `PK` -> `ID` --- README.md | 94 ++++++------- jupyter_cache/base.py | 86 ++++++------ jupyter_cache/cache/__init__.py | 2 +- jupyter_cache/cache/db.py | 38 +++--- jupyter_cache/cache/main.py | 174 +++++++++++------------- jupyter_cache/cli/arguments.py | 4 +- jupyter_cache/cli/commands/cmd_cache.py | 112 ++++++++------- jupyter_cache/cli/options.py | 2 +- jupyter_cache/executors/base.py | 4 +- jupyter_cache/executors/basic.py | 6 +- tests/test_cache.py | 46 +++---- tests/test_cli.py | 38 +++--- tests/test_db.py | 12 +- 13 files changed, 303 insertions(+), 315 deletions(-) diff --git a/README.md b/README.md index 0083d6a..9b64470 100644 --- a/README.md +++ b/README.md @@ -55,72 +55,72 @@ Options: -h, --help Show this message and exit. Commands: - cat-artifact Print the contents of a commit artefact. - clear Clear the cache completely. - commit-limit Change the commit limit of the cache. - commit-nb Commit a notebook that has already been executed. - commit-nbs Commit notebook(s) that have already been executed. - diff-nb Print a diff of a notebook to one stored in the cache. - execute Execute outdated notebooks. - list-commits List committed notebook records in the cache. - list-staged List notebooks staged for possible execution. - remove-commits Remove notebook commit(s) from the cache. - show-commit Show details of a committed notebook in the cache. - show-staged Show details of a staged notebook. - stage-nb Commit a notebook, with possible assets. - stage-nbs Stage notebook(s) for execution. - unstage-nbs Unstage notebook(s) for execution. + cache-limit Change the maximum number of notebooks stored in the cache. + cache-nb Cache a notebook that has already been executed. + cache-nbs Cache notebook(s) that have already been executed. 
+ cat-artifact Print the contents of a cached artefact. + clear Clear the cache completely. + diff-nb Print a diff of a notebook to one stored in the cache. + execute Execute outdated notebooks. + list-cached List cached notebook records in the cache. + list-staged List notebooks staged for possible execution. + remove-cached Remove notebooks stored in the cache. + show-cached Show details of a cached notebook in the cache. + show-staged Show details of a staged notebook. + stage-nb Cache a notebook, with possible assets. + stage-nbs Stage notebook(s) for execution. + unstage-nbs Unstage notebook(s) for execution. ``` -### Commit Executed Notebooks +### Caching Executed Notebooks -You can commit notebooks straight into the cache. When committing, a check will be made that the notebooks look to have been executed correctly, i.e. the cell execution counts go sequentially up from 1. +You can cache notebooks straight into the cache. When caching, a check will be made that the notebooks look to have been executed correctly, i.e. the cell execution counts go sequentially up from 1. ```console -$ jcache commit-nbs tests/notebooks/basic.ipynb +$ jcache cache-nbs tests/notebooks/basic.ipynb Cache path: /Users/cjs14/GitHub/sandbox/.jupyter_cache The cache does not yet exist, do you want to create it? [y/N]: y -Committing: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic.ipynb +Caching: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic.ipynb Validity Error: Expected cell 1 to have execution_count 1 not 2 -The notebook may not have been executed, continue committing? [y/N]: y +The notebook may not have been executed, continue caching? [y/N]: y Success! 
``` Or to skip validation: ```console -$ jcache commit-nbs --no-validate tests/notebooks/*.ipynb -Committing: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic.ipynb -Committing: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic_failing.ipynb -Committing: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic_unrun.ipynb -Committing: /Users/cjs14/GitHub/sandbox/tests/notebooks/complex_outputs.ipynb +$ jcache cache-nbs --no-validate tests/notebooks/*.ipynb +Caching: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic.ipynb +Caching: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic_failing.ipynb +Caching: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic_unrun.ipynb +Caching: /Users/cjs14/GitHub/sandbox/tests/notebooks/complex_outputs.ipynb Success! ``` -Once you've committed some notebooks, you can look at the 'commit records' for what has been cached. +Once you've cached some notebooks, you can look at the 'cache records' for what has been cached. -Each notebook is hashed (code cells and kernel spec only), which is used to compare against 'staged' notebooks. Multiple hashes for the same URI can be added (the URI is just there for inspetion) and the size of the cache is limited (current default 1000) so that, at this size, the last accessed records begin to be deleted. You can remove cached records by the Primary Key (PK). +Each notebook is hashed (code cells and kernel spec only), which is used to compare against 'staged' notebooks. Multiple hashes for the same URI can be added (the URI is just there for inspetion) and the size of the cache is limited (current default 1000) so that, at this size, the last accessed records begin to be deleted. You can remove cached records by their ID. 
```console -$ jcache list-commits --hashkeys - PK URI Created Accessed Hashkey +$ jcache list-cached --hashkeys + ID URI Created Accessed Hashkey ---- --------------------- ---------------- ---------------- -------------------------------- 4 complex_outputs.ipynb 2020-02-23 20:33 2020-02-23 20:33 800c4a057730a55a384cfe579e3850aa 3 basic_unrun.ipynb 2020-02-23 20:33 2020-02-23 20:33 818f3412b998fcf4fe9ca3cca11a3fc3 2 basic_failing.ipynb 2020-02-23 20:33 2020-02-23 20:33 72859c2bf1e12f35f30ef131f0bef320 ``` -You can also commit with artefacts (external outputs of the notebook execution). +You can also cache notebooks with artefacts (external outputs of the notebook execution). ```console -$ jcache commit-nb -nb tests/notebooks/basic.ipynb tests/notebooks/artifact_folder/artifact.txt -Committing: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb +$ jcache cache-nb -nb tests/notebooks/basic.ipynb tests/notebooks/artifact_folder/artifact.txt +Caching: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb Success! ``` ```console -$ jcache show-commit 1 -PK: 1 +$ jcache show-cached 1 +ID: 1 URI: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb Created: 2020-02-24 14:58 Accessed: 2020-02-24 14:58 @@ -138,23 +138,23 @@ An artifact These must be 'upstream' of the notebook folder: ```console -$ jcache commit-nb -nb tests/notebooks/basic.ipynb tests/test_db.py -Committing: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb +$ jcache cache-nb -nb tests/notebooks/basic.ipynb tests/test_db.py +Caching: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb Artifact Error: Path '/Users/cjs14/GitHub/jupyter-cache/tests/test_db.py' is not in folder '/Users/cjs14/GitHub/jupyter-cache/tests/notebooks'' ``` ```console -$ jcache remove-commits 3 -Removing PK = 3 +$ jcache remove-cached 3 +Removing Cache ID = 3 Success! 
``` -You can also diff any of the commit records with any (external) notebook: +You can also diff any of the cached notebooks with any (external) notebook: ```console $ jcache diff-nb 2 tests/notebooks/basic.ipynb nbdiff ---- committed pk=2 +--- cached pk=2 +++ other: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic.ipynb ## inserted before nb/cells/1: + code cell: @@ -193,7 +193,7 @@ Success! ```console $ jcache list-staged - PK URI Created Commit Pk + ID URI Created Cache ID ---- ------------------------------------- ---------------- ----------- 4 tests/notebooks/complex_outputs.ipynb 2020-02-23 20:48 4 3 tests/notebooks/basic_unrun.ipynb 2020-02-23 20:48 @@ -212,12 +212,12 @@ Success: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic_unrun.ipynb Finished! ``` -Successfully executed notebooks will be committed to the cache, +Successfully executed notebooks will be cached to the cache, along with any 'artefacts' created by the execution, that are inside the notebook folder, and data supplied by the executor. ```console $ jcache list-staged - PK URI Created Commit Pk + ID URI Created Commit ID ---- ------------------------------------- ---------------- ----------- 5 tests/notebooks/basic.ipynb 2020-02-23 20:57 5 4 tests/notebooks/complex_outputs.ipynb 2020-02-23 20:48 4 @@ -226,8 +226,8 @@ $ jcache list-staged ``` ```console -jcache show-commit 5 -PK: 1 +jcache show-cached 5 +ID: 1 URI: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb Created: 2020-02-25 19:21 Accessed: 2020-02-25 19:21 @@ -255,14 +255,14 @@ Success! 
```console $ jcache list-staged - PK URI Created Assets + ID URI Created Assets ---- --------------------------- ---------------- -------- 1 tests/notebooks/basic.ipynb 2020-02-25 10:01 1 ``` ```console $ jcache show-staged 1 -PK: 1 +ID: 1 URI: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb Created: 2020-02-25 10:01 Assets: diff --git a/jupyter_cache/base.py b/jupyter_cache/base.py index b810e35..a3a0e28 100644 --- a/jupyter_cache/base.py +++ b/jupyter_cache/base.py @@ -13,7 +13,7 @@ import nbformat as nbf # TODO make these abstract -from jupyter_cache.cache.db import NbCommitRecord, NbStageRecord +from jupyter_cache.cache.db import NbCacheRecord, NbStageRecord NB_VERSION = 4 @@ -27,7 +27,7 @@ class RetrievalError(Exception): class NbValidityError(Exception): - """Signals a notebook may not be valid to commit. + """Signals a notebook may not be valid to cache. For example, because it has not yet been executed. """ @@ -54,7 +54,7 @@ def __iter__(self) -> Iterable[Tuple[Path, io.BufferedReader]]: @attr.s(frozen=True, slots=True) class NbBundleIn: - """A container for notebooks and their associated data to commit.""" + """A container for notebooks and their associated data to cache.""" nb: nbf.NotebookNode = attr.ib( validator=instance_of(nbf.NotebookNode), metadata={"help": "the notebook"} @@ -77,12 +77,12 @@ class NbBundleIn: @attr.s(frozen=True, slots=True) class NbBundleOut: - """A container for notebooks and their associated data that have been committed.""" + """A container for notebooks and their associated data that have been cached.""" nb: nbf.NotebookNode = attr.ib( validator=instance_of(nbf.NotebookNode), metadata={"help": "the notebook"} ) - commit: NbCommitRecord = attr.ib(metadata={"help": "the commit record"}) + record: NbCacheRecord = attr.ib(metadata={"help": "the cache record"}) artifacts: Optional[NbArtifactsAbstract] = attr.ib( default=None, metadata={"help": "artifacts created during the notebook execution"}, @@ -98,23 +98,23 @@ def 
clear_cache(self): pass @abstractmethod - def commit_notebook_bundle( + def cache_notebook_bundle( self, bundle: NbBundleIn, check_validity: bool = True, overwrite: bool = False - ) -> NbCommitRecord: - """Commit an executed notebook, returning its commit record. + ) -> NbCacheRecord: + """Commit an executed notebook, returning its cache record. Note: non-code source text (e.g. markdown) is not stored in the cache. :param bundle: The notebook bundle :param check_validity: check that the notebook has been executed correctly, by asserting `execution_count`s are consecutive and start at 1. - :param overwrite: Allow overwrite of commit with matching hash - :return: The primary key of the commit + :param overwrite: Allow overwrite of cache with matching hash + :return: The primary key of the cache """ pass @abstractmethod - def commit_notebook_file( + def cache_notebook_file( self, path: str, uri: Optional[str] = None, @@ -122,64 +122,64 @@ def commit_notebook_file( data: Optional[dict] = None, check_validity: bool = True, overwrite: bool = False, - ) -> NbCommitRecord: - """Commit an executed notebook, returning its commit record. + ) -> NbCacheRecord: + """Commit an executed notebook, returning its cache record. Note: non-code source text (e.g. markdown) is not stored in the cache. :param path: path to the notebook - :param uri: alternative URI to store in the commit record (defaults to path) + :param uri: alternative URI to store in the cache record (defaults to path) :param artifacts: list of paths to outputs of the executed notebook. Artifacts must be in the same folder as the notebook (or a sub-folder) - :param data: additional, JSONable, data about the commit + :param data: additional, JSONable, data about the cache :param check_validity: check that the notebook has been executed correctly, by asserting `execution_count`s are consecutive and start at 1. 
- :param overwrite: Allow overwrite of commit with matching hash - :return: The primary key of the commit + :param overwrite: Allow overwrite of cache with matching hash + :return: The primary key of the cache """ pass @abstractmethod - def list_commit_records(self) -> List[NbCommitRecord]: - """Return a list of committed notebook records.""" + def list_cache_records(self) -> List[NbCacheRecord]: + """Return a list of cached notebook records.""" pass - def get_commit_record(self, pk: int) -> NbCommitRecord: - """Return the record of a commit, by its primary key""" + def get_cache_record(self, pk: int) -> NbCacheRecord: + """Return the record of a cache, by its primary key""" pass @abstractmethod - def get_commit_bundle(self, pk: int) -> NbBundleOut: + def get_cache_bundle(self, pk: int) -> NbBundleOut: """Return an executed notebook bundle, by its primary key""" pass @abstractmethod - def commit_artefacts_temppath(self, pk: int) -> Path: + def cache_artefacts_temppath(self, pk: int) -> Path: """Context manager to provide a temporary folder path to the notebook artifacts. Note this path is only guaranteed to exist within the scope of the context, and should only be used for read/copy operations:: - with cache.commit_artefacts_temppath(1) as path: + with cache.cache_artefacts_temppath(1) as path: shutil.copytree(path, destination) """ pass @abstractmethod - def match_commit_notebook(self, nb: nbf.NotebookNode) -> NbCommitRecord: + def match_cache_notebook(self, nb: nbf.NotebookNode) -> NbCacheRecord: """Match to an executed notebook, returning its primary key. :raises KeyError: if no match is found """ pass - def match_commit_file(self, path: str) -> NbCommitRecord: + def match_cache_file(self, path: str) -> NbCacheRecord: """Match to an executed notebook, returning its primary key. 
:raises KeyError: if no match is found """ notebook = nbf.read(path, NB_VERSION) - return self.match_commit_notebook(notebook) + return self.match_cache_notebook(notebook) @abstractmethod def merge_match_into_notebook( @@ -191,10 +191,10 @@ def merge_match_into_notebook( """Match to an executed notebook and return a merged version :param nb: The input notebook - :param nb_meta: metadata keys to merge from the commit (all if None) - :param cell_meta: cell metadata keys to merge from the commit (all if None) + :param nb_meta: metadata keys to merge from the cache (all if None) + :param cell_meta: cell metadata keys to merge from the cache (all if None) :raises KeyError: if no match is found - :return: pk, input notebook with committed code cells and metadata merged. + :return: pk, input notebook with cached code cells and metadata merged. """ pass @@ -207,33 +207,33 @@ def merge_match_into_file( """Match to an executed notebook and return a merged version :param path: The input notebook path - :param nb_meta: metadata keys to merge from the commit (all if None) - :param cell_meta: cell metadata keys to merge from the commit (all if None) + :param nb_meta: metadata keys to merge from the cache (all if None) + :param cell_meta: cell metadata keys to merge from the cache (all if None) :raises KeyError: if no match is found - :return: pk, input notebook with committed code cells and metadata merged. + :return: pk, input notebook with cached code cells and metadata merged. """ nb = nbf.read(path, NB_VERSION) return self.merge_match_into_notebook(nb, nb_meta, cell_meta) @abstractmethod - def diff_nbnode_with_commit( + def diff_nbnode_with_cache( self, pk: int, nb: nbf.NotebookNode, uri: str = "", as_str=False, **kwargs ) -> Union[str, dict]: - """Return a diff of a notebook to a committed one. + """Return a diff of a notebook to a cached one. Note: this will not diff markdown content, since it is not stored in the cache. 
""" pass - def diff_nbfile_with_commit( + def diff_nbfile_with_cache( self, pk: int, path: str, as_str=False, **kwargs ) -> Union[str, dict]: - """Return a diff of a notebook to a committed one. + """Return a diff of a notebook to a cached one. Note: this will not diff markdown content, since it is not stored in the cache. """ nb = nbf.read(path, NB_VERSION) - return self.diff_nbnode_with_commit(pk, nb, uri=path, as_str=as_str, **kwargs) + return self.diff_nbnode_with_cache(pk, nb, uri=path, as_str=as_str, **kwargs) @abstractmethod def stage_notebook_file(self, uri: str, assets: List[str] = ()) -> NbStageRecord: @@ -266,20 +266,20 @@ def get_staged_notebook(self, uri_or_pk: Union[int, str]) -> NbBundleIn: pass @abstractmethod - def get_commit_record_of_staged( + def get_cache_record_of_staged( self, uri_or_pk: Union[int, str] - ) -> Optional[NbCommitRecord]: + ) -> Optional[NbCacheRecord]: pass @abstractmethod def list_nbs_to_exec(self) -> List[NbStageRecord]: - """List staged notebooks, whose hash is not present in the cache commits.""" + """List staged notebooks, whose hash is not present in the cache.""" pass # removed until defined use case # @abstractmethod - # def get_commit_codecell(self, pk: int, index: int) -> nbf.NotebookNode: - # """Return a code cell from a committed notebook. + # def get_cache_codecell(self, pk: int, index: int) -> nbf.NotebookNode: + # """Return a code cell from a cached notebook. # NOTE: the index **only** refers to the list of code cells, e.g. 
# `[codecell_0, textcell_1, codecell_2]` diff --git a/jupyter_cache/cache/__init__.py b/jupyter_cache/cache/__init__.py index cb1f90a..6be4ace 100644 --- a/jupyter_cache/cache/__init__.py +++ b/jupyter_cache/cache/__init__.py @@ -1 +1 @@ -from .main import JupyterCacheBase, DEFAULT_COMMIT_LIMIT # noqa: F401 +from .main import JupyterCacheBase, DEFAULT_CACHE_LIMIT # noqa: F401 diff --git a/jupyter_cache/cache/db.py b/jupyter_cache/cache/db.py index 4e02a6a..e69152c 100644 --- a/jupyter_cache/cache/db.py +++ b/jupyter_cache/cache/db.py @@ -77,10 +77,10 @@ def get_dict(db: Engine) -> dict: return {k: v for k, v in results} -class NbCommitRecord(OrmBase): - """A record of an executed notebook commit.""" +class NbCacheRecord(OrmBase): + """A record of an executed notebook cache.""" - __tablename__ = "nbcommit" + __tablename__ = "nbcache" pk = Column(Integer(), primary_key=True) hashkey = Column(String(255), nullable=False, unique=True) @@ -96,9 +96,9 @@ def to_dict(self): return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} @staticmethod - def create_record(uri: str, hashkey: str, db: Engine, **kwargs) -> "NbCommitRecord": + def create_record(uri: str, hashkey: str, db: Engine, **kwargs) -> "NbCacheRecord": with session_context(db) as session: # type: Session - record = NbCommitRecord(hashkey=hashkey, uri=uri, **kwargs) + record = NbCacheRecord(hashkey=hashkey, uri=uri, **kwargs) session.add(record) try: session.commit() @@ -110,16 +110,16 @@ def create_record(uri: str, hashkey: str, db: Engine, **kwargs) -> "NbCommitReco def remove_records(pks: List[int], db: Engine): with session_context(db) as session: # type: Session - session.query(NbCommitRecord).filter(NbCommitRecord.pk.in_(pks)).delete( + session.query(NbCacheRecord).filter(NbCacheRecord.pk.in_(pks)).delete( synchronize_session=False ) session.commit() @staticmethod - def record_from_hashkey(hashkey: str, db: Engine) -> "NbCommitRecord": + def record_from_hashkey(hashkey: str, db: Engine) -> 
"NbCacheRecord": with session_context(db) as session: # type: Session result = ( - session.query(NbCommitRecord).filter_by(hashkey=hashkey).one_or_none() + session.query(NbCacheRecord).filter_by(hashkey=hashkey).one_or_none() ) if result is None: raise KeyError(hashkey) @@ -127,9 +127,9 @@ def record_from_hashkey(hashkey: str, db: Engine) -> "NbCommitRecord": return result @staticmethod - def record_from_pk(pk: int, db: Engine) -> "NbCommitRecord": + def record_from_pk(pk: int, db: Engine) -> "NbCacheRecord": with session_context(db) as session: # type: Session - result = session.query(NbCommitRecord).filter_by(pk=pk).one_or_none() + result = session.query(NbCacheRecord).filter_by(pk=pk).one_or_none() if result is None: raise KeyError(pk) session.expunge(result) @@ -138,7 +138,7 @@ def record_from_pk(pk: int, db: Engine) -> "NbCommitRecord": def touch(pk, db: Engine): """Touch a record, to change its last accessed time.""" with session_context(db) as session: # type: Session - record = session.query(NbCommitRecord).filter_by(pk=pk).one_or_none() + record = session.query(NbCacheRecord).filter_by(pk=pk).one_or_none() if record is None: raise KeyError(pk) record.accessed = datetime.utcnow() @@ -149,7 +149,7 @@ def touch_hashkey(hashkey, db: Engine): """Touch a record, to change its last accessed time.""" with session_context(db) as session: # type: Session record = ( - session.query(NbCommitRecord).filter_by(hashkey=hashkey).one_or_none() + session.query(NbCacheRecord).filter_by(hashkey=hashkey).one_or_none() ) if record is None: raise KeyError(hashkey) @@ -158,16 +158,16 @@ def touch_hashkey(hashkey, db: Engine): session.commit() @staticmethod - def records_from_uri(uri: str, db: Engine) -> "NbCommitRecord": + def records_from_uri(uri: str, db: Engine) -> "NbCacheRecord": with session_context(db) as session: # type: Session - results = session.query(NbCommitRecord).filter_by(uri=uri).all() + results = session.query(NbCacheRecord).filter_by(uri=uri).all() 
session.expunge_all() return results @staticmethod - def records_all(db: Engine) -> "NbCommitRecord": + def records_all(db: Engine) -> "NbCacheRecord": with session_context(db) as session: # type: Session - results = session.query(NbCommitRecord).all() + results = session.query(NbCacheRecord).all() session.expunge_all() return results @@ -176,15 +176,15 @@ def records_to_delete(keep: int, db: Engine) -> List[int]: with session_context(db) as session: # type: Session pks_to_keep = [ pk - for pk, in session.query(NbCommitRecord.pk) + for pk, in session.query(NbCacheRecord.pk) .order_by(desc("accessed")) .limit(keep) .all() ] pks_to_delete = [ pk - for pk, in session.query(NbCommitRecord.pk) - .filter(NbCommitRecord.pk.notin_(pks_to_keep)) + for pk, in session.query(NbCacheRecord.pk) + .filter(NbCacheRecord.pk.notin_(pks_to_keep)) .all() ] return pks_to_delete diff --git a/jupyter_cache/cache/main.py b/jupyter_cache/cache/main.py index 75a2df4..c9765a5 100644 --- a/jupyter_cache/cache/main.py +++ b/jupyter_cache/cache/main.py @@ -19,10 +19,10 @@ NbArtifactsAbstract, ) from jupyter_cache.utils import to_relative_paths -from .db import create_db, NbCommitRecord, NbStageRecord, Setting +from .db import create_db, NbCacheRecord, NbStageRecord, Setting -COMMIT_LIMIT_KEY = "commit_limit" -DEFAULT_COMMIT_LIMIT = 1000 +CACHE_LIMIT_KEY = "cache_limit" +DEFAULT_CACHE_LIMIT = 1000 class NbArtifacts(NbArtifactsAbstract): @@ -81,32 +81,30 @@ def clear_cache(self): shutil.rmtree(self.path) self._db = None - def _get_notebook_path_commit(self, hashkey, raise_on_missing=False) -> Path: + def _get_notebook_path_cache(self, hashkey, raise_on_missing=False) -> Path: """"Retrieve a relative path in the cache to a notebook, from its hash.""" path = self.path.joinpath(Path("executed", hashkey, "base.ipynb")) if not path.exists() and raise_on_missing: raise RetrievalError("hashkey not in cache: {}".format(hashkey)) return path - def _get_artifact_path_commit(self, hashkey) -> Path: + def 
_get_artifact_path_cache(self, hashkey) -> Path: """"Retrieve a relative path in the cache to a notebook, from its hash.""" path = self.path.joinpath(Path("executed", hashkey, "artifacts")) return path - def truncate_commits(self): - """If the number of commits exceeds the set limit, delete the oldest.""" - commit_limit = Setting.get_value( - COMMIT_LIMIT_KEY, self.db, DEFAULT_COMMIT_LIMIT - ) - # TODO you could have better control over this by e.g. tagging certain commits + def truncate_caches(self): + """If the number of cached notebooks exceeds set limit, delete the oldest.""" + cache_limit = Setting.get_value(CACHE_LIMIT_KEY, self.db, DEFAULT_CACHE_LIMIT) + # TODO you could have better control over this by e.g. tagging certain caches # that should not be deleted. - pks = NbCommitRecord.records_to_delete(commit_limit, self.db) + pks = NbCacheRecord.records_to_delete(cache_limit, self.db) for pk in pks: - self.remove_commit(pk) + self.remove_cache(pk) - def change_commit_limit(self, size: int): + def change_cache_limit(self, size: int): assert isinstance(size, int) and size > 0 - Setting.set_value(COMMIT_LIMIT_KEY, size, self.db) + Setting.set_value(CACHE_LIMIT_KEY, size, self.db) def _create_hashable_nb( self, @@ -158,7 +156,7 @@ def _hash_notebook( return hashlib.md5(string.encode()).hexdigest() def _validate_nb_bundle(self, nb_bundle: NbBundleIn): - """Validate that a notebook bundle should be committed. + """Validate that a notebook bundle should be cached. We check that the notebook has been executed correctly, by asserting `execution_count`s are consecutive and start at 1. @@ -178,7 +176,7 @@ def _validate_nb_bundle(self, nb_bundle: NbBundleIn): # TODO check for output exceptions? # TODO assets - def _prepare_nb_for_commit(self, nb: nbf.NotebookNode, deepcopy=False): + def _prepare_nb_for_cache(self, nb: nbf.NotebookNode, deepcopy=False): """Prepare in-place, we remove non-code cells. 
""" if deepcopy: @@ -186,31 +184,31 @@ def _prepare_nb_for_commit(self, nb: nbf.NotebookNode, deepcopy=False): nb.cells = [cell for cell in nb.cells if cell.cell_type == "code"] return nb - def commit_notebook_bundle( + def cache_notebook_bundle( self, bundle: NbBundleIn, check_validity: bool = True, overwrite: bool = False, description="", - ) -> NbCommitRecord: - """Commit an executed notebook.""" + ) -> NbCacheRecord: + """Cache an executed notebook.""" # TODO it would be ideal to have some 'rollback' mechanism on exception if check_validity: self._validate_nb_bundle(bundle) hashkey = self._hash_notebook(bundle.nb) - path = self._get_notebook_path_commit(hashkey) + path = self._get_notebook_path_cache(hashkey) if path.exists(): if not overwrite: raise CachingError( "Notebook already exists in cache and overwrite=False." ) shutil.rmtree(path.parent) - record = NbCommitRecord.record_from_hashkey(hashkey, self.db) + record = NbCacheRecord.record_from_hashkey(hashkey, self.db) # TODO record should be changed rather than deleted? 
- NbCommitRecord.remove_records([record.pk], self.db) + NbCacheRecord.remove_records([record.pk], self.db) - record = NbCommitRecord.create_record( + record = NbCacheRecord.create_record( uri=bundle.uri, hashkey=hashkey, db=self.db, @@ -218,11 +216,11 @@ def commit_notebook_bundle( description=description, ) path.parent.mkdir(parents=True) - self._prepare_nb_for_commit(bundle.nb) + self._prepare_nb_for_cache(bundle.nb) path.write_text(nbf.writes(bundle.nb, NB_VERSION)) # write artifacts - artifact_folder = self._get_artifact_path_commit(hashkey) + artifact_folder = self._get_artifact_path_cache(hashkey) if artifact_folder.exists(): shutil.rmtree(artifact_folder) for rel_path, handle in bundle.artifacts or []: @@ -230,11 +228,11 @@ def commit_notebook_bundle( write_path.parent.mkdir(parents=True, exist_ok=True) write_path.write_bytes(handle.read()) - self.truncate_commits() + self.truncate_caches() return record - def commit_notebook_file( + def cache_notebook_file( self, path: str, uri: Optional[str] = None, @@ -242,23 +240,23 @@ def commit_notebook_file( data: Optional[dict] = None, check_validity: bool = True, overwrite: bool = False, - ) -> NbCommitRecord: - """Commit an executed notebook, returning its primary key. + ) -> NbCacheRecord: + """Cache an executed notebook, returning its primary key. Note: non-code source text (e.g. markdown) is not stored in the cache. :param path: path to the notebook - :param uri: alternative URI to store in the commit record (defaults to path) + :param uri: alternative URI to store in the cache record (defaults to path) :param artifacts: list of paths to outputs of the executed notebook. Artifacts must be in the same folder as the notebook (or a sub-folder) - :param data: additional, JSONable, data about the commit + :param data: additional, JSONable, data to store in the cache record :param check_validity: check that the notebook has been executed correctly, by asserting `execution_count`s are consecutive and start at 1. 
- :param overwrite: Allow overwrite of commit with matching hash - :return: The primary key of the commit + :param overwrite: Allow overwrite of cached notebooks with matching hash + :return: The primary key of the cache record """ notebook = nbf.read(path, NB_VERSION) - return self.commit_notebook_bundle( + return self.cache_notebook_bundle( NbBundleIn( notebook, uri or path, @@ -269,23 +267,23 @@ def commit_notebook_file( overwrite=overwrite, ) - def list_commit_records(self) -> List[NbCommitRecord]: - return NbCommitRecord.records_all(self.db) + def list_cache_records(self) -> List[NbCacheRecord]: + return NbCacheRecord.records_all(self.db) - def get_commit_record(self, pk: int) -> NbCommitRecord: - return NbCommitRecord.record_from_pk(pk, self.db) + def get_cache_record(self, pk: int) -> NbCacheRecord: + return NbCacheRecord.record_from_pk(pk, self.db) - def get_commit_bundle(self, pk: int) -> NbBundleOut: - record = NbCommitRecord.record_from_pk(pk, self.db) - NbCommitRecord.touch(pk, self.db) - path = self._get_notebook_path_commit(record.hashkey) - artifact_folder = self._get_artifact_path_commit(record.hashkey) + def get_cache_bundle(self, pk: int) -> NbBundleOut: + record = NbCacheRecord.record_from_pk(pk, self.db) + NbCacheRecord.touch(pk, self.db) + path = self._get_notebook_path_cache(record.hashkey) + artifact_folder = self._get_artifact_path_cache(record.hashkey) if not path.exists(): raise KeyError(pk) return NbBundleOut( nbf.reads(path.read_text(), NB_VERSION), - commit=record, + record=record, artifacts=NbArtifacts( [p for p in artifact_folder.glob("**/*") if p.is_file()], in_folder=artifact_folder, @@ -293,34 +291,34 @@ def get_commit_bundle(self, pk: int) -> NbBundleOut: ) @contextmanager - def commit_artefacts_temppath(self, pk: int) -> Path: + def cache_artefacts_temppath(self, pk: int) -> Path: """Context manager to provide a temporary folder path to the notebook artifacts. 
Note this path is only guaranteed to exist within the scope of the context, and should only be used for read/copy operations:: - with cache.commit_artefacts_temppath(1) as path: + with cache.cache_artefacts_temppath(1) as path: shutil.copytree(path, destination) """ - record = NbCommitRecord.record_from_pk(pk, self.db) - yield self._get_artifact_path_commit(record.hashkey) + record = NbCacheRecord.record_from_pk(pk, self.db) + yield self._get_artifact_path_cache(record.hashkey) - def remove_commit(self, pk: int): - record = NbCommitRecord.record_from_pk(pk, self.db) - path = self._get_notebook_path_commit(record.hashkey) + def remove_cache(self, pk: int): + record = NbCacheRecord.record_from_pk(pk, self.db) + path = self._get_notebook_path_cache(record.hashkey) if not path.exists(): raise KeyError(pk) shutil.rmtree(path.parent) - NbCommitRecord.remove_records([pk], self.db) + NbCacheRecord.remove_records([pk], self.db) - def match_commit_notebook(self, nb: nbf.NotebookNode) -> NbCommitRecord: + def match_cache_notebook(self, nb: nbf.NotebookNode) -> NbCacheRecord: """Match to an executed notebook, returning its primary key. 
:raises KeyError: if no match is found """ hashkey = self._hash_notebook(nb) - commit_record = NbCommitRecord.record_from_hashkey(hashkey, self.db) - return commit_record + cache_record = NbCacheRecord.record_from_hashkey(hashkey, self.db) + return cache_record def merge_match_into_notebook( self, @@ -331,58 +329,52 @@ def merge_match_into_notebook( """Match to an executed notebook and return a merged version :param nb: The input notebook - :param nb_meta: metadata keys to merge from the commit (all if None) - :param cell_meta: cell metadata keys to merge from the commit (all if None) + :param nb_meta: metadata keys to merge from the cached notebook (all if None) + :param cell_meta: cell metadata keys to merge from cached notebook (all if None) :raises KeyError: if no match is found - :return: pk, input notebook with committed code cells and metadata merged. + :return: pk, input notebook with cached code cells and metadata merged. """ - pk = self.match_commit_notebook(nb).pk - commit_nb = self.get_commit_bundle(pk).nb + pk = self.match_cache_notebook(nb).pk + cache_nb = self.get_cache_bundle(pk).nb nb = copy.deepcopy(nb) if nb_meta is None: - nb.metadata = commit_nb.metadata + nb.metadata = cache_nb.metadata else: for key in nb_meta: - if key in commit_nb: - nb.metadata[key] = commit_nb.metadata[key] + if key in cache_nb: + nb.metadata[key] = cache_nb.metadata[key] for idx in range(len(nb.cells)): if nb.cells[idx].cell_type == "code": - commit_cell = commit_nb.cells.pop(0) + cache_cell = cache_nb.cells.pop(0) if cell_meta is not None: - # update the input metadata with select commit metadata - # then add the input metadata to the commit cell + # update the input metadata with select cached notebook metadata + # then add the input metadata to the cached cell nb.cells[idx].metadata.update( - { - k: v - for k, v in commit_cell.metadata.items() - if k in cell_meta - } + {k: v for k, v in cache_cell.metadata.items() if k in cell_meta} ) - commit_cell.metadata = 
nb.cells[idx].metadata - nb.cells[idx] = commit_cell + cache_cell.metadata = nb.cells[idx].metadata + nb.cells[idx] = cache_cell return pk, nb - def diff_nbnode_with_commit( + def diff_nbnode_with_cache( self, pk: int, nb: nbf.NotebookNode, uri: str = "", as_str=False, **kwargs ): - """Return a diff of a notebook to a committed one. + """Return a diff of a notebook to a cached one. Note: this will not diff markdown content, since it is not stored in the cache. """ import nbdime from nbdime.prettyprint import pretty_print_diff, PrettyPrintConfig - committed_nb = self.get_commit_bundle(pk).nb - nb = self._prepare_nb_for_commit(nb, deepcopy=True) - diff = nbdime.diff_notebooks(committed_nb, nb) + cached_nb = self.get_cache_bundle(pk).nb + nb = self._prepare_nb_for_cache(nb, deepcopy=True) + diff = nbdime.diff_notebooks(cached_nb, nb) if not as_str: return diff stream = io.StringIO() - stream.writelines( - ["nbdiff\n", f"--- committed pk={pk}\n", f"+++ other: {uri}\n"] - ) + stream.writelines(["nbdiff\n", f"--- cached pk={pk}\n", f"+++ other: {uri}\n"]) pretty_print_diff( - committed_nb, diff, "nb", PrettyPrintConfig(out=stream, **kwargs) + cached_nb, diff, "nb", PrettyPrintConfig(out=stream, **kwargs) ) return stream.getvalue() @@ -423,9 +415,9 @@ def get_staged_notebook(self, uri_or_pk: Union[int, str]) -> NbBundleIn: notebook = nbf.read(uri_or_pk, NB_VERSION) return NbBundleIn(notebook, uri_or_pk) - def get_commit_record_of_staged( + def get_cache_record_of_staged( self, uri_or_pk: Union[int, str] - ) -> Optional[NbCommitRecord]: + ) -> Optional[NbCacheRecord]: if isinstance(uri_or_pk, int): record = NbStageRecord.record_from_pk(uri_or_pk, self.db) else: @@ -433,31 +425,31 @@ def get_commit_record_of_staged( nb = self.get_staged_notebook(record.uri).nb hashkey = self._hash_notebook(nb) try: - return NbCommitRecord.record_from_hashkey(hashkey, self.db) + return NbCacheRecord.record_from_hashkey(hashkey, self.db) except KeyError: return None def list_nbs_to_exec(self) 
-> List[NbStageRecord]: - """List staged notebooks, whose hash is not present in the cache commits.""" + """List staged notebooks, whose hash is not present in the cached notebooks.""" records = [] for record in self.list_staged_records(): nb = self.get_staged_notebook(record.uri).nb hashkey = self._hash_notebook(nb) try: - NbCommitRecord.record_from_hashkey(hashkey, self.db) + NbCacheRecord.record_from_hashkey(hashkey, self.db) except KeyError: records.append(record) return records # removed until defined use case - # def get_commit_codecell(self, pk: int, index: int) -> nbf.NotebookNode: - # """Return a code cell from a committed notebook. + # def get_cache_codecell(self, pk: int, index: int) -> nbf.NotebookNode: + # """Return a code cell from a cached notebook. # NOTE: the index **only** refers to the list of code cells, e.g. # `[codecell_0, textcell_1, codecell_2]` # would map {0: codecell_0, 1: codecell_2} # """ - # nb_bundle = self.get_commit_bundle(pk) + # nb_bundle = self.get_cache_bundle(pk) # _code_index = 0 # for cell in nb_bundle.nb.cells: # if cell.cell_type != "code": diff --git a/jupyter_cache/cli/arguments.py b/jupyter_cache/cli/arguments.py index 5ee0746..12ef22f 100644 --- a/jupyter_cache/cli/arguments.py +++ b/jupyter_cache/cli/arguments.py @@ -31,6 +31,6 @@ ) -PK = click.argument("pk", metavar="PK", type=int) +PK = click.argument("pk", metavar="ID", type=int) -PKS = click.argument("pks", metavar="PKs", nargs=-1, type=int) +PKS = click.argument("pks", metavar="IDs", nargs=-1, type=int) diff --git a/jupyter_cache/cli/commands/cmd_cache.py b/jupyter_cache/cli/commands/cmd_cache.py index cb7791c..4d08b28 100644 --- a/jupyter_cache/cli/commands/cmd_cache.py +++ b/jupyter_cache/cli/commands/cmd_cache.py @@ -34,19 +34,19 @@ def clear_cache(cache_path): click.secho("Cache cleared!", fg="green") -@jcache.command("commit-limit") +@jcache.command("cache-limit") @options.CACHE_PATH -@click.argument("limit", metavar="COMMIT_LIMIT", type=int) -def 
change_commit_limit(cache_path, limit): - """Change the commit limit of the cache.""" +@click.argument("limit", metavar="CACHE_LIMIT", type=int) +def change_cache_limit(cache_path, limit): + """Change the maximum number of notebooks stored in the cache.""" db = JupyterCacheBase(cache_path) - db.change_commit_limit(limit) - click.secho("Limit changed!", fg="green") + db.change_cache_limit(limit) + click.secho("Cache limit changed!", fg="green") -def format_commit_record(record, hashkeys, path_length): +def format_cache_record(record, hashkeys, path_length): data = { - "PK": record.pk, + "ID": record.pk, "URI": str(shorten_path(record.uri, path_length)), "Created": record.created.isoformat(" ", "minutes"), "Accessed": record.accessed.isoformat(" ", "minutes"), @@ -57,21 +57,21 @@ def format_commit_record(record, hashkeys, path_length): return data -@jcache.command("list-commits") +@jcache.command("list-cached") @options.CACHE_PATH @click.option("-h", "--hashkeys", is_flag=True, help="Whether to show hashkeys.") @options.PATH_LENGTH -def list_commits(cache_path, hashkeys, path_length): - """List committed notebook records in the cache.""" +def list_caches(cache_path, hashkeys, path_length): + """List cached notebook records in the cache.""" db = JupyterCacheBase(cache_path) - records = db.list_commit_records() + records = db.list_cache_records() if not records: - click.secho("No Commited Notebooks", fg="blue") + click.secho("No Cached Notebooks", fg="blue") # TODO optionally list number of artifacts click.echo( tabulate.tabulate( [ - format_commit_record(r, hashkeys, path_length) + format_cache_record(r, hashkeys, path_length) for r in sorted(records, key=lambda r: r.accessed, reverse=True) ], headers="keys", @@ -79,16 +79,16 @@ def list_commits(cache_path, hashkeys, path_length): ) -@jcache.command("show-commit") +@jcache.command("show-cached") @options.CACHE_PATH @arguments.PK -def show_commit(cache_path, pk): - """Show details of a committed notebook in the 
cache.""" +def show_cache(cache_path, pk): + """Show details of a cached notebook in the cache.""" db = JupyterCacheBase(cache_path) - record = db.get_commit_record(pk) - data = format_commit_record(record, True, None) + record = db.get_cache_record(pk) + data = format_cache_record(record, True, None) click.echo(yaml.safe_dump(data, sort_keys=False), nl=False) - with db.commit_artefacts_temppath(pk) as folder: + with db.cache_artefacts_temppath(pk) as folder: paths = [str(p.relative_to(folder)) for p in folder.glob("**/*") if p.is_file()] if not (paths or record.data): click.echo("") @@ -106,9 +106,9 @@ def show_commit(cache_path, pk): @arguments.PK @arguments.ARTIFACT_RPATH def cat_artifact(cache_path, pk, artifact_rpath): - """Print the contents of a commit artefact.""" + """Print the contents of a cached artefact.""" db = JupyterCacheBase(cache_path) - with db.commit_artefacts_temppath(pk) as path: + with db.cache_artefacts_temppath(pk) as path: artifact_path = path.joinpath(artifact_rpath) if not artifact_path.exists(): click.secho("Artifact does not exist", fg="red") @@ -120,10 +120,10 @@ def cat_artifact(cache_path, pk, artifact_rpath): click.echo(text) -def commit_file(db, nbpath, validate, overwrite, artifact_paths=()): - click.echo("Committing: {}".format(nbpath)) +def cache_file(db, nbpath, validate, overwrite, artifact_paths=()): + click.echo("Caching: {}".format(nbpath)) try: - db.commit_notebook_file( + db.cache_notebook_file( nbpath, artifacts=artifact_paths, check_validity=validate, @@ -132,11 +132,9 @@ def commit_file(db, nbpath, validate, overwrite, artifact_paths=()): except NbValidityError as error: click.secho("Validity Error: ", fg="red", nl=False) click.echo(str(error)) - if click.confirm( - "The notebook may not have been executed, continue committing?" 
- ): + if click.confirm("The notebook may not have been executed, continue caching?"): try: - db.commit_notebook_file( + db.cache_notebook_file( nbpath, artifacts=artifact_paths, check_validity=False, @@ -153,51 +151,51 @@ def commit_file(db, nbpath, validate, overwrite, artifact_paths=()): return True -@jcache.command("commit-nb") +@jcache.command("cache-nb") @arguments.ARTIFACT_PATHS @options.NB_PATH @options.CACHE_PATH @options.VALIDATE_NB -@options.OVERWRITE_COMMIT -def commit_nb(cache_path, artifact_paths, nbpath, validate, overwrite): - """Commit a notebook that has already been executed.""" +@options.OVERWRITE_CACHED +def cache_nb(cache_path, artifact_paths, nbpath, validate, overwrite): + """Cache a notebook that has already been executed.""" db = JupyterCacheBase(cache_path) - success = commit_file(db, nbpath, validate, overwrite, artifact_paths) + success = cache_file(db, nbpath, validate, overwrite, artifact_paths) if success: click.secho("Success!", fg="green") -@jcache.command("commit-nbs") +@jcache.command("cache-nbs") @arguments.NB_PATHS @options.CACHE_PATH @options.VALIDATE_NB -@options.OVERWRITE_COMMIT -def commit_nbs(cache_path, nbpaths, validate, overwrite): - """Commit notebook(s) that have already been executed.""" +@options.OVERWRITE_CACHED +def cache_nbs(cache_path, nbpaths, validate, overwrite): + """Cache notebook(s) that have already been executed.""" db = JupyterCacheBase(cache_path) success = True for nbpath in nbpaths: # TODO deal with errors (print all at end? 
or option to ignore) - if not commit_file(db, nbpath, validate, overwrite): + if not cache_file(db, nbpath, validate, overwrite): success = False if success: click.secho("Success!", fg="green") -@jcache.command("remove-commits") +@jcache.command("remove-cached") @arguments.PKS @options.CACHE_PATH @options.REMOVE_ALL -def remove_commits(cache_path, pks, remove_all): - """Remove notebook commit(s) from the cache.""" +def remove_caches(cache_path, pks, remove_all): + """Remove notebooks stored in the cache.""" db = JupyterCacheBase(cache_path) if remove_all: - pks = [r.pk for r in db.list_commit_records()] + pks = [r.pk for r in db.list_cache_records()] for pk in pks: # TODO deal with errors (print all at end? or option to ignore) - click.echo("Removing PK = {}".format(pk)) + click.echo("Removing Cache ID = {}".format(pk)) try: - db.remove_commit(pk) + db.remove_cache(pk) except KeyError: click.secho("Does not exist", fg="red") except CachingError as err: @@ -213,7 +211,7 @@ def remove_commits(cache_path, pks, remove_all): def diff_nb(cache_path, pk, nbpath): """Print a diff of a notebook to one stored in the cache.""" db = JupyterCacheBase(cache_path) - click.echo(db.diff_nbfile_with_commit(pk, nbpath, as_str=True)) + click.echo(db.diff_nbfile_with_cache(pk, nbpath, as_str=True)) click.secho("Success!", fg="green") @@ -235,7 +233,7 @@ def stage_nbs(cache_path, nbpaths): @options.NB_PATH @options.CACHE_PATH def stage_nb(cache_path, nbpath, asset_paths): - """Commit a notebook, with possible assets.""" + """Cache a notebook, with possible assets.""" db = JupyterCacheBase(cache_path) db.stage_notebook_file(nbpath, asset_paths) click.secho("Success!", fg="green") @@ -257,16 +255,16 @@ def unstage_nbs(cache_path, nbpaths, remove_all): click.secho("Success!", fg="green") -def format_staged_record(record, commit, path_length, assets=True): +def format_staged_record(record, cache_record, path_length, assets=True): data = { - "PK": record.pk, + "ID": record.pk, "URI": 
str(shorten_path(record.uri, path_length)), "Created": record.created.isoformat(" ", "minutes"), } if assets: data["Assets"] = len(record.assets) - if commit: - data["Commit Pk"] = commit.pk + if cache_record: + data["Cache ID"] = cache_record.pk return data @@ -276,7 +274,7 @@ def format_staged_record(record, commit, path_length, assets=True): "--compare/--no-compare", default=True, show_default=True, - help="Compare to committed notebooks (to find PK).", + help="Compare to cached notebooks (to find cache ID).", ) @options.PATH_LENGTH def list_staged(cache_path, compare, path_length): @@ -287,10 +285,10 @@ def list_staged(cache_path, compare, path_length): click.secho("No Staged Notebooks", fg="blue") rows = [] for record in sorted(records, key=lambda r: r.created, reverse=True): - commit = None + cache_record = None if compare: - commit = db.get_commit_record_of_staged(record.uri) - rows.append(format_staged_record(record, commit, path_length)) + cache_record = db.get_cache_record_of_staged(record.uri) + rows.append(format_staged_record(record, cache_record, path_length)) click.echo(tabulate.tabulate(rows, headers="keys")) @@ -301,8 +299,8 @@ def show_staged(cache_path, pk): """Show details of a staged notebook.""" db = JupyterCacheBase(cache_path) record = db.get_staged_record(pk) - commit = db.get_commit_record_of_staged(record.uri) - data = format_staged_record(record, commit, None, assets=False) + cache_record = db.get_cache_record_of_staged(record.uri) + data = format_staged_record(record, cache_record, None, assets=False) click.echo(yaml.safe_dump(data, sort_keys=False), nl=False) if not record.assets: click.echo("") diff --git a/jupyter_cache/cli/options.py b/jupyter_cache/cli/options.py index d61f5b8..fa685d8 100644 --- a/jupyter_cache/cli/options.py +++ b/jupyter_cache/cli/options.py @@ -94,7 +94,7 @@ def check_cache_exists(ctx, param, value): ) -OVERWRITE_COMMIT = click.option( +OVERWRITE_CACHED = click.option( "--overwrite/--no-overwrite", 
default=True, show_default=True, diff --git a/jupyter_cache/executors/base.py b/jupyter_cache/executors/base.py index db24621..e3864ec 100644 --- a/jupyter_cache/executors/base.py +++ b/jupyter_cache/executors/base.py @@ -6,7 +6,7 @@ from jupyter_cache.base import JupyterCacheAbstract # TODO abstact -from jupyter_cache.cache.db import NbCommitRecord +from jupyter_cache.cache.db import NbCacheRecord ENTRY_POINT_GROUP = "jupyter_executors" @@ -33,7 +33,7 @@ def logger(self): return self._logger @abstractmethod - def run(self, uri_filter: Optional[List[str]] = None) -> List[NbCommitRecord]: + def run(self, uri_filter: Optional[List[str]] = None) -> List[NbCacheRecord]: """Run execution, stage successfully executed notebooks and return their URIs Parameters diff --git a/jupyter_cache/executors/basic.py b/jupyter_cache/executors/basic.py index bc41b05..1873760 100644 --- a/jupyter_cache/executors/basic.py +++ b/jupyter_cache/executors/basic.py @@ -62,9 +62,9 @@ def run(self, uri_filter=None): data={"execution_seconds": timer.last_split}, ) try: - self.cache.commit_notebook_bundle(final_bundle, overwrite=True) + self.cache.cache_notebook_bundle(final_bundle, overwrite=True) except Exception: - self.logger.error("Failed Commit: {}".format(uri), exc_info=True) + self.logger.error("Failed Caching: {}".format(uri), exc_info=True) continue self.logger.info("Success: {}".format(uri)) @@ -76,7 +76,7 @@ def run(self, uri_filter=None): # TODO it would also be ideal to tag all notebooks # that were executed at the same time (just part of `data` or separate column?). 
# TODO maybe the status of success/failure could be stored on - # the stage record (commit_status=Enum('OK', 'FAILED', 'MISSING')) + # the stage record (cache_status=Enum('OK', 'FAILED', 'MISSING')) # also failed notebooks could be stored in the cache, which would be # accessed by stage pk (and would be deleted when removing the stage record) # see: https://python.quantecon.org/status.html diff --git a/tests/test_cache.py b/tests/test_cache.py index e6633c3..266a1b9 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -14,21 +14,21 @@ def test_basic_workflow(tmp_path): cache = JupyterCacheBase(str(tmp_path)) with pytest.raises(NbValidityError): - cache.commit_notebook_file(path=os.path.join(NB_PATH, "basic.ipynb")) - cache.commit_notebook_file( + cache.cache_notebook_file(path=os.path.join(NB_PATH, "basic.ipynb")) + cache.cache_notebook_file( path=os.path.join(NB_PATH, "basic.ipynb"), uri="basic.ipynb", check_validity=False, ) - assert cache.list_commit_records()[0].uri == "basic.ipynb" - pk = cache.match_commit_file(path=os.path.join(NB_PATH, "basic.ipynb")).pk - nb_bundle = cache.get_commit_bundle(pk) + assert cache.list_cache_records()[0].uri == "basic.ipynb" + pk = cache.match_cache_file(path=os.path.join(NB_PATH, "basic.ipynb")).pk + nb_bundle = cache.get_cache_bundle(pk) assert nb_bundle.nb.metadata["kernelspec"] == { "display_name": "Python 3", "language": "python", "name": "python3", } - assert set(nb_bundle.commit.to_dict().keys()) == { + assert set(nb_bundle.record.to_dict().keys()) == { "pk", "hashkey", "uri", @@ -37,14 +37,14 @@ def test_basic_workflow(tmp_path): "accessed", "description", } - # assert cache.get_commit_codecell(pk, 0).source == "a=1\nprint(a)" + # assert cache.get_cache_codecell(pk, 0).source == "a=1\nprint(a)" path = os.path.join(NB_PATH, "basic_failing.ipynb") - diff = cache.diff_nbfile_with_commit(pk, path, as_str=True, use_color=False) + diff = cache.diff_nbfile_with_cache(pk, path, as_str=True, use_color=False) assert diff 
== dedent( f"""\ nbdiff - --- committed pk=1 + --- cached pk=1 +++ other: {path} ## inserted before nb/cells/0: + code cell: @@ -66,10 +66,10 @@ def test_basic_workflow(tmp_path): """ ) - cache.remove_commit(pk) - assert cache.list_commit_records() == [] + cache.remove_cache(pk) + assert cache.list_cache_records() == [] - cache.commit_notebook_file( + cache.cache_notebook_file( path=os.path.join(NB_PATH, "basic.ipynb"), uri="basic.ipynb", check_validity=False, @@ -91,12 +91,12 @@ def test_basic_workflow(tmp_path): assert bundle.nb.metadata cache.clear_cache() - assert cache.list_commit_records() == [] + assert cache.list_cache_records() == [] def test_merge_match_into_notebook(tmp_path): cache = JupyterCacheBase(str(tmp_path)) - cache.commit_notebook_file( + cache.cache_notebook_file( path=os.path.join(NB_PATH, "basic.ipynb"), check_validity=False ) nb = nbf.read(os.path.join(NB_PATH, "basic_unrun.ipynb"), 4) @@ -113,19 +113,19 @@ def test_merge_match_into_notebook(tmp_path): def test_artifacts(tmp_path): cache = JupyterCacheBase(str(tmp_path)) with pytest.raises(IOError): - cache.commit_notebook_file( + cache.cache_notebook_file( path=os.path.join(NB_PATH, "basic.ipynb"), uri="basic.ipynb", artifacts=(os.path.join(NB_PATH),), check_validity=False, ) - cache.commit_notebook_file( + cache.cache_notebook_file( path=os.path.join(NB_PATH, "basic.ipynb"), uri="basic.ipynb", artifacts=(os.path.join(NB_PATH, "artifact_folder", "artifact.txt"),), check_validity=False, ) - hashkey = cache.get_commit_record(1).hashkey + hashkey = cache.get_cache_record(1).hashkey assert { str(p.relative_to(tmp_path)) for p in tmp_path.glob("**/*") if p.is_file() } == { @@ -134,7 +134,7 @@ def test_artifacts(tmp_path): f"executed/{hashkey}/artifacts/artifact_folder/artifact.txt", } - bundle = cache.get_commit_bundle(1) + bundle = cache.get_cache_bundle(1) assert {str(p) for p in bundle.artifacts.relative_paths} == { "artifact_folder/artifact.txt" } @@ -142,7 +142,7 @@ def 
test_artifacts(tmp_path): text = list(h.read().decode() for r, h in bundle.artifacts)[0] assert text.rstrip() == "An artifact" - with cache.commit_artefacts_temppath(1) as path: + with cache.cache_artefacts_temppath(1) as path: assert path.joinpath("artifact_folder").exists() @@ -161,8 +161,8 @@ def test_execution(tmp_path): os.path.join(NB_PATH, "basic_unrun.ipynb"), os.path.join(NB_PATH, "external_output.ipynb"), ] - assert len(db.list_commit_records()) == 2 - bundle = db.get_commit_bundle(1) + assert len(db.list_cache_records()) == 2 + bundle = db.get_cache_bundle(1) assert bundle.nb.cells[0] == { "cell_type": "code", "execution_count": 1, @@ -170,8 +170,8 @@ def test_execution(tmp_path): "outputs": [{"name": "stdout", "output_type": "stream", "text": "1\n"}], "source": "a=1\nprint(a)", } - assert "execution_seconds" in bundle.commit.data - with db.commit_artefacts_temppath(2) as path: + assert "execution_seconds" in bundle.record.data + with db.cache_artefacts_temppath(2) as path: paths = [str(p.relative_to(path)) for p in path.glob("**/*") if p.is_file()] assert paths == ["artifact.txt"] assert path.joinpath("artifact.txt").read_text() == "hi" diff --git a/tests/test_cli.py b/tests/test_cli.py index 8be4b27..c33bbe6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -25,32 +25,32 @@ def test_clear_cache(tmp_path): assert "Cache cleared!" 
in result.output.strip(), result.output -def test_list_commits(tmp_path): +def test_list_caches(tmp_path): db = JupyterCacheBase(str(tmp_path)) - db.commit_notebook_file( + db.cache_notebook_file( path=os.path.join(NB_PATH, "basic.ipynb"), uri="basic.ipynb", check_validity=False, ) runner = CliRunner() - result = runner.invoke(cmd_cache.list_commits, ["-p", tmp_path]) + result = runner.invoke(cmd_cache.list_caches, ["-p", tmp_path]) assert result.exception is None, result.output assert result.exit_code == 0, result.output assert "basic.ipynb" in result.output.strip(), result.output -def test_commit_with_artifact(tmp_path): +def test_cache_with_artifact(tmp_path): JupyterCacheBase(str(tmp_path)) nb_path = os.path.join(NB_PATH, "basic.ipynb") a_path = os.path.join(NB_PATH, "artifact_folder", "artifact.txt") runner = CliRunner() result = runner.invoke( - cmd_cache.commit_nb, ["-p", tmp_path, "--no-validate", "-nb", nb_path, a_path] + cmd_cache.cache_nb, ["-p", tmp_path, "--no-validate", "-nb", nb_path, a_path] ) assert result.exception is None, result.output assert result.exit_code == 0, result.output assert "basic.ipynb" in result.output.strip(), result.output - result = runner.invoke(cmd_cache.show_commit, ["-p", tmp_path, "1"]) + result = runner.invoke(cmd_cache.show_cache, ["-p", tmp_path, "1"]) assert result.exception is None, result.output assert result.exit_code == 0, result.output assert "- artifact_folder/artifact.txt" in result.output.strip(), result.output @@ -62,40 +62,38 @@ def test_commit_with_artifact(tmp_path): assert "An artifact" in result.output.strip(), result.output -def test_commit_nbs(tmp_path): +def test_cache_nbs(tmp_path): db = JupyterCacheBase(str(tmp_path)) path = os.path.join(NB_PATH, "basic.ipynb") runner = CliRunner() - result = runner.invoke( - cmd_cache.commit_nbs, ["-p", tmp_path, "--no-validate", path] - ) + result = runner.invoke(cmd_cache.cache_nbs, ["-p", tmp_path, "--no-validate", path]) assert result.exception is None, 
result.output assert result.exit_code == 0, result.output assert "basic.ipynb" in result.output.strip(), result.output - assert db.list_commit_records()[0].uri == path + assert db.list_cache_records()[0].uri == path -def test_remove_commits(tmp_path): +def test_remove_caches(tmp_path): db = JupyterCacheBase(str(tmp_path)) - db.commit_notebook_file( + db.cache_notebook_file( path=os.path.join(NB_PATH, "basic.ipynb"), uri="basic.ipynb", check_validity=False, ) runner = CliRunner() - result = runner.invoke(cmd_cache.remove_commits, ["-p", tmp_path, "1"]) + result = runner.invoke(cmd_cache.remove_caches, ["-p", tmp_path, "1"]) assert result.exception is None, result.output assert result.exit_code == 0, result.output assert "Success" in result.output.strip(), result.output - assert db.list_commit_records() == [] + assert db.list_cache_records() == [] def test_diff_nbs(tmp_path): db = JupyterCacheBase(str(tmp_path)) path = os.path.join(NB_PATH, "basic.ipynb") path2 = os.path.join(NB_PATH, "basic_failing.ipynb") - db.commit_notebook_file(path, check_validity=False) - # nb_bundle = db.get_commit_bundle(1) + db.cache_notebook_file(path, check_validity=False) + # nb_bundle = db.get_cache_bundle(1) # nb_bundle.nb.cells[0].source = "# New Title" # db.stage_notebook_bundle(nb_bundle) @@ -105,7 +103,7 @@ def test_diff_nbs(tmp_path): assert result.exit_code == 0, result.output print(result.output.splitlines()[2:]) assert result.output.splitlines()[1:] == [ - "--- committed pk=1", + "--- cached pk=1", f"+++ other: {path2}", "## inserted before nb/cells/0:", "+ code cell:", @@ -155,7 +153,7 @@ def test_unstage_nbs(tmp_path): def test_list_staged(tmp_path): db = JupyterCacheBase(str(tmp_path)) - db.commit_notebook_file( + db.cache_notebook_file( path=os.path.join(NB_PATH, "basic.ipynb"), check_validity=False ) db.stage_notebook_file(path=os.path.join(NB_PATH, "basic.ipynb")) @@ -170,7 +168,7 @@ def test_list_staged(tmp_path): def test_show_staged(tmp_path): db = 
JupyterCacheBase(str(tmp_path)) - db.commit_notebook_file( + db.cache_notebook_file( path=os.path.join(NB_PATH, "basic.ipynb"), check_validity=False ) db.stage_notebook_file(path=os.path.join(NB_PATH, "basic.ipynb")) diff --git a/tests/test_db.py b/tests/test_db.py index 2775c83..12b8bd1 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -1,6 +1,6 @@ import pytest -from jupyter_cache.cache.db import create_db, NbCommitRecord, Setting +from jupyter_cache.cache.db import create_db, NbCacheRecord, Setting def test_setting(tmp_path): @@ -12,10 +12,10 @@ def test_setting(tmp_path): def test_nb_record(tmp_path): db = create_db(tmp_path) - bundle = NbCommitRecord.create_record("a", "b", db) + bundle = NbCacheRecord.create_record("a", "b", db) assert bundle.hashkey == "b" with pytest.raises(ValueError): - NbCommitRecord.create_record("a", "b", db) - NbCommitRecord.create_record("a", "c", db, data="a") - assert NbCommitRecord.record_from_hashkey("b", db).uri == "a" - assert {b.hashkey for b in NbCommitRecord.records_from_uri("a", db)} == {"b", "c"} + NbCacheRecord.create_record("a", "b", db) + NbCacheRecord.create_record("a", "c", db, data="a") + assert NbCacheRecord.record_from_hashkey("b", db).uri == "a" + assert {b.hashkey for b in NbCacheRecord.records_from_uri("a", db)} == {"b", "c"} From 2a052f5a044c00bcb8b5b55e64b6d7fa04929685 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 29 Feb 2020 13:21:41 +1100 Subject: [PATCH 2/8] ignore known warning --- tests/test_cache.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_cache.py b/tests/test_cache.py index 266a1b9..88d7a4f 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -146,6 +146,9 @@ def test_artifacts(tmp_path): assert path.joinpath("artifact_folder").exists() +# jupyter_client/session.py:371: DeprecationWarning: +# Session._key_changed is deprecated in traitlets: use @observe and @unobserve instead +@pytest.mark.filterwarnings("ignore") def test_execution(tmp_path): from 
jupyter_cache.executors import load_executor From ab198d4e9de1aae090b5b6c80eb4f37ecbc2f149 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 29 Feb 2020 14:13:23 +1100 Subject: [PATCH 3/8] Move CLI commands to seperate groups --- jupyter_cache/cli/commands/__init__.py | 2 + jupyter_cache/cli/commands/cmd_cache.py | 142 ++--------------------- jupyter_cache/cli/commands/cmd_config.py | 21 ++++ jupyter_cache/cli/commands/cmd_exec.py | 2 +- jupyter_cache/cli/commands/cmd_main.py | 12 ++ jupyter_cache/cli/commands/cmd_stage.py | 114 ++++++++++++++++++ jupyter_cache/cli/utils.py | 10 ++ tests/test_cli.py | 14 +-- 8 files changed, 179 insertions(+), 138 deletions(-) create mode 100644 jupyter_cache/cli/commands/cmd_config.py create mode 100644 jupyter_cache/cli/commands/cmd_stage.py create mode 100644 jupyter_cache/cli/utils.py diff --git a/jupyter_cache/cli/commands/__init__.py b/jupyter_cache/cli/commands/__init__.py index cfb56f7..596b563 100644 --- a/jupyter_cache/cli/commands/__init__.py +++ b/jupyter_cache/cli/commands/__init__.py @@ -1,2 +1,4 @@ from .cmd_cache import * # noqa: F401,F403 +from .cmd_config import * # noqa: F401,F403 from .cmd_exec import * # noqa: F401,F403 +from .cmd_stage import * # noqa: F401,F403 diff --git a/jupyter_cache/cli/commands/cmd_cache.py b/jupyter_cache/cli/commands/cmd_cache.py index 4d08b28..0267ce5 100644 --- a/jupyter_cache/cli/commands/cmd_cache.py +++ b/jupyter_cache/cli/commands/cmd_cache.py @@ -1,4 +1,3 @@ -from pathlib import Path import sys import click @@ -7,6 +6,7 @@ from jupyter_cache.cli.commands.cmd_main import jcache from jupyter_cache.cli import arguments, options +from jupyter_cache.cli.utils import shorten_path from jupyter_cache.cache import JupyterCacheBase from jupyter_cache.base import ( # noqa: F401 CachingError, @@ -15,33 +15,10 @@ ) -def shorten_path(file_path, length): - """Split the path into separate parts, - select the last 'length' elements and join them again - """ - if length is None: - return 
Path(file_path) - return Path(*Path(file_path).parts[-length:]) - - -@jcache.command("clear") -@options.CACHE_PATH -def clear_cache(cache_path): - """Clear the cache completely.""" - db = JupyterCacheBase(cache_path) - click.confirm("Are you sure you want to permanently clear the cache!?", abort=True) - db.clear_cache() - click.secho("Cache cleared!", fg="green") - - -@jcache.command("cache-limit") -@options.CACHE_PATH -@click.argument("limit", metavar="CACHE_LIMIT", type=int) -def change_cache_limit(cache_path, limit): - """Change the maximum number of notebooks stored in the cache.""" - db = JupyterCacheBase(cache_path) - db.change_cache_limit(limit) - click.secho("Cache limit changed!", fg="green") +@jcache.group("cache") +def cmnd_cache(): + """Commands for adding to and inspecting the cache.""" + pass def format_cache_record(record, hashkeys, path_length): @@ -57,7 +34,7 @@ def format_cache_record(record, hashkeys, path_length): return data -@jcache.command("list-cached") +@cmnd_cache.command("list") @options.CACHE_PATH @click.option("-h", "--hashkeys", is_flag=True, help="Whether to show hashkeys.") @options.PATH_LENGTH @@ -79,7 +56,7 @@ def list_caches(cache_path, hashkeys, path_length): ) -@jcache.command("show-cached") +@cmnd_cache.command("show") @options.CACHE_PATH @arguments.PK def show_cache(cache_path, pk): @@ -101,7 +78,7 @@ def show_cache(cache_path, pk): click.echo(yaml.safe_dump({"Data": record.data})) -@jcache.command("cat-artifact") +@cmnd_cache.command("cat-artifact") @options.CACHE_PATH @arguments.PK @arguments.ARTIFACT_RPATH @@ -151,7 +128,7 @@ def cache_file(db, nbpath, validate, overwrite, artifact_paths=()): return True -@jcache.command("cache-nb") +@cmnd_cache.command("add-one") @arguments.ARTIFACT_PATHS @options.NB_PATH @options.CACHE_PATH @@ -165,7 +142,7 @@ def cache_nb(cache_path, artifact_paths, nbpath, validate, overwrite): click.secho("Success!", fg="green") -@jcache.command("cache-nbs") +@cmnd_cache.command("add-many") 
@arguments.NB_PATHS @options.CACHE_PATH @options.VALIDATE_NB @@ -182,7 +159,7 @@ def cache_nbs(cache_path, nbpaths, validate, overwrite): click.secho("Success!", fg="green") -@jcache.command("remove-cached") +@cmnd_cache.command("remove") @arguments.PKS @options.CACHE_PATH @options.REMOVE_ALL @@ -204,7 +181,7 @@ def remove_caches(cache_path, pks, remove_all): click.secho("Success!", fg="green") -@jcache.command("diff-nb") +@cmnd_cache.command("diff-nb") @arguments.PK @arguments.NB_PATH @options.CACHE_PATH @@ -213,98 +190,3 @@ def diff_nb(cache_path, pk, nbpath): db = JupyterCacheBase(cache_path) click.echo(db.diff_nbfile_with_cache(pk, nbpath, as_str=True)) click.secho("Success!", fg="green") - - -@jcache.command("stage-nbs") -@arguments.NB_PATHS -@options.CACHE_PATH -def stage_nbs(cache_path, nbpaths): - """Stage notebook(s) for execution.""" - db = JupyterCacheBase(cache_path) - for path in nbpaths: - # TODO deal with errors (print all at end? or option to ignore) - click.echo("Staging: {}".format(path)) - db.stage_notebook_file(path) - click.secho("Success!", fg="green") - - -@jcache.command("stage-nb") -@arguments.ASSET_PATHS -@options.NB_PATH -@options.CACHE_PATH -def stage_nb(cache_path, nbpath, asset_paths): - """Cache a notebook, with possible assets.""" - db = JupyterCacheBase(cache_path) - db.stage_notebook_file(nbpath, asset_paths) - click.secho("Success!", fg="green") - - -@jcache.command("unstage-nbs") -@arguments.NB_PATHS -@options.CACHE_PATH -@options.REMOVE_ALL -def unstage_nbs(cache_path, nbpaths, remove_all): - """Unstage notebook(s) for execution.""" - db = JupyterCacheBase(cache_path) - if remove_all: - nbpaths = [record.uri for record in db.list_staged_records()] - for path in nbpaths: - # TODO deal with errors (print all at end? 
or option to ignore) - click.echo("Unstaging: {}".format(path)) - db.discard_staged_notebook(path) - click.secho("Success!", fg="green") - - -def format_staged_record(record, cache_record, path_length, assets=True): - data = { - "ID": record.pk, - "URI": str(shorten_path(record.uri, path_length)), - "Created": record.created.isoformat(" ", "minutes"), - } - if assets: - data["Assets"] = len(record.assets) - if cache_record: - data["Cache ID"] = cache_record.pk - return data - - -@jcache.command("list-staged") -@options.CACHE_PATH -@click.option( - "--compare/--no-compare", - default=True, - show_default=True, - help="Compare to cached notebooks (to find cache ID).", -) -@options.PATH_LENGTH -def list_staged(cache_path, compare, path_length): - """List notebooks staged for possible execution.""" - db = JupyterCacheBase(cache_path) - records = db.list_staged_records() - if not records: - click.secho("No Staged Notebooks", fg="blue") - rows = [] - for record in sorted(records, key=lambda r: r.created, reverse=True): - cache_record = None - if compare: - cache_record = db.get_cache_record_of_staged(record.uri) - rows.append(format_staged_record(record, cache_record, path_length)) - click.echo(tabulate.tabulate(rows, headers="keys")) - - -@jcache.command("show-staged") -@options.CACHE_PATH -@arguments.PK -def show_staged(cache_path, pk): - """Show details of a staged notebook.""" - db = JupyterCacheBase(cache_path) - record = db.get_staged_record(pk) - cache_record = db.get_cache_record_of_staged(record.uri) - data = format_staged_record(record, cache_record, None, assets=False) - click.echo(yaml.safe_dump(data, sort_keys=False), nl=False) - if not record.assets: - click.echo("") - return - click.echo(f"Assets:") - for path in record.assets: - click.echo(f"- {path}") diff --git a/jupyter_cache/cli/commands/cmd_config.py b/jupyter_cache/cli/commands/cmd_config.py new file mode 100644 index 0000000..defa0cb --- /dev/null +++ b/jupyter_cache/cli/commands/cmd_config.py @@ 
-0,0 +1,21 @@ +import click + +from jupyter_cache.cache import JupyterCacheBase +from jupyter_cache.cli.commands.cmd_main import jcache +from jupyter_cache.cli import options + + +@jcache.group("config") +def cmnd_config(): + """Commands for configuring the cache.""" + pass + + +@cmnd_config.command("cache-limit") +@options.CACHE_PATH +@click.argument("limit", metavar="CACHE_LIMIT", type=int) +def change_cache_limit(cache_path, limit): + """Change the maximum number of notebooks stored in the cache.""" + db = JupyterCacheBase(cache_path) + db.change_cache_limit(limit) + click.secho("Cache limit changed!", fg="green") diff --git a/jupyter_cache/cli/commands/cmd_exec.py b/jupyter_cache/cli/commands/cmd_exec.py index c6a6ac1..a7b8c20 100644 --- a/jupyter_cache/cli/commands/cmd_exec.py +++ b/jupyter_cache/cli/commands/cmd_exec.py @@ -21,7 +21,7 @@ @options.EXEC_ENTRYPOINT @options.CACHE_PATH def execute_nbs(cache_path, entry_point): - """Execute outdated notebooks.""" + """Execute staged notebooks that are outdated.""" from jupyter_cache.executors import load_executor db = JupyterCacheBase(cache_path) diff --git a/jupyter_cache/cli/commands/cmd_main.py b/jupyter_cache/cli/commands/cmd_main.py index 4f309f2..c73417f 100644 --- a/jupyter_cache/cli/commands/cmd_main.py +++ b/jupyter_cache/cli/commands/cmd_main.py @@ -11,3 +11,15 @@ # @options.AUTOCOMPLETE # doesn't allow file path autocompletion def jcache(cache_path): """The command line interface of jupyter-cache.""" + + +@jcache.command("clear") +@options.CACHE_PATH +def clear_cache(cache_path): + """Clear the cache completely.""" + from jupyter_cache.cache import JupyterCacheBase + + db = JupyterCacheBase(cache_path) + click.confirm("Are you sure you want to permanently clear the cache!?", abort=True) + db.clear_cache() + click.secho("Cache cleared!", fg="green") diff --git a/jupyter_cache/cli/commands/cmd_stage.py b/jupyter_cache/cli/commands/cmd_stage.py new file mode 100644 index 0000000..1a8b41d --- /dev/null +++ 
b/jupyter_cache/cli/commands/cmd_stage.py @@ -0,0 +1,114 @@ +import click +import tabulate +import yaml + +from jupyter_cache.cli.commands.cmd_main import jcache +from jupyter_cache.cli import arguments, options +from jupyter_cache.cli.utils import shorten_path +from jupyter_cache.cache import JupyterCacheBase +from jupyter_cache.base import ( # noqa: F401 + CachingError, + RetrievalError, + NbValidityError, +) + + +@jcache.group("stage") +def cmnd_stage(): + """Commands for staging notebooks to be executed.""" + pass + + +@cmnd_stage.command("add-many") +@arguments.NB_PATHS +@options.CACHE_PATH +def stage_nbs(cache_path, nbpaths): + """Stage notebook(s) for execution.""" + db = JupyterCacheBase(cache_path) + for path in nbpaths: + # TODO deal with errors (print all at end? or option to ignore) + click.echo("Staging: {}".format(path)) + db.stage_notebook_file(path) + click.secho("Success!", fg="green") + + +@cmnd_stage.command("add-one") +@arguments.ASSET_PATHS +@options.NB_PATH +@options.CACHE_PATH +def stage_nb(cache_path, nbpath, asset_paths): + """Stage a notebook, with possible assets.""" + db = JupyterCacheBase(cache_path) + db.stage_notebook_file(nbpath, asset_paths) + click.secho("Success!", fg="green") + + +@cmnd_stage.command("remove") +@arguments.NB_PATHS +@options.CACHE_PATH +@options.REMOVE_ALL +def unstage_nbs(cache_path, nbpaths, remove_all): + """Unstage notebook(s) for execution.""" + db = JupyterCacheBase(cache_path) + if remove_all: + nbpaths = [record.uri for record in db.list_staged_records()] + for path in nbpaths: + # TODO deal with errors (print all at end? 
or option to ignore) + click.echo("Unstaging: {}".format(path)) + db.discard_staged_notebook(path) + click.secho("Success!", fg="green") + + +def format_staged_record(record, cache_record, path_length, assets=True): + data = { + "ID": record.pk, + "URI": str(shorten_path(record.uri, path_length)), + "Created": record.created.isoformat(" ", "minutes"), + } + if assets: + data["Assets"] = len(record.assets) + if cache_record: + data["Cache ID"] = cache_record.pk + return data + + +@cmnd_stage.command("list") +@options.CACHE_PATH +@click.option( + "--compare/--no-compare", + default=True, + show_default=True, + help="Compare to cached notebooks (to find cache ID).", +) +@options.PATH_LENGTH +def list_staged(cache_path, compare, path_length): + """List notebooks staged for possible execution.""" + db = JupyterCacheBase(cache_path) + records = db.list_staged_records() + if not records: + click.secho("No Staged Notebooks", fg="blue") + rows = [] + for record in sorted(records, key=lambda r: r.created, reverse=True): + cache_record = None + if compare: + cache_record = db.get_cache_record_of_staged(record.uri) + rows.append(format_staged_record(record, cache_record, path_length)) + click.echo(tabulate.tabulate(rows, headers="keys")) + + +@cmnd_stage.command("show") +@options.CACHE_PATH +@arguments.PK +def show_staged(cache_path, pk): + """Show details of a staged notebook.""" + db = JupyterCacheBase(cache_path) + record = db.get_staged_record(pk) + cache_record = db.get_cache_record_of_staged(record.uri) + data = format_staged_record(record, cache_record, None, assets=False) + click.echo(yaml.safe_dump(data, sort_keys=False), nl=False) + if not record.assets: + click.echo("") + return + click.echo(f"Assets:") + for path in record.assets: + click.echo(f"- {path}") diff --git a/jupyter_cache/cli/utils.py b/jupyter_cache/cli/utils.py new file mode 100644 index 0000000..9cedb54 --- /dev/null +++ b/jupyter_cache/cli/utils.py @@ -0,0 +1,10 @@ +from pathlib import Path + + +def 
shorten_path(file_path, length): + """Split the path into separate parts, + select the last 'length' elements and join them again + """ + if length is None: + return Path(file_path) + return Path(*Path(file_path).parts[-length:]) diff --git a/tests/test_cli.py b/tests/test_cli.py index c33bbe6..9207182 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -3,7 +3,7 @@ from click.testing import CliRunner from jupyter_cache.cache import JupyterCacheBase -from jupyter_cache.cli.commands import cmd_main, cmd_cache +from jupyter_cache.cli.commands import cmd_main, cmd_cache, cmd_stage NB_PATH = os.path.join(os.path.realpath(os.path.dirname(__file__)), "notebooks") @@ -19,7 +19,7 @@ def test_base(): def test_clear_cache(tmp_path): JupyterCacheBase(str(tmp_path)) runner = CliRunner() - result = runner.invoke(cmd_cache.clear_cache, ["-p", tmp_path], input="y") + result = runner.invoke(cmd_main.clear_cache, ["-p", tmp_path], input="y") assert result.exception is None, result.output assert result.exit_code == 0, result.output assert "Cache cleared!" 
in result.output.strip(), result.output @@ -132,7 +132,7 @@ def test_stage_nbs(tmp_path): db = JupyterCacheBase(str(tmp_path)) path = os.path.join(NB_PATH, "basic.ipynb") runner = CliRunner() - result = runner.invoke(cmd_cache.stage_nbs, ["-p", tmp_path, path]) + result = runner.invoke(cmd_stage.stage_nbs, ["-p", tmp_path, path]) assert result.exception is None, result.output assert result.exit_code == 0, result.output assert "basic.ipynb" in result.output.strip(), result.output @@ -143,8 +143,8 @@ def test_unstage_nbs(tmp_path): db = JupyterCacheBase(str(tmp_path)) path = os.path.join(NB_PATH, "basic.ipynb") runner = CliRunner() - result = runner.invoke(cmd_cache.stage_nbs, ["-p", tmp_path, path]) - result = runner.invoke(cmd_cache.unstage_nbs, ["-p", tmp_path, path]) + result = runner.invoke(cmd_stage.stage_nbs, ["-p", tmp_path, path]) + result = runner.invoke(cmd_stage.unstage_nbs, ["-p", tmp_path, path]) assert result.exception is None, result.output assert result.exit_code == 0, result.output assert "basic.ipynb" in result.output.strip(), result.output @@ -160,7 +160,7 @@ def test_list_staged(tmp_path): db.stage_notebook_file(path=os.path.join(NB_PATH, "basic_failing.ipynb")) runner = CliRunner() - result = runner.invoke(cmd_cache.list_staged, ["-p", tmp_path]) + result = runner.invoke(cmd_stage.list_staged, ["-p", tmp_path]) assert result.exception is None, result.output assert result.exit_code == 0, result.output assert "basic.ipynb" in result.output.strip(), result.output @@ -174,7 +174,7 @@ def test_show_staged(tmp_path): db.stage_notebook_file(path=os.path.join(NB_PATH, "basic.ipynb")) runner = CliRunner() - result = runner.invoke(cmd_cache.show_staged, ["-p", tmp_path, "1"]) + result = runner.invoke(cmd_stage.show_staged, ["-p", tmp_path, "1"]) assert result.exception is None, result.output assert result.exit_code == 0, result.output assert "basic.ipynb" in result.output.strip(), result.output From d57974b7c11e9a72f7e2947719b023752b56bd8a Mon Sep 17 
00:00:00 2001 From: Chris Sewell Date: Sat, 29 Feb 2020 14:49:12 +1100 Subject: [PATCH 4/8] Update README.md --- README.md | 225 +++++++++++++++--------- jupyter_cache/cli/commands/cmd_cache.py | 6 +- jupyter_cache/cli/commands/cmd_stage.py | 30 +++- tests/test_cli.py | 2 +- 4 files changed, 171 insertions(+), 92 deletions(-) diff --git a/README.md b/README.md index 9b64470..068a2ab 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ to come ... ## Example CLI usage -From checked-out folder: +From checked-out repository folder: ```console $ jcache -h @@ -55,32 +55,41 @@ Options: -h, --help Show this message and exit. Commands: - cache-limit Change the maximum number of notebooks stored in the cache. - cache-nb Cache a notebook that has already been executed. - cache-nbs Cache notebook(s) that have already been executed. - cat-artifact Print the contents of a cached artefact. - clear Clear the cache completely. - diff-nb Print a diff of a notebook to one stored in the cache. - execute Execute outdated notebooks. - list-cached List cached notebook records in the cache. - list-staged List notebooks staged for possible execution. - remove-cached Remove notebooks stored in the cache. - show-cached Show details of a cached notebook in the cache. - show-staged Show details of a staged notebook. - stage-nb Cache a notebook, with possible assets. - stage-nbs Stage notebook(s) for execution. - unstage-nbs Unstage notebook(s) for execution. + cache Commands for adding to and inspecting the cache. + clear Clear the cache completely. + config Commands for configuring the cache. + execute Execute staged notebooks that are outdated. + stage Commands for staging notebooks to be executed. ``` ### Caching Executed Notebooks -You can cache notebooks straight into the cache. When caching, a check will be made that the notebooks look to have been executed correctly, i.e. the cell execution counts go sequentially up from 1. 
+```console +$ jcache cache -h +Usage: jcache cache [OPTIONS] COMMAND [ARGS]... + + Commands for adding to and inspecting the cache. + +Options: + -h, --help Show this message and exit. + +Commands: + add-many Cache notebook(s) that have already been executed. + add-one Cache a notebook that has already been executed. + cat-artifact Print the contents of a cached artefact. + diff-nb Print a diff of a notebook to one stored in the cache. + list List cached notebook records in the cache. + remove Remove notebooks stored in the cache. + show Show details of a cached notebook in the cache. +``` + +You can add notebooks straight into the cache. When caching, a check will be made that the notebooks look to have been executed correctly, i.e. the cell execution counts go sequentially up from 1. ```console -$ jcache cache-nbs tests/notebooks/basic.ipynb -Cache path: /Users/cjs14/GitHub/sandbox/.jupyter_cache +$ jcache cache add-many tests/notebooks/basic.ipynb +Cache path: jupyter-cache/.jupyter_cache The cache does not yet exist, do you want to create it? [y/N]: y -Caching: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic.ipynb +Caching: jupyter-cache/tests/notebooks/basic.ipynb Validity Error: Expected cell 1 to have execution_count 1 not 2 The notebook may not have been executed, continue caching? [y/N]: y Success! @@ -89,11 +98,12 @@ Success! 
Or to skip validation: ```console -$ jcache cache-nbs --no-validate tests/notebooks/*.ipynb -Caching: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic.ipynb -Caching: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic_failing.ipynb -Caching: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic_unrun.ipynb -Caching: /Users/cjs14/GitHub/sandbox/tests/notebooks/complex_outputs.ipynb +jcache cache add-many --no-validate tests/notebooks/*.ipynb +Caching: jupyter-cache/tests/notebooks/basic.ipynb +Caching: jupyter-cache/tests/notebooks/basic_failing.ipynb +Caching: jupyter-cache/tests/notebooks/basic_unrun.ipynb +Caching: jupyter-cache/tests/notebooks/complex_outputs.ipynb +Caching: jupyter-cache/tests/notebooks/external_output.ipynb Success! ``` @@ -102,60 +112,67 @@ Once you've cached some notebooks, you can look at the 'cache records' for what Each notebook is hashed (code cells and kernel spec only), which is used to compare against 'staged' notebooks. Multiple hashes for the same URI can be added (the URI is just there for inspection) and the size of the cache is limited (current default 1000) so that, at this size, the last accessed records begin to be deleted. You can remove cached records by their ID. 
```console -$ jcache list-cached --hashkeys - ID URI Created Accessed Hashkey ----- --------------------- ---------------- ---------------- -------------------------------- - 4 complex_outputs.ipynb 2020-02-23 20:33 2020-02-23 20:33 800c4a057730a55a384cfe579e3850aa - 3 basic_unrun.ipynb 2020-02-23 20:33 2020-02-23 20:33 818f3412b998fcf4fe9ca3cca11a3fc3 - 2 basic_failing.ipynb 2020-02-23 20:33 2020-02-23 20:33 72859c2bf1e12f35f30ef131f0bef320 +$ jcache cache list + ID URI Created Accessed +---- ------------------------------------- ---------------- ---------------- + 5 tests/notebooks/external_output.ipynb 2020-02-29 03:17 2020-02-29 03:17 + 4 tests/notebooks/complex_outputs.ipynb 2020-02-29 03:17 2020-02-29 03:17 + 3 tests/notebooks/basic_unrun.ipynb 2020-02-29 03:17 2020-02-29 03:17 + 2 tests/notebooks/basic_failing.ipynb 2020-02-29 03:17 2020-02-29 03:17 ``` You can also cache notebooks with artefacts (external outputs of the notebook execution). ```console -$ jcache cache-nb -nb tests/notebooks/basic.ipynb tests/notebooks/artifact_folder/artifact.txt -Caching: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb +$ jcache cache add-one -nb tests/notebooks/basic.ipynb tests/notebooks/artifact_folder/artifact.txt +Caching: jupyter-cache/tests/notebooks/basic.ipynb Success! 
``` +Show a full description of a cached notebook by referring to its ID + ```console -$ jcache show-cached 1 -ID: 1 -URI: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb -Created: 2020-02-24 14:58 -Accessed: 2020-02-24 14:58 +$ jcache cache show 6 +ID: 6 +URI: jupyter-cache/tests/notebooks/basic.ipynb +Created: 2020-02-29 03:19 +Accessed: 2020-02-29 03:19 Hashkey: 818f3412b998fcf4fe9ca3cca11a3fc3 Artifacts: - artifact_folder/artifact.txt ``` -```console -$ jcache cat-artifact 1 artifact_folder/artifact.txt -An artifact +Note artefact paths must be 'upstream' of the notebook folder: +```console +$ jcache cache add-one -nb tests/notebooks/basic.ipynb tests/test_db.py +Caching: jupyter-cache/tests/notebooks/basic.ipynb +Artifact Error: Path 'jupyter-cache/tests/test_db.py' is not in folder 'jupyter-cache/tests/notebooks'' ``` -These must be 'upstream' of the notebook folder: +To view the contents of an execution artefact: ```console -$ jcache cache-nb -nb tests/notebooks/basic.ipynb tests/test_db.py -Caching: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb -Artifact Error: Path '/Users/cjs14/GitHub/jupyter-cache/tests/test_db.py' is not in folder '/Users/cjs14/GitHub/jupyter-cache/tests/notebooks'' +$ jcache cache cat-artifact 1 artifact_folder/artifact.txt +An artifact + ``` +You can directly remove a cached notebook by its ID: + ```console -$ jcache remove-cached 3 -Removing Cache ID = 3 +$ jcache cache remove 4 +Removing Cache ID = 4 Success! 
``` You can also diff any of the cached notebooks with any (external) notebook: ```console -$ jcache diff-nb 2 tests/notebooks/basic.ipynb +$ jcache cache diff-nb 2 tests/notebooks/basic.ipynb nbdiff --- cached pk=2 -+++ other: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic.ipynb ++++ other: sandbox/tests/notebooks/basic.ipynb ## inserted before nb/cells/1: + code cell: + execution_count: 2 @@ -177,38 +194,71 @@ nbdiff ### Staging Notebooks for execution +```console +$ jcache stage -h +Usage: jcache stage [OPTIONS] COMMAND [ARGS]... + + Commands for staging notebooks to be executed. + +Options: + -h, --help Show this message and exit. + +Commands: + add-many Stage notebook(s) for execution. + add-one Stage a notebook, with possible assets. + list List notebooks staged for possible execution. + remove-ids Un-stage notebook(s), by ID. + remove-uris Un-stage notebook(s), by URI. + show Show details of a staged notebook. +``` + Staged notebooks are recorded as pointers to their URI, i.e. no physical copying takes place until execution time. 
-If you stage some notebooks for execution, then you can list them to see which have existing records in the cache (by hash) and which will require execution: +If you stage some notebooks for execution, +then you can list them to see which have existing records in the cache (by hash), +and which will require execution: ```console -$ jcache stage-nbs tests/notebooks/*.ipynb -Staging: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic.ipynb -Staging: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic_failing.ipynb -Staging: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic_unrun.ipynb -Staging: /Users/cjs14/GitHub/sandbox/tests/notebooks/complex_outputs.ipynb +$ jcache stage add-many tests/notebooks/*.ipynb +Staging: jupyter-cache/tests/notebooks/basic.ipynb +Staging: jupyter-cache/tests/notebooks/basic_failing.ipynb +Staging: jupyter-cache/tests/notebooks/basic_unrun.ipynb +Staging: jupyter-cache/tests/notebooks/complex_outputs.ipynb +Staging: jupyter-cache/tests/notebooks/external_output.ipynb Success! ``` ```console -$ jcache list-staged - ID URI Created Cache ID ----- ------------------------------------- ---------------- ----------- - 4 tests/notebooks/complex_outputs.ipynb 2020-02-23 20:48 4 - 3 tests/notebooks/basic_unrun.ipynb 2020-02-23 20:48 - 2 tests/notebooks/basic_failing.ipynb 2020-02-23 20:48 2 - 1 tests/notebooks/basic.ipynb 2020-02-23 20:48 +$ jcache stage list + ID URI Created Assets Cache ID +---- ------------------------------------- ---------------- -------- ---------- + 5 tests/notebooks/external_output.ipynb 2020-02-29 03:29 0 5 + 4 tests/notebooks/complex_outputs.ipynb 2020-02-29 03:29 0 + 3 tests/notebooks/basic_unrun.ipynb 2020-02-29 03:29 0 6 + 2 tests/notebooks/basic_failing.ipynb 2020-02-29 03:29 0 2 + 1 tests/notebooks/basic.ipynb 2020-02-29 03:29 0 6 +``` + +You can remove a staged notebook by its URI or ID: + +```console +$ jcache stage remove-ids 4 +Unstaging ID: 4 +Success! 
``` You can then run a basic execution of the required notebooks: ```console +$ jcache cache remove 6 +Removing Cache ID = 6 +Success! $ jcache execute -Executing: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic.ipynb -Success: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic.ipynb -Executing: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic_unrun.ipynb -Success: /Users/cjs14/GitHub/sandbox/tests/notebooks/basic_unrun.ipynb +Executing: jupyter-cache/tests/notebooks/basic.ipynb +Success: jupyter-cache/tests/notebooks/basic.ipynb +Executing: jupyter-cache/tests/notebooks/basic_unrun.ipynb +Success: jupyter-cache/tests/notebooks/basic_unrun.ipynb Finished! ``` @@ -216,32 +266,35 @@ Successfully executed notebooks will be cached to the cache, along with any 'artefacts' created by the execution, that are inside the notebook folder, and data supplied by the executor. ```console -$ jcache list-staged - ID URI Created Commit ID ----- ------------------------------------- ---------------- ----------- - 5 tests/notebooks/basic.ipynb 2020-02-23 20:57 5 - 4 tests/notebooks/complex_outputs.ipynb 2020-02-23 20:48 4 - 3 tests/notebooks/basic_unrun.ipynb 2020-02-23 20:48 6 - 2 tests/notebooks/basic_failing.ipynb 2020-02-23 20:48 2 +$ jcache stage list + ID URI Created Assets Cache ID +---- ------------------------------------- ---------------- -------- ---------- + 5 tests/notebooks/external_output.ipynb 2020-02-29 03:29 0 5 + 3 tests/notebooks/basic_unrun.ipynb 2020-02-29 03:29 0 6 + 2 tests/notebooks/basic_failing.ipynb 2020-02-29 03:29 0 2 + 1 tests/notebooks/basic.ipynb 2020-02-29 03:29 0 6 ``` ```console -jcache show-cached 5 -ID: 1 -URI: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb -Created: 2020-02-25 19:21 -Accessed: 2020-02-25 19:21 +$ jcache cache show 6 +ID: 6 +URI: jupyter-cache/tests/notebooks/basic_unrun.ipynb +Created: 2020-02-29 03:41 +Accessed: 2020-02-29 03:41 Hashkey: 818f3412b998fcf4fe9ca3cca11a3fc3 Data: - execution_seconds: 
1.4187269599999999 + execution_seconds: 1.2328746560000003 ``` Once executed you may leave staged notebooks, for later re-execution, or remove them: ```console -$ jcache unstage-nbs --all +$ jcache stage remove-ids --all Are you sure you want to remove all? [y/N]: y -Unstaging: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb +Unstaging ID: 1 +Unstaging ID: 2 +Unstaging ID: 3 +Unstaging ID: 5 Success! ``` @@ -249,24 +302,24 @@ You can also stage notebooks with assets; external files that are required by th these files must be in the same folder as the notebook, or a sub-folder. ```console -$ jcache stage-nb -nb tests/notebooks/basic.ipynb tests/notebooks/artifact_folder/artifact.txt +$ jcache stage add-one -nb tests/notebooks/basic.ipynb tests/notebooks/artifact_folder/artifact.txt Success! ``` ```console -$ jcache list-staged +$ jcache stage list ID URI Created Assets ---- --------------------------- ---------------- -------- 1 tests/notebooks/basic.ipynb 2020-02-25 10:01 1 ``` ```console -$ jcache show-staged 1 +$ jcache stage show 1 ID: 1 -URI: /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/basic.ipynb +URI: jupyter-cache/tests/notebooks/basic.ipynb Created: 2020-02-25 10:01 Assets: -- /Users/cjs14/GitHub/jupyter-cache/tests/notebooks/artifact_folder/artifact.txt +- jupyter-cache/tests/notebooks/artifact_folder/artifact.txt ``` ## Contributing diff --git a/jupyter_cache/cli/commands/cmd_cache.py b/jupyter_cache/cli/commands/cmd_cache.py index 0267ce5..20bb40c 100644 --- a/jupyter_cache/cli/commands/cmd_cache.py +++ b/jupyter_cache/cli/commands/cmd_cache.py @@ -62,7 +62,11 @@ def list_caches(cache_path, hashkeys, path_length): def show_cache(cache_path, pk): """Show details of a cached notebook in the cache.""" db = JupyterCacheBase(cache_path) - record = db.get_cache_record(pk) + try: + record = db.get_cache_record(pk) + except KeyError: + click.secho("ID {} does not exist, Aborting!".format(pk), fg="red") + sys.exit(1) data = 
format_cache_record(record, True, None) click.echo(yaml.safe_dump(data, sort_keys=False), nl=False) with db.cache_artefacts_temppath(pk) as folder: diff --git a/jupyter_cache/cli/commands/cmd_stage.py b/jupyter_cache/cli/commands/cmd_stage.py index 1a8b41d..c7e5519 100644 --- a/jupyter_cache/cli/commands/cmd_stage.py +++ b/jupyter_cache/cli/commands/cmd_stage.py @@ -1,3 +1,5 @@ +import sys + import click import tabulate import yaml @@ -43,12 +45,12 @@ def stage_nb(cache_path, nbpath, asset_paths): click.secho("Success!", fg="green") -@cmnd_stage.command("remove") +@cmnd_stage.command("remove-uris") @arguments.NB_PATHS @options.CACHE_PATH @options.REMOVE_ALL -def unstage_nbs(cache_path, nbpaths, remove_all): - """Unstage notebook(s) for execution.""" +def unstage_nbs_uri(cache_path, nbpaths, remove_all): + """Un-stage notebook(s), by URI.""" db = JupyterCacheBase(cache_path) if remove_all: nbpaths = [record.uri for record in db.list_staged_records()] @@ -59,6 +61,22 @@ def unstage_nbs(cache_path, nbpaths, remove_all): click.secho("Success!", fg="green") +@cmnd_stage.command("remove-ids") +@arguments.PKS +@options.CACHE_PATH +@options.REMOVE_ALL +def unstage_nbs_id(cache_path, pks, remove_all): + """Un-stage notebook(s), by ID.""" + db = JupyterCacheBase(cache_path) + if remove_all: + pks = [record.pk for record in db.list_staged_records()] + for pk in pks: + # TODO deal with errors (print all at end? 
or option to ignore) + click.echo("Unstaging ID: {}".format(pk)) + db.discard_staged_notebook(pk) + click.secho("Success!", fg="green") + + def format_staged_record(record, cache_record, path_length, assets=True): data = { "ID": record.pk, @@ -102,7 +120,11 @@ def list_staged(cache_path, compare, path_length): def show_staged(cache_path, pk): """Show details of a staged notebook.""" db = JupyterCacheBase(cache_path) - record = db.get_staged_record(pk) + try: + record = db.get_staged_record(pk) + except KeyError: + click.secho("ID {} does not exist, Aborting!".format(pk), fg="red") + sys.exit(1) cache_record = db.get_cache_record_of_staged(record.uri) data = format_staged_record(record, cache_record, None, assets=False) click.echo(yaml.safe_dump(data, sort_keys=False), nl=False) diff --git a/tests/test_cli.py b/tests/test_cli.py index 9207182..8070dd2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -144,7 +144,7 @@ def test_unstage_nbs(tmp_path): path = os.path.join(NB_PATH, "basic.ipynb") runner = CliRunner() result = runner.invoke(cmd_stage.stage_nbs, ["-p", tmp_path, path]) - result = runner.invoke(cmd_stage.unstage_nbs, ["-p", tmp_path, path]) + result = runner.invoke(cmd_stage.unstage_nbs_uri, ["-p", tmp_path, path]) assert result.exception is None, result.output assert result.exit_code == 0, result.output assert "basic.ipynb" in result.output.strip(), result.output From fe04f7c0a80fe7c17f73bf5aae4131f8a66eadd2 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 29 Feb 2020 15:01:27 +1100 Subject: [PATCH 5/8] Add click-completion --- README.md | 13 ++++++++++--- jupyter_cache/cli/commands/__init__.py | 5 +++++ jupyter_cache/cli/commands/cmd_main.py | 4 ++-- jupyter_cache/cli/options.py | 4 ++-- setup.py | 2 +- 5 files changed, 20 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 068a2ab..8a38267 100644 --- a/README.md +++ b/README.md @@ -50,9 +50,10 @@ Usage: jcache [OPTIONS] COMMAND [ARGS]... 
The command line interface of jupyter-cache. Options: - -v, --version Show the version and exit. - -p, --cache-path Print the current cache path and exit. - -h, --help Show this message and exit. + -v, --version Show the version and exit. + -p, --cache-path Print the current cache path and exit. + -a, --autocomplete Print the terminal autocompletion command and exit. + -h, --help Show this message and exit. Commands: cache Commands for adding to and inspecting the cache. @@ -62,6 +63,12 @@ Commands: stage Commands for staging notebooks to be executed. ``` +**Important**: Execute this in the terminal for auto-completion: + +```console +eval "$(_JCACHE_COMPLETE=source jcache)" +``` + ### Caching Executed Notebooks ```console diff --git a/jupyter_cache/cli/commands/__init__.py b/jupyter_cache/cli/commands/__init__.py index 596b563..58254b6 100644 --- a/jupyter_cache/cli/commands/__init__.py +++ b/jupyter_cache/cli/commands/__init__.py @@ -1,3 +1,8 @@ +import click_completion + +# Activate the completion of parameter types provided by the click_completion package +click_completion.init() + from .cmd_cache import * # noqa: F401,F403 from .cmd_config import * # noqa: F401,F403 from .cmd_exec import * # noqa: F401,F403 diff --git a/jupyter_cache/cli/commands/cmd_main.py b/jupyter_cache/cli/commands/cmd_main.py index c73417f..1715a00 100644 --- a/jupyter_cache/cli/commands/cmd_main.py +++ b/jupyter_cache/cli/commands/cmd_main.py @@ -8,8 +8,8 @@ None, "-v", "--version", message="jupyter-cache version %(version)s" ) @options.PRINT_CACHE_PATH -# @options.AUTOCOMPLETE # doesn't allow file path autocompletion -def jcache(cache_path): +@options.AUTOCOMPLETE +def jcache(*args): """The command line interface of jupyter-cache.""" diff --git a/jupyter_cache/cli/options.py b/jupyter_cache/cli/options.py index fa685d8..5f8a6ff 100644 --- a/jupyter_cache/cli/options.py +++ b/jupyter_cache/cli/options.py @@ -4,7 +4,7 @@ def callback_autocomplete(ctx, param, value): if value and not 
ctx.resilient_parsing: - click.echo("Run this in the terminal for auto-completion:") + click.echo("Execute this in the terminal for auto-completion:") click.echo('eval "$(_JCACHE_COMPLETE=source jcache)"') ctx.exit() @@ -12,7 +12,7 @@ def callback_autocomplete(ctx, param, value): AUTOCOMPLETE = click.option( "-a", "--autocomplete", - help="Print the terminal autocompletion command and exit.", + help="Print the autocompletion command and exit.", is_flag=True, expose_value=True, is_eager=True, diff --git a/setup.py b/setup.py index 794c833..150851a 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ # note: nbdime could be made an extra install_requires=["attrs", "nbformat", "nbdime", "nbconvert", "sqlalchemy"], extras_require={ - "cli": ["click", "click-log", "tabulate", "pyyaml"], + "cli": ["click", "click-completion", "click-log", "tabulate", "pyyaml"], "code_style": ["flake8<3.8.0,>=3.7.0", "black", "pre-commit==1.17.0"], "testing": ["coverage", "pytest>=3.6,<4", "pytest-cov", "pytest-regressions"], }, From 254bcc7f32b16e5541b726656f533a4af990accc Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 29 Feb 2020 15:25:53 +1100 Subject: [PATCH 6/8] Make imports in CLI lazy, to improve responsiveness --- jupyter_cache/cache/__init__.py | 1 - jupyter_cache/cli/commands/cmd_cache.py | 33 ++++++++++++------------ jupyter_cache/cli/commands/cmd_config.py | 4 +-- jupyter_cache/cli/commands/cmd_exec.py | 9 ++----- jupyter_cache/cli/commands/cmd_main.py | 4 +-- jupyter_cache/cli/commands/cmd_stage.py | 26 ++++++++----------- jupyter_cache/cli/utils.py | 12 ++++++--- jupyter_cache/executors/__init__.py | 2 +- tests/test_cache.py | 2 +- tests/test_cli.py | 2 +- 10 files changed, 46 insertions(+), 49 deletions(-) diff --git a/jupyter_cache/cache/__init__.py b/jupyter_cache/cache/__init__.py index 6be4ace..e69de29 100644 --- a/jupyter_cache/cache/__init__.py +++ b/jupyter_cache/cache/__init__.py @@ -1 +0,0 @@ -from .main import JupyterCacheBase, DEFAULT_CACHE_LIMIT # 
noqa: F401 diff --git a/jupyter_cache/cli/commands/cmd_cache.py b/jupyter_cache/cli/commands/cmd_cache.py index 20bb40c..c5f98d9 100644 --- a/jupyter_cache/cli/commands/cmd_cache.py +++ b/jupyter_cache/cli/commands/cmd_cache.py @@ -1,18 +1,10 @@ import sys import click -import tabulate -import yaml from jupyter_cache.cli.commands.cmd_main import jcache from jupyter_cache.cli import arguments, options -from jupyter_cache.cli.utils import shorten_path -from jupyter_cache.cache import JupyterCacheBase -from jupyter_cache.base import ( # noqa: F401 - CachingError, - RetrievalError, - NbValidityError, -) +from jupyter_cache.cli.utils import shorten_path, get_cache @jcache.group("cache") @@ -40,7 +32,9 @@ def format_cache_record(record, hashkeys, path_length): @options.PATH_LENGTH def list_caches(cache_path, hashkeys, path_length): """List cached notebook records in the cache.""" - db = JupyterCacheBase(cache_path) + import tabulate + + db = get_cache(cache_path) records = db.list_cache_records() if not records: click.secho("No Cached Notebooks", fg="blue") @@ -61,7 +55,9 @@ def list_caches(cache_path, hashkeys, path_length): @arguments.PK def show_cache(cache_path, pk): """Show details of a cached notebook in the cache.""" - db = JupyterCacheBase(cache_path) + import yaml + + db = get_cache(cache_path) try: record = db.get_cache_record(pk) except KeyError: @@ -88,7 +84,7 @@ def show_cache(cache_path, pk): @arguments.ARTIFACT_RPATH def cat_artifact(cache_path, pk, artifact_rpath): """Print the contents of a cached artefact.""" - db = JupyterCacheBase(cache_path) + db = get_cache(cache_path) with db.cache_artefacts_temppath(pk) as path: artifact_path = path.joinpath(artifact_rpath) if not artifact_path.exists(): @@ -102,6 +98,9 @@ def cat_artifact(cache_path, pk, artifact_rpath): def cache_file(db, nbpath, validate, overwrite, artifact_paths=()): + + from jupyter_cache.base import NbValidityError + click.echo("Caching: {}".format(nbpath)) try: db.cache_notebook_file( @@ 
-140,7 +139,7 @@ def cache_file(db, nbpath, validate, overwrite, artifact_paths=()): @options.OVERWRITE_CACHED def cache_nb(cache_path, artifact_paths, nbpath, validate, overwrite): """Cache a notebook that has already been executed.""" - db = JupyterCacheBase(cache_path) + db = get_cache(cache_path) success = cache_file(db, nbpath, validate, overwrite, artifact_paths) if success: click.secho("Success!", fg="green") @@ -153,7 +152,7 @@ def cache_nb(cache_path, artifact_paths, nbpath, validate, overwrite): @options.OVERWRITE_CACHED def cache_nbs(cache_path, nbpaths, validate, overwrite): """Cache notebook(s) that have already been executed.""" - db = JupyterCacheBase(cache_path) + db = get_cache(cache_path) success = True for nbpath in nbpaths: # TODO deal with errors (print all at end? or option to ignore) @@ -169,7 +168,9 @@ def cache_nbs(cache_path, nbpaths, validate, overwrite): @options.REMOVE_ALL def remove_caches(cache_path, pks, remove_all): """Remove notebooks stored in the cache.""" - db = JupyterCacheBase(cache_path) + from jupyter_cache.base import CachingError + + db = get_cache(cache_path) if remove_all: pks = [r.pk for r in db.list_cache_records()] for pk in pks: @@ -191,6 +192,6 @@ def remove_caches(cache_path, pks, remove_all): @options.CACHE_PATH def diff_nb(cache_path, pk, nbpath): """Print a diff of a notebook to one stored in the cache.""" - db = JupyterCacheBase(cache_path) + db = get_cache(cache_path) click.echo(db.diff_nbfile_with_cache(pk, nbpath, as_str=True)) click.secho("Success!", fg="green") diff --git a/jupyter_cache/cli/commands/cmd_config.py b/jupyter_cache/cli/commands/cmd_config.py index defa0cb..b5b1bba 100644 --- a/jupyter_cache/cli/commands/cmd_config.py +++ b/jupyter_cache/cli/commands/cmd_config.py @@ -1,8 +1,8 @@ import click -from jupyter_cache.cache import JupyterCacheBase from jupyter_cache.cli.commands.cmd_main import jcache from jupyter_cache.cli import options +from jupyter_cache.cli.utils import get_cache 
@jcache.group("config") @@ -16,6 +16,6 @@ def cmnd_config(): @click.argument("limit", metavar="CACHE_LIMIT", type=int) def change_cache_limit(cache_path, limit): """Change the maximum number of notebooks stored in the cache.""" - db = JupyterCacheBase(cache_path) + db = get_cache(cache_path) db.change_cache_limit(limit) click.secho("Cache limit changed!", fg="green") diff --git a/jupyter_cache/cli/commands/cmd_exec.py b/jupyter_cache/cli/commands/cmd_exec.py index a7b8c20..5535991 100644 --- a/jupyter_cache/cli/commands/cmd_exec.py +++ b/jupyter_cache/cli/commands/cmd_exec.py @@ -5,12 +5,7 @@ from jupyter_cache.cli.commands.cmd_main import jcache from jupyter_cache.cli import options -from jupyter_cache.cache import JupyterCacheBase -from jupyter_cache.base import ( # noqa: F401 - CachingError, - RetrievalError, - NbValidityError, -) +from jupyter_cache.cli.utils import get_cache logger = logging.getLogger(__name__) click_log.basic_config(logger) @@ -24,7 +19,7 @@ def execute_nbs(cache_path, entry_point): """Execute staged notebooks that are outdated.""" from jupyter_cache.executors import load_executor - db = JupyterCacheBase(cache_path) + db = get_cache(cache_path) try: executor = load_executor("basic", db, logger=logger) except ImportError as error: diff --git a/jupyter_cache/cli/commands/cmd_main.py b/jupyter_cache/cli/commands/cmd_main.py index 1715a00..8703cfb 100644 --- a/jupyter_cache/cli/commands/cmd_main.py +++ b/jupyter_cache/cli/commands/cmd_main.py @@ -9,7 +9,7 @@ ) @options.PRINT_CACHE_PATH @options.AUTOCOMPLETE -def jcache(*args): +def jcache(*args, **kwargs): """The command line interface of jupyter-cache.""" @@ -17,7 +17,7 @@ def jcache(*args): @options.CACHE_PATH def clear_cache(cache_path): """Clear the cache completely.""" - from jupyter_cache.cache import JupyterCacheBase + from jupyter_cache.cache.main import JupyterCacheBase db = JupyterCacheBase(cache_path) click.confirm("Are you sure you want to permanently clear the cache!?", abort=True) 
diff --git a/jupyter_cache/cli/commands/cmd_stage.py b/jupyter_cache/cli/commands/cmd_stage.py index c7e5519..2f6e34a 100644 --- a/jupyter_cache/cli/commands/cmd_stage.py +++ b/jupyter_cache/cli/commands/cmd_stage.py @@ -1,18 +1,10 @@ import sys import click -import tabulate -import yaml from jupyter_cache.cli.commands.cmd_main import jcache from jupyter_cache.cli import arguments, options -from jupyter_cache.cli.utils import shorten_path -from jupyter_cache.cache import JupyterCacheBase -from jupyter_cache.base import ( # noqa: F401 - CachingError, - RetrievalError, - NbValidityError, -) +from jupyter_cache.cli.utils import shorten_path, get_cache @jcache.group("stage") @@ -26,7 +18,7 @@ def cmnd_stage(): @options.CACHE_PATH def stage_nbs(cache_path, nbpaths): """Stage notebook(s) for execution.""" - db = JupyterCacheBase(cache_path) + db = get_cache(cache_path) for path in nbpaths: # TODO deal with errors (print all at end? or option to ignore) click.echo("Staging: {}".format(path)) @@ -40,7 +32,7 @@ def stage_nbs(cache_path, nbpaths): @options.CACHE_PATH def stage_nb(cache_path, nbpath, asset_paths): """Stage a notebook, with possible assets.""" - db = JupyterCacheBase(cache_path) + db = get_cache(cache_path) db.stage_notebook_file(nbpath, asset_paths) click.secho("Success!", fg="green") @@ -51,7 +43,7 @@ def stage_nb(cache_path, nbpath, asset_paths): @options.REMOVE_ALL def unstage_nbs_uri(cache_path, nbpaths, remove_all): """Un-stage notebook(s), by URI.""" - db = JupyterCacheBase(cache_path) + db = get_cache(cache_path) if remove_all: nbpaths = [record.uri for record in db.list_staged_records()] for path in nbpaths: @@ -67,7 +59,7 @@ def unstage_nbs_uri(cache_path, nbpaths, remove_all): @options.REMOVE_ALL def unstage_nbs_id(cache_path, pks, remove_all): """Un-stage notebook(s), by ID.""" - db = JupyterCacheBase(cache_path) + db = get_cache(cache_path) if remove_all: pks = [record.pk for record in db.list_staged_records()] for pk in pks: @@ -101,7 +93,9 @@ 
def format_staged_record(record, cache_record, path_length, assets=True): @options.PATH_LENGTH def list_staged(cache_path, compare, path_length): """List notebooks staged for possible execution.""" - db = JupyterCacheBase(cache_path) + import tabulate + + db = get_cache(cache_path) records = db.list_staged_records() if not records: click.secho("No Staged Notebooks", fg="blue") @@ -119,7 +113,9 @@ def list_staged(cache_path, compare, path_length): @arguments.PK def show_staged(cache_path, pk): """Show details of a staged notebook.""" - db = JupyterCacheBase(cache_path) + import yaml + + db = get_cache(cache_path) try: record = db.get_staged_record(pk) except KeyError: diff --git a/jupyter_cache/cli/utils.py b/jupyter_cache/cli/utils.py index 9cedb54..d036797 100644 --- a/jupyter_cache/cli/utils.py +++ b/jupyter_cache/cli/utils.py @@ -1,10 +1,16 @@ -from pathlib import Path - - def shorten_path(file_path, length): """Split the path into separate parts, select the last 'length' elements and join them again """ + from pathlib import Path + if length is None: return Path(file_path) return Path(*Path(file_path).parts[-length:]) + + +def get_cache(path): + # load lazily, to improve CLI speed + from jupyter_cache.cache.main import JupyterCacheBase + + return JupyterCacheBase(path) diff --git a/jupyter_cache/executors/__init__.py b/jupyter_cache/executors/__init__.py index b184694..5bbf1ce 100644 --- a/jupyter_cache/executors/__init__.py +++ b/jupyter_cache/executors/__init__.py @@ -1 +1 @@ -from .base import JupyterCacheAbstract, load_executor # noqa: F401 +from .base import JupyterExecutorAbstract, load_executor # noqa: F401 diff --git a/tests/test_cache.py b/tests/test_cache.py index 88d7a4f..f725547 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -4,7 +4,7 @@ import nbformat as nbf import pytest -from jupyter_cache.cache import JupyterCacheBase +from jupyter_cache.cache.main import JupyterCacheBase from jupyter_cache.base import NbValidityError diff --git 
a/tests/test_cli.py b/tests/test_cli.py index 8070dd2..c970382 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2,7 +2,7 @@ from click.testing import CliRunner -from jupyter_cache.cache import JupyterCacheBase +from jupyter_cache.cache.main import JupyterCacheBase from jupyter_cache.cli.commands import cmd_main, cmd_cache, cmd_stage NB_PATH = os.path.join(os.path.realpath(os.path.dirname(__file__)), "notebooks") From 7ced49549aa152bb9411888ce4e983c98e0fc739 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 29 Feb 2020 15:35:25 +1100 Subject: [PATCH 7/8] Minor command description update --- jupyter_cache/cli/commands/cmd_cache.py | 2 +- jupyter_cache/cli/commands/cmd_stage.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jupyter_cache/cli/commands/cmd_cache.py b/jupyter_cache/cli/commands/cmd_cache.py index c5f98d9..1724040 100644 --- a/jupyter_cache/cli/commands/cmd_cache.py +++ b/jupyter_cache/cli/commands/cmd_cache.py @@ -138,7 +138,7 @@ def cache_file(db, nbpath, validate, overwrite, artifact_paths=()): @options.VALIDATE_NB @options.OVERWRITE_CACHED def cache_nb(cache_path, artifact_paths, nbpath, validate, overwrite): - """Cache a notebook that has already been executed.""" + """Cache a notebook, with possible artefact files.""" db = get_cache(cache_path) success = cache_file(db, nbpath, validate, overwrite, artifact_paths) if success: diff --git a/jupyter_cache/cli/commands/cmd_stage.py b/jupyter_cache/cli/commands/cmd_stage.py index 2f6e34a..e2420a9 100644 --- a/jupyter_cache/cli/commands/cmd_stage.py +++ b/jupyter_cache/cli/commands/cmd_stage.py @@ -31,7 +31,7 @@ def stage_nbs(cache_path, nbpaths): @options.NB_PATH @options.CACHE_PATH def stage_nb(cache_path, nbpath, asset_paths): - """Stage a notebook, with possible assets.""" + """Stage a notebook, with possible asset files.""" db = get_cache(cache_path) db.stage_notebook_file(nbpath, asset_paths) click.secho("Success!", fg="green") From 
700a94aac6dc406dca4294bdc64e17221a0765da Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Sat, 29 Feb 2020 15:36:52 +1100 Subject: [PATCH 8/8] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8a38267..85030fd 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ Options: Commands: add-many Cache notebook(s) that have already been executed. - add-one Cache a notebook that has already been executed. + add-one Cache a notebook, with possible artefact files. cat-artifact Print the contents of a cached artefact. diff-nb Print a diff of a notebook to one stored in the cache. list List cached notebook records in the cache. @@ -212,7 +212,7 @@ Options: Commands: add-many Stage notebook(s) for execution. - add-one Stage a notebook, with possible assets. + add-one Stage a notebook, with possible asset files. list List notebooks staged for possible execution. remove-ids Un-stage notebook(s), by ID. remove-uris Un-stage notebook(s), by URI.