Skip to content

Commit

Permalink
feat: data export, needs more polish & testing
Browse files Browse the repository at this point in the history
  • Loading branch information
makkus committed Feb 6, 2024
1 parent 1312a5f commit b68b6db
Show file tree
Hide file tree
Showing 95 changed files with 3,806 additions and 1,230 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install ruff==0.0.257
pip install ruff>=0.1.8
# Include `--format=github` to enable automatic inline annotations.
- name: Run Ruff
run: ruff --format=github src/
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,4 @@ environment.yaml
.tzapinclude
.pixi
dev.py
store.sqlite
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ repos:

- repo: https://github.com/charliermarsh/ruff-pre-commit
# Ruff version.
rev: 'v0.0.257'
rev: 'v0.1.8'
hooks:
- id: ruff

Expand Down
26 changes: 26 additions & 0 deletions docs/development/stores.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# *kiara* stores

This page contains some information about how *kiara* stores work.

Practically, there are two types of stores in *kiara*:

- *archives*: stores that can only be read from, but not written to
- *stores*: atual 'stores', those are read as well as write

*kiara* has different store types, depending on what exactly is stored:

- *data stores*: stores that store actual data, those are the most important ones
- *alias stores*: stores that keep human readable references (aliases), and link them to actual data (using their value_id)
- *job stores*: stores details and records about past jobs that were run in a *kiara* instance

## Base store

All archives & stores inherit from the base class 'kiara.registries.BaseArchive', which manages basic attributes like thie stores id, it's configuration, and it holds a reference to the current kiara context.

As a developer, you probably won't be using this directly, but you will inherit from either a higher level abstract base class, in case of data-stores that would be:

- `kiara.registries.data.DataArchive`
- `kiara.registries.data.DataStore`

Depending on what you want to store, it's a good idea to check out the source code of those base classes, and look up which methods you need to implement.
Also, you can check out the default implementation of such a store/archive ('filesystem'-based in all cases), to get an idea what needs to happen in each of those methods.
20 changes: 15 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ dependencies = [
"humanfriendly>=10.0",
"importlib-metadata>=3.0.0.0,<8.0.0",
"jinja2>=3.0.1",
"lz4>=4.3.0",
"patool>=1.12",
"mistune>=3.0.0",
"mmhash3>=3.0.1",
Expand All @@ -92,6 +93,7 @@ dependencies = [
"stevedore>=5.0.0,<6.0.0",
"structlog>=21.5.0",
"tzlocal>=2.1,<6.0",
"zstandard>=0.22.0"
]
dynamic = ["version"]

Expand All @@ -106,10 +108,16 @@ filesystem_job_archive = "kiara.registries.jobs.job_store.filesystem_store:FileS
filesystem_job_store = "kiara.registries.jobs.job_store.filesystem_store:FileSystemJobStore"
filesystem_alias_archive = "kiara.registries.aliases.archives:FileSystemAliasArchive"
filesystem_alias_store = "kiara.registries.aliases.archives:FileSystemAliasStore"
filesystem_destiny_archive = "kiara.registries.destinies.filesystem_store:FileSystemDestinyArchive"
filesystem_destiny_store = "kiara.registries.destinies.filesystem_store:FileSystemDestinyStore"
filesystem_workflow_archive = "kiara.registries.workflows.archives:FileSystemWorkflowArchive"
filesystem_workflow_store = "kiara.registries.workflows.archives:FileSystemWorkflowStore"
sqlite_data_archive = "kiara.registries.data.data_store.sqlite_store:SqliteDataArchive"
sqlite_data_store = "kiara.registries.data.data_store.sqlite_store:SqliteDataStore"
sqlite_alias_archive = "kiara.registries.aliases.sqlite_store:SqliteAliasArchive"
sqlite_alias_store = "kiara.registries.aliases.sqlite_store:SqliteAliasStore"
sqlite_job_archive = "kiara.registries.jobs.job_store.sqlite_store:SqliteJobArchive"
sqlite_job_store = "kiara.registries.jobs.job_store.sqlite_store:SqliteJobStore"
sqlite_workflow_archive = "kiara.registries.workflows.sqlite_store:SqliteWorkflowArchive"
sqlite_workflow_store = "kiara.registries.workflows.sqlite_store:SqliteWorkflowStore"

[project.entry-points."kiara.cli_subcommands"]

Expand Down Expand Up @@ -262,7 +270,7 @@ dev_utils = [
"pytest>=6.2.2",
"pytest-xdist>=3.2.1",
"setup-cfg-fmt>=1.16.0",
"ruff>=0.0.272",
"ruff>=0.1.8",
"types-Deprecated",
"types-PyYAML",
"types-pkg-resources",
Expand Down Expand Up @@ -385,7 +393,7 @@ select = [
"PIE",
]
#select = ["E", "F", "RUF100", "W", "I001"]
ignore = ["E501", "S101", "SIM118", "SIM108", "PLR2004", "PLR0913", "S110", "PIE810", "PLR0911", "PLR0915", "PLR0912", "D", "D401", "PLW0603", "PLR5501", "PLW2901"]
ignore = ["E501", "S101", "SIM118", "SIM108", "PLR2004", "PLR0913", "S110", "PIE810", "PLR0911", "PLR0915", "PLR0912", "D", "D401", "PLW0603", "PLR5501", "PLW2901", "S603"]

fix = true
fixable = ["E", "F", "RUF100", "I001", "Q", "D"]
Expand Down Expand Up @@ -488,6 +496,7 @@ module = [
"importlib_metadata.*",
"importlib_resources.*",
"jupytext",
"lz4.*",
"mmh3",
"pickle5",
"pp",
Expand All @@ -509,6 +518,7 @@ module = [
"streamlit.*",
"textual.*",
"uvloop",
"uvicorn"
"uvicorn",
"zstandard"
]
ignore_missing_imports = true
2 changes: 1 addition & 1 deletion src/kiara/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def get_version() -> str:
else:
__version__ = "unknown"

except (Exception):
except Exception:
__version__ = "unknown"

if __version__ is None:
Expand Down
74 changes: 57 additions & 17 deletions src/kiara/context/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
from kiara.registries import KiaraArchive
from kiara.registries.aliases import AliasRegistry
from kiara.registries.data import DataRegistry
from kiara.registries.destinies.registry import DestinyRegistry
from kiara.registries.environment import EnvironmentRegistry
from kiara.registries.events.metadata import CreateMetadataDestinies
from kiara.registries.events.registry import EventRegistry
Expand All @@ -45,9 +44,10 @@
from kiara.registries.rendering import RenderRegistry
from kiara.registries.types import TypeRegistry
from kiara.registries.workflows import WorkflowRegistry
from kiara.utils import log_exception
from kiara.utils import log_exception, log_message
from kiara.utils.class_loading import find_all_archive_types
from kiara.utils.operations import filter_operations
from kiara.utils.stores import check_external_archive

# Copyright (c) 2021, University of Luxembourg / DHARPA project
# Copyright (c) 2021, Markus Binsteiner
Expand Down Expand Up @@ -77,7 +77,6 @@ def explain(item: Any, kiara: Union[None, "Kiara"] = None):


class Kiara(object):

"""
The core context of a kiara session.
Expand Down Expand Up @@ -140,7 +139,7 @@ def __init__(
self._kiara_model_registry: ModelRegistry = ModelRegistry.instance()

self._alias_registry: AliasRegistry = AliasRegistry(kiara=self)
self._destiny_registry: DestinyRegistry = DestinyRegistry(kiara=self)
# self._destiny_registry: DestinyRegistry = DestinyRegistry(kiara=self)

self._workflow_registry: WorkflowRegistry = WorkflowRegistry(kiara=self)

Expand All @@ -160,31 +159,37 @@ def __init__(
self._archives: Dict[str, KiaraArchive] = {}

for archive_alias, archive in self._config.archives.items():

# this is just to make old context that still had that not error out
if "_destiny_" in archive.archive_type:
continue

archive_cls = self._archive_types.get(archive.archive_type, None)

if archive_cls is None:
raise Exception(
f"Can't create context: no archive type '{archive.archive_type}' available. Available types: {', '.join(self._archive_types.keys())}"
)

config_cls = archive_cls._config_cls
archive_config = config_cls(**archive.config)
archive_obj = archive_cls(archive_id=archive.archive_uuid, config=archive_config) # type: ignore
archive_obj = archive_cls(archive_alias=archive_alias, archive_config=archive_config) # type: ignore
for supported_type in archive_obj.supported_item_types():
if supported_type == "data":
self.data_registry.register_data_archive(
archive_obj, alias=archive_alias # type: ignore
archive_obj, # type: ignore
)
if supported_type == "job_record":
self.job_registry.register_job_archive(archive_obj, alias=archive_alias) # type: ignore
self.job_registry.register_job_archive(archive_obj) # type: ignore

if supported_type == "alias":
self.alias_registry.register_archive(archive_obj, alias=archive_alias) # type: ignore
self.alias_registry.register_archive(archive_obj) # type: ignore

if supported_type == "destiny":
self.destiny_registry.register_destiny_archive(archive_obj, alias=archive_alias) # type: ignore
# if supported_type == "destiny":
# self.destiny_registry.register_destiny_archive(archive_obj) # type: ignore

if supported_type == "workflow":
self.workflow_registry.register_archive(archive_obj, alias=archive_alias) # type: ignore
self.workflow_registry.register_archive(archive_obj) # type: ignore

if self._runtime_config.lock_context:
self.lock_context()
Expand Down Expand Up @@ -258,9 +263,9 @@ def kiara_model_registry(self) -> ModelRegistry:
def alias_registry(self) -> AliasRegistry:
return self._alias_registry

@property
def destiny_registry(self) -> DestinyRegistry:
return self._destiny_registry
# @property
# def destiny_registry(self) -> DestinyRegistry:
# return self._destiny_registry

@property
def job_registry(self) -> JobRegistry:
Expand Down Expand Up @@ -313,6 +318,41 @@ def module_type_names(self) -> Iterable[str]:
# ===================================================================================================
# kiara session API methods

def register_external_archive(
self,
archive: Union[str, KiaraArchive, Iterable[Union[KiaraArchive, str]]],
allow_write_access: bool = False,
) -> Dict[str, str]:
"""Register one or several external archives with the context.
In case you provide KiaraArchive instances, they will be modified in case the provided 'allow_write_access' is different from the 'is_force_read_only' attribute of the archive.
"""

archive_instances = check_external_archive(
archive=archive, allow_write_access=allow_write_access
)

result = {}
for archive_type, _archive_inst in archive_instances.items():
log_message(
"register.external.archive",
archive=_archive_inst.archive_alias,
allow_write_access=allow_write_access,
)

_archive_inst.set_force_read_only(not allow_write_access)

if archive_type == "data":
result["data"] = self.data_registry.register_data_archive(_archive_inst) # type: ignore
if archive_type == "alias":
result["alias"] = self.alias_registry.register_archive(_archive_inst) # type: ignore
if archive_type == "job_record":
result["job_record"] = self.job_registry.register_job_archive(_archive_inst) # type: ignore
else:
raise Exception(f"Can't register archive of type '{archive_type}'.")

return result

def create_manifest(
self, module_or_operation: str, config: Union[Mapping[str, Any], None] = None
) -> Manifest:
Expand Down Expand Up @@ -433,8 +473,8 @@ def get_all_archives(self) -> Dict[KiaraArchive, Set[str]]:
result.setdefault(archive, set()).add(alias)
for alias, archive in self.alias_registry.alias_archives.items():
result.setdefault(archive, set()).add(alias)
for alias, archive in self.destiny_registry.destiny_archives.items():
result.setdefault(archive, set()).add(alias)
# for alias, archive in self.destiny_registry.destiny_archives.items():
# result.setdefault(archive, set()).add(alias)
for alias, archive in self.job_registry.job_archives.items():
result.setdefault(archive, set()).add(alias)
for alias, archive in self.workflow_registry.workflow_archives.items():
Expand Down Expand Up @@ -513,7 +553,7 @@ def _retrieve_data_to_hash(self) -> Any:

def get_info(self, item_type: str, item_id: str) -> ItemInfo:

if item_type == "data_type" or item_type == "data_types":
if item_type in ("data_type", "data_types"):
group_info: InfoItemGroup = self.data_types
elif "module" in item_type:
group_info = self.module_types
Expand Down
Loading

0 comments on commit b68b6db

Please sign in to comment.