From b68b6db553a5dcbc6ff904129f030c80eaf73998 Mon Sep 17 00:00:00 2001 From: Markus Binsteiner Date: Tue, 6 Feb 2024 12:50:19 +0100 Subject: [PATCH] feat: data export, needs more polish & testing --- .github/workflows/build-linux.yaml | 2 +- .gitignore | 1 + .pre-commit-config.yaml | 2 +- docs/development/stores.md | 26 + pyproject.toml | 20 +- src/kiara/__init__.py | 2 +- src/kiara/context/__init__.py | 74 +- src/kiara/context/config.py | 439 ++++++++--- src/kiara/context/runtime_config.py | 2 +- src/kiara/data_types/__init__.py | 2 - .../included_core_types/__init__.py | 6 - .../included_core_types/filesystem.py | 7 +- .../included_core_types/internal/__init__.py | 4 - .../internal/render_value.py | 2 - .../included_core_types/metadata.py | 1 - .../included_core_types/serialization.py | 1 - src/kiara/defaults.py | 6 +- src/kiara/doc/__init__.py | 1 - src/kiara/doc/mkdocs_macros_kiara.py | 2 +- src/kiara/doc/mkdocstrings/handler.py | 1 - src/kiara/exceptions.py | 2 +- src/kiara/interfaces/__init__.py | 14 +- src/kiara/interfaces/cli/context/commands.py | 16 +- src/kiara/interfaces/cli/data/commands.py | 220 +++++- src/kiara/interfaces/python_api/__init__.py | 132 +++- src/kiara/interfaces/python_api/models/doc.py | 3 - .../interfaces/python_api/models/info.py | 139 +++- .../interfaces/python_api/models/workflow.py | 1 - src/kiara/interfaces/python_api/workflow.py | 2 - src/kiara/models/__init__.py | 1 - src/kiara/models/aliases/__init__.py | 1 - src/kiara/models/archives.py | 10 +- src/kiara/models/documentation.py | 5 - src/kiara/models/events/alias_registry.py | 5 +- src/kiara/models/filesystem.py | 10 +- src/kiara/models/module/__init__.py | 1 - src/kiara/models/module/destiny.py | 15 +- src/kiara/models/module/jobs.py | 29 +- src/kiara/models/module/manifest.py | 1 - src/kiara/models/module/persistence.py | 1 - src/kiara/models/module/pipeline/__init__.py | 3 - .../models/module/pipeline/controller.py | 1 - src/kiara/models/module/pipeline/pipeline.py | 1 - src/kiara/models/module/pipeline/structure.py | 5 +- .../models/module/pipeline/value_refs.py | 6 - src/kiara/models/python_class.py | 1 - src/kiara/models/rendering/__init__.py | 1 - .../runtime_environment/operating_system.py | 1 - src/kiara/models/values/matchers.py | 1 - src/kiara/models/values/value.py | 90 +-- .../included_metadata_types/__init__.py | 3 - src/kiara/models/values/value_schema.py | 1 - src/kiara/models/workflow.py | 2 +- src/kiara/modules/__init__.py | 2 - .../included_core_modules/filesystem.py | 38 +- .../modules/included_core_modules/metadata.py | 1 - .../modules/included_core_modules/pipeline.py | 1 - .../included_core_modules/render_value.py | 1 - .../included_core_modules/serialization.py | 4 +- .../included_core_operations/metadata.py | 4 +- .../included_core_operations/pretty_print.py | 1 - .../included_core_operations/render_data.py | 2 - .../included_core_operations/render_value.py | 2 - .../included_core_operations/serialize.py | 3 +- src/kiara/registries/__init__.py | 402 +++++++++- src/kiara/registries/aliases/__init__.py | 271 +++++-- src/kiara/registries/aliases/archives.py | 46 +- src/kiara/registries/aliases/sqlite_store.py | 201 +++++ src/kiara/registries/data/__init__.py | 339 +++++++-- .../registries/data/data_store/__init__.py | 270 +++++-- .../data/data_store/filesystem_store.py | 197 +++-- .../data/data_store/sqlite_store.py | 702 ++++++++++++++++++ src/kiara/registries/destinies/__init__.py | 46 -- .../registries/destinies/filesystem_store.py | 191 ----- src/kiara/registries/destinies/registry.py | 309 -------- src/kiara/registries/events/metadata.py | 22 +- src/kiara/registries/jobs/__init__.py | 38 +- .../jobs/job_store/filesystem_store.py | 48 +- .../registries/jobs/job_store/sqlite_store.py | 234 ++++++ src/kiara/registries/rendering/__init__.py | 1 - src/kiara/registries/templates/__init__.py | 1 - src/kiara/registries/workflows/__init__.py | 33 +- src/kiara/registries/workflows/archives.py | 40 +- .../registries/workflows/sqlite_store.py | 134 ++++ .../renderers/included_renderers/pipeline.py | 2 - src/kiara/utils/class_loading.py | 11 +- src/kiara/utils/cli/__init__.py | 1 + src/kiara/utils/cli/rich_click.py | 4 +- src/kiara/utils/cli/run.py | 11 + src/kiara/utils/concurrency.py | 1 - src/kiara/utils/debug.py | 4 +- src/kiara/utils/graphs.py | 2 +- src/kiara/utils/hashfs/__init__.py | 3 - src/kiara/utils/output.py | 11 +- src/kiara/utils/stores.py | 77 ++ 95 files changed, 3806 insertions(+), 1230 deletions(-) create mode 100644 docs/development/stores.md create mode 100644 src/kiara/registries/aliases/sqlite_store.py create mode 100644 src/kiara/registries/data/data_store/sqlite_store.py delete mode 100644 src/kiara/registries/destinies/__init__.py delete mode 100644 src/kiara/registries/destinies/filesystem_store.py delete mode 100644 src/kiara/registries/destinies/registry.py create mode 100644 src/kiara/registries/jobs/job_store/sqlite_store.py create mode 100644 src/kiara/registries/workflows/sqlite_store.py create mode 100644 src/kiara/utils/stores.py diff --git a/.github/workflows/build-linux.yaml b/.github/workflows/build-linux.yaml index 04f9075e7..32dbc1816 100644 --- a/.github/workflows/build-linux.yaml +++ b/.github/workflows/build-linux.yaml @@ -131,7 +131,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install ruff==0.0.257 + pip install ruff>=0.1.8 # Include `--format=github` to enable automatic inline annotations. - name: Run Ruff run: ruff --format=github src/ diff --git a/.gitignore b/.gitignore index 97af67720..a7a92a1e9 100644 --- a/.gitignore +++ b/.gitignore @@ -71,3 +71,4 @@ environment.yaml .tzapinclude .pixi dev.py +store.sqlite diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5d8a0ad38..df8ad4ec1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,7 +43,7 @@ repos: - repo: https://github.com/charliermarsh/ruff-pre-commit # Ruff version. - rev: 'v0.0.257' + rev: 'v0.1.8' hooks: - id: ruff diff --git a/docs/development/stores.md b/docs/development/stores.md new file mode 100644 index 000000000..6580828eb --- /dev/null +++ b/docs/development/stores.md @@ -0,0 +1,26 @@ +# *kiara* stores + +This page contains some information about how *kiara* stores work. + +Practically, there are two types of stores in *kiara*: + +- *archives*: stores that can only be read from, but not written to +- *stores*: atual 'stores', those are read as well as write + +*kiara* has different store types, depending on what exactly is stored: + +- *data stores*: stores that store actual data, those are the most important ones +- *alias stores*: stores that keep human readable references (aliases), and link them to actual data (using their value_id) +- *job stores*: stores details and records about past jobs that were run in a *kiara* instance + +## Base store + +All archives & stores inherit from the base class 'kiara.registries.BaseArchive', which manages basic attributes like thie stores id, it's configuration, and it holds a reference to the current kiara context. + +As a developer, you probably won't be using this directly, but you will inherit from either a higher level abstract base class, in case of data-stores that would be: + +- `kiara.registries.data.DataArchive` +- `kiara.registries.data.DataStore` + +Depending on what you want to store, it's a good idea to check out the source code of those base classes, and look up which methods you need to implement. +Also, you can check out the default implementation of such a store/archive ('filesystem'-based in all cases), to get an idea what needs to happen in each of those methods. diff --git a/pyproject.toml b/pyproject.toml index acb428b24..ecc7ccb53 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ dependencies = [ "humanfriendly>=10.0", "importlib-metadata>=3.0.0.0,<8.0.0", "jinja2>=3.0.1", + "lz4>=4.3.0", "patool>=1.12", "mistune>=3.0.0", "mmhash3>=3.0.1", @@ -92,6 +93,7 @@ dependencies = [ "stevedore>=5.0.0,<6.0.0", "structlog>=21.5.0", "tzlocal>=2.1,<6.0", + "zstandard>=0.22.0" ] dynamic = ["version"] @@ -106,10 +108,16 @@ filesystem_job_archive = "kiara.registries.jobs.job_store.filesystem_store:FileS filesystem_job_store = "kiara.registries.jobs.job_store.filesystem_store:FileSystemJobStore" filesystem_alias_archive = "kiara.registries.aliases.archives:FileSystemAliasArchive" filesystem_alias_store = "kiara.registries.aliases.archives:FileSystemAliasStore" -filesystem_destiny_archive = "kiara.registries.destinies.filesystem_store:FileSystemDestinyArchive" -filesystem_destiny_store = "kiara.registries.destinies.filesystem_store:FileSystemDestinyStore" filesystem_workflow_archive = "kiara.registries.workflows.archives:FileSystemWorkflowArchive" filesystem_workflow_store = "kiara.registries.workflows.archives:FileSystemWorkflowStore" +sqlite_data_archive = "kiara.registries.data.data_store.sqlite_store:SqliteDataArchive" +sqlite_data_store = "kiara.registries.data.data_store.sqlite_store:SqliteDataStore" +sqlite_alias_archive = "kiara.registries.aliases.sqlite_store:SqliteAliasArchive" +sqlite_alias_store = "kiara.registries.aliases.sqlite_store:SqliteAliasStore" +sqlite_job_archive = "kiara.registries.jobs.job_store.sqlite_store:SqliteJobArchive" +sqlite_job_store = "kiara.registries.jobs.job_store.sqlite_store:SqliteJobStore" +sqlite_workflow_archive = "kiara.registries.workflows.sqlite_store:SqliteWorkflowArchive" +sqlite_workflow_store = "kiara.registries.workflows.sqlite_store:SqliteWorkflowStore" [project.entry-points."kiara.cli_subcommands"] @@ -262,7 +270,7 @@ dev_utils = [ "pytest>=6.2.2", "pytest-xdist>=3.2.1", "setup-cfg-fmt>=1.16.0", - "ruff>=0.0.272", + "ruff>=0.1.8", "types-Deprecated", "types-PyYAML", "types-pkg-resources", @@ -385,7 +393,7 @@ select = [ "PIE", ] #select = ["E", "F", "RUF100", "W", "I001"] -ignore = ["E501", "S101", "SIM118", "SIM108", "PLR2004", "PLR0913", "S110", "PIE810", "PLR0911", "PLR0915", "PLR0912", "D", "D401", "PLW0603", "PLR5501", "PLW2901"] +ignore = ["E501", "S101", "SIM118", "SIM108", "PLR2004", "PLR0913", "S110", "PIE810", "PLR0911", "PLR0915", "PLR0912", "D", "D401", "PLW0603", "PLR5501", "PLW2901", "S603"] fix = true fixable = ["E", "F", "RUF100", "I001", "Q", "D"] @@ -488,6 +496,7 @@ module = [ "importlib_metadata.*", "importlib_resources.*", "jupytext", + "lz4.*", "mmh3", "pickle5", "pp", @@ -509,6 +518,7 @@ module = [ "streamlit.*", "textual.*", "uvloop", - "uvicorn" + "uvicorn", + "zstandard" ] ignore_missing_imports = true diff --git a/src/kiara/__init__.py b/src/kiara/__init__.py index d2ad4ed8e..34ac71144 100644 --- a/src/kiara/__init__.py +++ b/src/kiara/__init__.py @@ -154,7 +154,7 @@ def get_version() -> str: else: __version__ = "unknown" - except (Exception): + except Exception: __version__ = "unknown" if __version__ is None: diff --git a/src/kiara/context/__init__.py b/src/kiara/context/__init__.py index fdf43c570..d7a4ea35f 100644 --- a/src/kiara/context/__init__.py +++ b/src/kiara/context/__init__.py @@ -33,7 +33,6 @@ from kiara.registries import KiaraArchive from kiara.registries.aliases import AliasRegistry from kiara.registries.data import DataRegistry -from kiara.registries.destinies.registry import DestinyRegistry from kiara.registries.environment import EnvironmentRegistry from kiara.registries.events.metadata import CreateMetadataDestinies from kiara.registries.events.registry import EventRegistry @@ -45,9 +44,10 @@ from kiara.registries.rendering import RenderRegistry from kiara.registries.types import TypeRegistry from kiara.registries.workflows import WorkflowRegistry -from kiara.utils import log_exception +from kiara.utils import log_exception, log_message from kiara.utils.class_loading import find_all_archive_types from kiara.utils.operations import filter_operations +from kiara.utils.stores import check_external_archive # Copyright (c) 2021, University of Luxembourg / DHARPA project # Copyright (c) 2021, Markus Binsteiner @@ -77,7 +77,6 @@ def explain(item: Any, kiara: Union[None, "Kiara"] = None): class Kiara(object): - """ The core context of a kiara session. @@ -140,7 +139,7 @@ def __init__( self._kiara_model_registry: ModelRegistry = ModelRegistry.instance() self._alias_registry: AliasRegistry = AliasRegistry(kiara=self) - self._destiny_registry: DestinyRegistry = DestinyRegistry(kiara=self) + # self._destiny_registry: DestinyRegistry = DestinyRegistry(kiara=self) self._workflow_registry: WorkflowRegistry = WorkflowRegistry(kiara=self) @@ -160,7 +159,13 @@ def __init__( self._archives: Dict[str, KiaraArchive] = {} for archive_alias, archive in self._config.archives.items(): + + # this is just to make old context that still had that not error out + if "_destiny_" in archive.archive_type: + continue + archive_cls = self._archive_types.get(archive.archive_type, None) + if archive_cls is None: raise Exception( f"Can't create context: no archive type '{archive.archive_type}' available. Available types: {', '.join(self._archive_types.keys())}" @@ -168,23 +173,23 @@ def __init__( config_cls = archive_cls._config_cls archive_config = config_cls(**archive.config) - archive_obj = archive_cls(archive_id=archive.archive_uuid, config=archive_config) # type: ignore + archive_obj = archive_cls(archive_alias=archive_alias, archive_config=archive_config) # type: ignore for supported_type in archive_obj.supported_item_types(): if supported_type == "data": self.data_registry.register_data_archive( - archive_obj, alias=archive_alias # type: ignore + archive_obj, # type: ignore ) if supported_type == "job_record": - self.job_registry.register_job_archive(archive_obj, alias=archive_alias) # type: ignore + self.job_registry.register_job_archive(archive_obj) # type: ignore if supported_type == "alias": - self.alias_registry.register_archive(archive_obj, alias=archive_alias) # type: ignore + self.alias_registry.register_archive(archive_obj) # type: ignore - if supported_type == "destiny": - self.destiny_registry.register_destiny_archive(archive_obj, alias=archive_alias) # type: ignore + # if supported_type == "destiny": + # self.destiny_registry.register_destiny_archive(archive_obj) # type: ignore if supported_type == "workflow": - self.workflow_registry.register_archive(archive_obj, alias=archive_alias) # type: ignore + self.workflow_registry.register_archive(archive_obj) # type: ignore if self._runtime_config.lock_context: self.lock_context() @@ -258,9 +263,9 @@ def kiara_model_registry(self) -> ModelRegistry: def alias_registry(self) -> AliasRegistry: return self._alias_registry - @property - def destiny_registry(self) -> DestinyRegistry: - return self._destiny_registry + # @property + # def destiny_registry(self) -> DestinyRegistry: + # return self._destiny_registry @property def job_registry(self) -> JobRegistry: @@ -313,6 +318,41 @@ def module_type_names(self) -> Iterable[str]: # =================================================================================================== # kiara session API methods + def register_external_archive( + self, + archive: Union[str, KiaraArchive, Iterable[Union[KiaraArchive, str]]], + allow_write_access: bool = False, + ) -> Dict[str, str]: + """Register one or several external archives with the context. + + In case you provide KiaraArchive instances, they will be modified in case the provided 'allow_write_access' is different from the 'is_force_read_only' attribute of the archive. + """ + + archive_instances = check_external_archive( + archive=archive, allow_write_access=allow_write_access + ) + + result = {} + for archive_type, _archive_inst in archive_instances.items(): + log_message( + "register.external.archive", + archive=_archive_inst.archive_alias, + allow_write_access=allow_write_access, + ) + + _archive_inst.set_force_read_only(not allow_write_access) + + if archive_type == "data": + result["data"] = self.data_registry.register_data_archive(_archive_inst) # type: ignore + if archive_type == "alias": + result["alias"] = self.alias_registry.register_archive(_archive_inst) # type: ignore + if archive_type == "job_record": + result["job_record"] = self.job_registry.register_job_archive(_archive_inst) # type: ignore + else: + raise Exception(f"Can't register archive of type '{archive_type}'.") + + return result + def create_manifest( self, module_or_operation: str, config: Union[Mapping[str, Any], None] = None ) -> Manifest: @@ -433,8 +473,8 @@ def get_all_archives(self) -> Dict[KiaraArchive, Set[str]]: result.setdefault(archive, set()).add(alias) for alias, archive in self.alias_registry.alias_archives.items(): result.setdefault(archive, set()).add(alias) - for alias, archive in self.destiny_registry.destiny_archives.items(): - result.setdefault(archive, set()).add(alias) + # for alias, archive in self.destiny_registry.destiny_archives.items(): + # result.setdefault(archive, set()).add(alias) for alias, archive in self.job_registry.job_archives.items(): result.setdefault(archive, set()).add(alias) for alias, archive in self.workflow_registry.workflow_archives.items(): @@ -513,7 +553,7 @@ def _retrieve_data_to_hash(self) -> Any: def get_info(self, item_type: str, item_id: str) -> ItemInfo: - if item_type == "data_type" or item_type == "data_types": + if item_type in ("data_type", "data_types"): group_info: InfoItemGroup = self.data_types elif "module" in item_type: group_info = self.module_types diff --git a/src/kiara/context/config.py b/src/kiara/context/config.py index e49e9d9ce..75d743fc0 100644 --- a/src/kiara/context/config.py +++ b/src/kiara/context/config.py @@ -8,7 +8,17 @@ import os import uuid from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Union +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Literal, + Mapping, + Type, + Union, +) import structlog from pydantic import BaseModel, ConfigDict, field_validator, model_validator @@ -26,17 +36,19 @@ KIARA_CONFIG_FILE_NAME, KIARA_MAIN_CONFIG_FILE, KIARA_MAIN_CONTEXTS_PATH, - METADATA_DESTINY_STORE_MARKER, kiara_app_dirs, ) from kiara.exceptions import KiaraException from kiara.registries.environment import EnvironmentRegistry from kiara.registries.ids import ID_REGISTRY +from kiara.utils import log_message from kiara.utils.files import get_data_from_file if TYPE_CHECKING: from kiara.context import Kiara from kiara.models.context import ContextInfo + from kiara.models.runtime_environment.kiara import KiaraTypesRuntimeEnvironment + from kiara.registries import BaseArchive, KiaraArchive logger = structlog.getLogger() @@ -59,16 +71,155 @@ def config_file_settings_source(settings: BaseSettings) -> Dict[str, Any]: class KiaraArchiveConfig(BaseModel): + """Configuration data that can be used to load an existing kiara archive.""" - archive_id: str = Field(description="The unique archive id.") + # archive_alias: str = Field(description="The unique archive id.") archive_type: str = Field(description="The archive type.") config: Mapping[str, Any] = Field( description="Archive type specific config.", default_factory=dict ) + +class KiaraArchiveReference(BaseModel): + @classmethod + def load_existing_archive( + cls, + archive_uri: str, + store_type: Union[str, None, Iterable[str]] = None, + allow_write_access: bool = False, + **kwargs: Any, + ) -> "KiaraArchiveReference": + + from kiara.utils.class_loading import find_all_archive_types + + archive_types = find_all_archive_types() + + archive_configs: List[KiaraArchiveConfig] = [] + archives: List[KiaraArchive] = [] + + archive_alias = None + + if store_type: + if isinstance(store_type, str): + archive_cls: Union[Type[KiaraArchive], None] = archive_types.get( + store_type, None + ) + if archive_cls is None: + raise Exception( + f"Can't create context: no archive type '{store_type}' available. Available types: {', '.join(archive_types.keys())}" + ) + data = archive_cls.load_archive_config( + archive_uri=archive_uri, + allow_write_access=allow_write_access, + **kwargs, + ) + archive_config = archive_cls._config_cls(**data) + archive: KiaraArchive = archive_cls(archive_config=archive_config) + wrapped_archive_config = KiaraArchiveConfig( + archive_type=store_type, config=data + ) + archive_configs.append(wrapped_archive_config) + archives.append(archive) + else: + for st in store_type: + archive_cls = archive_types.get(st, None) + if archive_cls is None: + raise Exception( + f"Can't create context: no archive type '{store_type}' available. Available types: {', '.join(archive_types.keys())}" + ) + data = archive_cls.load_archive_config( + archive_uri=archive_uri, + allow_write_access=allow_write_access, + **kwargs, + ) + archive_config = archive_cls._config_cls(**data) + archive = archive_cls( + archive_config=archive_config, archive_alias=archive_alias + ) + wrapped_archive_config = KiaraArchiveConfig( + archive_type=st, config=data + ) + archive_configs.append(wrapped_archive_config) + archives.append(archive) + else: + for archive_type, archive_cls in archive_types.items(): + data = archive_cls.load_archive_config( + archive_uri=archive_uri, + allow_write_access=allow_write_access, + **kwargs, + ) + + if data is None: + continue + archive_config = archive_cls._config_cls(**data) + archive = archive_cls( + archive_config=archive_config, archive_alias=archive_alias + ) + wrapped_archive_config = KiaraArchiveConfig( + archive_type=archive_type, config=data + ) + archive_configs.append(wrapped_archive_config) + archives.append(archive) + + if archives is None: + raise Exception( + f"Can't create context: no valid archive found at '{archive_uri}'" + ) + + result = cls( + archive_uri=archive_uri, + allow_write_access=allow_write_access, + archive_configs=archive_configs, + # archive_alias=archive_alias, + ) + result._archives = archives + return result + + archive_uri: str = Field(description="The uri that points to the archive.") + # archive_alias: str = Field( + # description="The alias that is used for the archives contained in here." + # ) + allow_write_access: bool = Field( + description="Whether to allow write access to the archives contained here.", + default=False, + ) + archive_configs: List[KiaraArchiveConfig] = Field( + description="All the archives this kiara context can use and the aliases they are registered with." + ) + _archives: Union[None, List["KiaraArchive"]] = PrivateAttr(default=None) + @property - def archive_uuid(self) -> uuid.UUID: - return uuid.UUID(self.archive_id) + def archives(self) -> List["KiaraArchive"]: + + if self._archives is not None: + return self._archives + + from kiara.utils.class_loading import find_all_archive_types + + archive_types = find_all_archive_types() + + archive_alias = None + + result = [] + for config in self.archive_configs: + if config.archive_type not in archive_types.keys(): + raise Exception( + f"Can't create context: no archive type '{config.archive_type}' available. Available types: {', '.join(archive_types.keys())}" + ) + + archive_cls = archive_types[config.archive_type] + archive_config_data = archive_cls.load_archive_config( + archive_uri=self.archive_uri, + allow_write_access=self.allow_write_access, + ) + archive_config = archive_cls._config_cls(**archive_config_data) + archive = archive_cls( + archive_config=archive_config, archive_alias=archive_alias + ) + result.append(archive) + + self._archives = result + return self._archives class KiaraContextConfig(BaseModel): @@ -95,13 +246,22 @@ def add_pipelines(self, *pipelines: str): "ignore.pipeline", reason="path does not exist", path=pipeline ) - # @property - # def db_url(self): - # return get_kiara_db_url(self.context_folder) + # def create_archive( + # self, archive_alias: str, allow_write_access: bool = False + # ) -> "KiaraArchive": + # """Create the kiara archive with the specified alias. + # + # Make sure you know what you are doing when setting 'allow_write_access' to True. + # """ # - # @property - # def data_directory(self) -> str: - # return os.path.join(self.context_folder, "data") + # store_config = self.archives[archive_alias] + # store = create_store( + # archive_id=store_config.archive_uuid, + # store_type=store_config.archive_type, + # store_config=store_config.config, + # allow_write_access=allow_write_access, + # ) + # return store class KiaraSettings(BaseSettings): @@ -118,6 +278,38 @@ class KiaraSettings(BaseSettings): KIARA_SETTINGS = KiaraSettings() +def create_default_store_config( + store_type: str, stores_base_path: str +) -> KiaraArchiveConfig: + + env_registry = EnvironmentRegistry.instance() + kiara_types: "KiaraTypesRuntimeEnvironment" = env_registry.environments["kiara_types"] # type: ignore + available_archives = kiara_types.archive_types + + assert store_type in available_archives.item_infos.keys() + + from kiara.models.archives import ArchiveTypeInfo + + archive_info: ArchiveTypeInfo = available_archives.item_infos[store_type] + cls: Type[BaseArchive] = archive_info.python_class.get_class() # type: ignore + + log_message( + "create_new_store", + stores_base_path=stores_base_path, + store_type=cls.__name__, + ) + + config = cls._config_cls.create_new_store_config(store_base_path=stores_base_path) + + # store_id: uuid.UUID = config.get_archive_id() + + data_store = KiaraArchiveConfig( + archive_type=store_type, + config=config.model_dump(), + ) + return data_store + + class KiaraConfig(BaseSettings): model_config = SettingsConfigDict( env_prefix="kiara_", extra="forbid", use_enum_values=True @@ -142,10 +334,12 @@ def create_in_folder(cls, path: Union[Path, str]) -> "KiaraConfig": return config @classmethod - def load_from_file(cls, path: Union[Path, None] = None) -> "KiaraConfig": + def load_from_file(cls, path: Union[Path, str, None] = None) -> "KiaraConfig": if path is None: path = Path(KIARA_MAIN_CONFIG_FILE) + elif isinstance(path, str): + path = Path(path) if not path.exists(): raise Exception( @@ -181,6 +375,10 @@ def load_from_file(cls, path: Union[Path, None] = None) -> "KiaraConfig": description="The name of the default context to use if none is provided.", default=DEFAULT_CONTEXT_NAME, ) + default_store_type: Literal["sqlite", "filesystem"] = Field( + description="The default store type to ues if not specified.", + default="sqlite", + ) auto_generate_contexts: bool = Field( description="Whether to auto-generate requested contexts if they don't exist yet.", default=True, @@ -309,119 +507,127 @@ def get_context_config( def _validate_context(self, context_config: KiaraContextConfig) -> bool: - env_registry = EnvironmentRegistry.instance() - from kiara.models.runtime_environment.kiara import KiaraTypesRuntimeEnvironment + changed = False - kiara_types: KiaraTypesRuntimeEnvironment = env_registry.environments["kiara_types"] # type: ignore - available_archives = kiara_types.archive_types + sqlite_base_path = os.path.join(self.stores_base_path, "sqlite_stores") + filesystem_base_path = os.path.join(self.stores_base_path, "filesystem_stores") - changed = False - if DEFAULT_DATA_STORE_MARKER not in context_config.archives.keys(): - data_store_type = "filesystem_data_store" - assert data_store_type in available_archives.item_infos.keys() - - data_store_id = ID_REGISTRY.generate(comment="default data store id") - data_archive_config = { - "archive_path": os.path.abspath( - os.path.join( - self.stores_base_path, data_store_type, str(data_store_id) - ) + def create_default_sqlite_archive_config() -> Dict[str, Any]: + + store_id = str(uuid.uuid4()) + file_name = f"{store_id}.sqlite" + archive_path = Path( + os.path.abspath(os.path.join(sqlite_base_path, file_name)) + ) + + if archive_path.exists(): + raise Exception( + f"Archive path '{archive_path.as_posix()}' already exists." ) - } - data_store = KiaraArchiveConfig( - archive_id=str(data_store_id), - archive_type=data_store_type, - config=data_archive_config, + + archive_path.parent.mkdir(exist_ok=True, parents=True) + + # Connect to the SQLite database (or create it if it doesn't exist) + import sqlite3 + + conn = sqlite3.connect(archive_path) + + # Create a cursor object + c = conn.cursor() + # Create table + c.execute( + """CREATE TABLE archive_metadata + (key text PRIMARY KEY , value text NOT NULL)""" ) - context_config.archives[DEFAULT_DATA_STORE_MARKER] = data_store + c.execute( + "INSERT INTO archive_metadata VALUES ('archive_id', ?)", (store_id,) + ) + conn.commit() + conn.close() + + return {"sqlite_db_path": archive_path.as_posix()} + + default_sqlite_config: Union[Dict[str, Any], None] = None + + if DEFAULT_DATA_STORE_MARKER not in context_config.archives.keys(): + if self.default_store_type == "sqlite": + default_sqlite_config = create_default_sqlite_archive_config() + data_store = KiaraArchiveConfig( + archive_type="sqlite_data_store", config=default_sqlite_config + ) + elif self.default_store_type == "filesystem": + data_store_type = "filesystem_data_store" + data_store = create_default_store_config( + store_type=data_store_type, + stores_base_path=os.path.join(filesystem_base_path, "data"), + ) + else: + raise Exception( + f"Can't create default data store: invalid default store type '{self.default_store_type}'." + ) + + context_config.archives[DEFAULT_DATA_STORE_MARKER] = data_store changed = True if DEFAULT_JOB_STORE_MARKER not in context_config.archives.keys(): - job_store_type = "filesystem_job_store" - assert job_store_type in available_archives.item_infos.keys() - - job_store_id = ID_REGISTRY.generate(comment="default job store id") - job_archive_config = { - "archive_path": os.path.abspath( - os.path.join( - self.stores_base_path, job_store_type, str(job_store_id) - ) + + if self.default_store_type == "sqlite": + + if default_sqlite_config is None: + default_sqlite_config = create_default_sqlite_archive_config() + + job_store = KiaraArchiveConfig( + archive_type="sqlite_job_store", config=default_sqlite_config + ) + elif self.default_store_type == "filesystem": + job_store_type = "filesystem_job_store" + job_store = create_default_store_config( + store_type=job_store_type, + stores_base_path=os.path.join(filesystem_base_path, "jobs"), + ) + else: + raise Exception( + f"Can't create default job store: invalid default store type '{self.default_store_type}'." ) - } - job_store = KiaraArchiveConfig( - archive_id=str(job_store_id), - archive_type=job_store_type, - config=job_archive_config, - ) - context_config.archives[DEFAULT_JOB_STORE_MARKER] = job_store + context_config.archives[DEFAULT_JOB_STORE_MARKER] = job_store changed = True if DEFAULT_ALIAS_STORE_MARKER not in context_config.archives.keys(): - alias_store_type = "filesystem_alias_store" - assert alias_store_type in available_archives.item_infos.keys() - alias_store_id = ID_REGISTRY.generate(comment="default alias store id") - alias_store_config = { - "archive_path": os.path.abspath( - os.path.join( - self.stores_base_path, alias_store_type, str(alias_store_id) - ) + if self.default_store_type == "sqlite": + + if default_sqlite_config is None: + default_sqlite_config = create_default_sqlite_archive_config() + + alias_store = KiaraArchiveConfig( + archive_type="sqlite_alias_store", config=default_sqlite_config + ) + elif self.default_store_type == "filesystem": + alias_store_type = "filesystem_alias_store" + alias_store = create_default_store_config( + store_type=alias_store_type, + stores_base_path=os.path.join(filesystem_base_path, "aliases"), + ) + else: + raise Exception( + f"Can't create default alias store: invalid default store type '{self.default_store_type}'." ) - } - alias_store = KiaraArchiveConfig( - archive_id=str(alias_store_id), - archive_type=alias_store_type, - config=alias_store_config, - ) - context_config.archives[DEFAULT_ALIAS_STORE_MARKER] = alias_store + context_config.archives[DEFAULT_ALIAS_STORE_MARKER] = alias_store changed = True if DEFAULT_WORKFLOW_STORE_MARKER not in context_config.archives.keys(): workflow_store_type = "filesystem_workflow_store" - assert workflow_store_type in available_archives.item_infos.keys() - workflow_store_id = ID_REGISTRY.generate( - comment="default workflow store id" - ) - workflow_store_config = { - "archive_path": os.path.abspath( - os.path.join( - self.stores_base_path, - workflow_store_type, - str(workflow_store_id), - ) - ) - } - workflow_store = KiaraArchiveConfig( - archive_id=str(workflow_store_id), - archive_type=workflow_store_type, - config=workflow_store_config, - ) - context_config.archives[DEFAULT_WORKFLOW_STORE_MARKER] = workflow_store + # workflow_store_type = "sqlite_workflow_store" - changed = True - - if METADATA_DESTINY_STORE_MARKER not in context_config.archives.keys(): - destiny_store_type = "filesystem_destiny_store" - assert destiny_store_type in available_archives.item_infos.keys() - destiny_store_id = ID_REGISTRY.generate(comment="default destiny store id") - destiny_store_config = { - "archive_path": os.path.abspath( - os.path.join( - self.stores_base_path, destiny_store_type, str(destiny_store_id) - ) - ) - } - destiny_store = KiaraArchiveConfig( - archive_id=str(destiny_store_id), - archive_type=destiny_store_type, - config=destiny_store_config, + workflow_store = create_default_store_config( + store_type=workflow_store_type, + stores_base_path=os.path.join(filesystem_base_path, "workflows"), ) - context_config.archives[METADATA_DESTINY_STORE_MARKER] = destiny_store - + context_config.archives[DEFAULT_WORKFLOW_STORE_MARKER] = workflow_store changed = True return changed @@ -545,7 +751,7 @@ def save(self, path: Union[Path, None] = None): def delete( self, context_name: Union[str, None] = None, dry_run: bool = True - ) -> "ContextInfo": + ) -> Union["ContextInfo", None]: if context_name is None: context_name = self.default_context @@ -556,20 +762,27 @@ def delete( context_config = self.get_context_config( context_name=context_name, auto_generate=False ) - kiara = Kiara(config=context_config, runtime_config=self.runtime_config) - context_summary = ContextInfo.create_from_context( - kiara=kiara, context_name=context_name - ) + context_summary = None + + try: + kiara = Kiara(config=context_config, runtime_config=self.runtime_config) + + context_summary = ContextInfo.create_from_context( + kiara=kiara, context_name=context_name + ) - if dry_run: - return context_summary + if dry_run: + return context_summary - for archive in kiara.get_all_archives().keys(): - archive.delete_archive(archive_id=archive.archive_id) + for archive in kiara.get_all_archives().keys(): + archive.delete_archive(archive_id=archive.archive_id) + except Exception as e: + log_message("delete.context.error", context_name=context_name, error=e) - if context_config._context_config_path is not None: - os.unlink(context_config._context_config_path) + if not dry_run: + if context_config._context_config_path is not None: + os.unlink(context_config._context_config_path) return context_summary diff --git a/src/kiara/context/runtime_config.py b/src/kiara/context/runtime_config.py index 28b967546..a1bd20a16 100644 --- a/src/kiara/context/runtime_config.py +++ b/src/kiara/context/runtime_config.py @@ -19,7 +19,7 @@ class KiaraRuntimeConfig(BaseSettings): job_cache: JobCacheStrategy = Field( description="Name of the strategy that determines when to re-run jobs or use cached results.", - default=JobCacheStrategy.data_hash, + default=JobCacheStrategy.no_cache, ) allow_external: bool = Field( description="Whether to allow external external pipelines.", default=True diff --git a/src/kiara/data_types/__init__.py b/src/kiara/data_types/__init__.py index 242db9727..cf41b8df9 100644 --- a/src/kiara/data_types/__init__.py +++ b/src/kiara/data_types/__init__.py @@ -70,7 +70,6 @@ class DataTypeConfig(BaseModel): - """ Base class that describes the configuration a [``DataType``][kiara.data.data_types.DataType] class accepts. @@ -137,7 +136,6 @@ def __rich_console__( class DataType(abc.ABC, Generic[TYPE_PYTHON_CLS, TYPE_CONFIG_CLS]): - """ Base class that all *kiara* data_types must inherit from. diff --git a/src/kiara/data_types/included_core_types/__init__.py b/src/kiara/data_types/included_core_types/__init__.py index aafeab2ed..9dae7539c 100644 --- a/src/kiara/data_types/included_core_types/__init__.py +++ b/src/kiara/data_types/included_core_types/__init__.py @@ -48,7 +48,6 @@ class NoneType(DataType[SpecialValue, DataTypeConfig]): - """Type indicating a 'None' value.""" _data_type_name: ClassVar[str] = "none" @@ -86,7 +85,6 @@ class AnyType( DataType[TYPE_PYTHON_CLS, TYPE_CONFIG_CLS], Generic[TYPE_PYTHON_CLS, TYPE_CONFIG_CLS], ): - """ 'Any' type, the parent type for most other types. @@ -183,7 +181,6 @@ def render_as__terminal_renderable( class BytesType(AnyType[bytes, DataTypeConfig]): - """An array of bytes.""" _data_type_name: ClassVar[str] = "bytes" @@ -237,7 +234,6 @@ class StringTypeConfig(DataTypeConfig): class StringType(AnyType[str, StringTypeConfig]): - """A string. Can be configured to only allow a list of specific strings by using the `allowed_strings` configuration option. @@ -353,7 +349,6 @@ def validate(cls, value: Any): class DictValueType(AnyType[KiaraDict, DataTypeConfig]): - """ A dictionary. @@ -528,7 +523,6 @@ def render_as__terminal_renderable( class KiaraModelValueBaseType( AnyType[KIARA_MODEL_CLS, TYPE_CONFIG_CLS], Generic[KIARA_MODEL_CLS, TYPE_CONFIG_CLS] ): - """ A value type that is used internally. diff --git a/src/kiara/data_types/included_core_types/filesystem.py b/src/kiara/data_types/included_core_types/filesystem.py index 5bdb169a8..ff1b231d4 100644 --- a/src/kiara/data_types/included_core_types/filesystem.py +++ b/src/kiara/data_types/included_core_types/filesystem.py @@ -37,7 +37,6 @@ class FileTypeConfig(DataTypeConfig): class FileValueType(KiaraModelValueBaseType[KiaraFile, FileTypeConfig]): - """A file.""" _data_type_name: ClassVar[str] = "file" @@ -188,7 +187,6 @@ def _pretty_print_as__terminal_renderable( class FileBundleValueType(AnyType[KiaraFileBundle, FileTypeConfig]): - """A bundle of files (like a folder, zip archive, etc.).""" _data_type_name: ClassVar[str] = "file_bundle" @@ -216,7 +214,10 @@ def serialize(self, data: KiaraFileBundle) -> "SerializedData": file_data[rel_path] = {"type": "file", "codec": "raw", "file": file.path} file_metadata[rel_path] = { "file_name": file.file_name, - # "import_time": file.import_time, + "size": file.size, + "mime_type": file.mime_type, + "metadata": file.metadata, + "metadata_schemas": file.metadata_schemas, } # bundle_metadata = orjson_dumps(data.metadata) diff --git a/src/kiara/data_types/included_core_types/internal/__init__.py b/src/kiara/data_types/included_core_types/internal/__init__.py index b0845fcbb..1fcdefb3c 100644 --- a/src/kiara/data_types/included_core_types/internal/__init__.py +++ b/src/kiara/data_types/included_core_types/internal/__init__.py @@ -31,7 +31,6 @@ class InternalType( DataType[TYPE_PYTHON_CLS, TYPE_CONFIG_CLS], Generic[TYPE_PYTHON_CLS, TYPE_CONFIG_CLS], ): - """'A 'marker' base data type for data types that are (mainly) used internally in kiara..""" _data_type_name: ClassVar[str] = "internal" @@ -95,7 +94,6 @@ def render_as__terminal_renderable( class TerminalRenderable(InternalType[object, DataTypeConfig]): - """ A list of renderable objects, used in the 'rich' Python library, to print to the terminal or in Jupyter. @@ -132,7 +130,6 @@ class InternalModelTypeConfig(DataTypeConfig): class InternalModelValueType(InternalType[KiaraModel, InternalModelTypeConfig]): - """ A value type that is used internally. @@ -238,7 +235,6 @@ def _pretty_print_as__terminal_renderable( class DocumentationModelValueType(InternalModelValueType): - """Documentation for an internal entity.""" _data_type_name: ClassVar[str] = "doc" diff --git a/src/kiara/data_types/included_core_types/internal/render_value.py b/src/kiara/data_types/included_core_types/internal/render_value.py index 88ad741a5..69cd20266 100644 --- a/src/kiara/data_types/included_core_types/internal/render_value.py +++ b/src/kiara/data_types/included_core_types/internal/render_value.py @@ -25,7 +25,6 @@ class RenderSceneTypeConfig(DataTypeConfig): class RenderSceneDataType(InternalType[RenderScene, RenderSceneTypeConfig]): - """A value type to contain information about how to render a value in a specific render scenario.""" _data_type_name: ClassVar[str] = "render_scene" @@ -93,7 +92,6 @@ def _pretty_print_as__terminal_renderable( class RenderValueResultDataType(InternalType[RenderValueResult, DataTypeConfig]): - """A value type to contain information about how to render a value in a specific render scenario.""" _data_type_name: ClassVar[str] = "render_value_result" diff --git a/src/kiara/data_types/included_core_types/metadata.py b/src/kiara/data_types/included_core_types/metadata.py index c65fa43e8..c3073cd9d 100644 --- a/src/kiara/data_types/included_core_types/metadata.py +++ b/src/kiara/data_types/included_core_types/metadata.py @@ -20,7 +20,6 @@ class MetadataTypeConfig(DataTypeConfig): class MetadataValueType(KiaraModelValueBaseType[Metadata, MetadataTypeConfig]): - """A file.""" _data_type_name: ClassVar[str] = "file" diff --git a/src/kiara/data_types/included_core_types/serialization.py b/src/kiara/data_types/included_core_types/serialization.py index c9c4eb936..4ec0337e1 100644 --- a/src/kiara/data_types/included_core_types/serialization.py +++ b/src/kiara/data_types/included_core_types/serialization.py @@ -14,7 +14,6 @@ class PythonObjectType(InternalType[object, DataTypeConfig]): - """ A 'plain' Python object. diff --git a/src/kiara/defaults.py b/src/kiara/defaults.py index 247955b59..e726bc9fa 100644 --- a/src/kiara/defaults.py +++ b/src/kiara/defaults.py @@ -43,6 +43,7 @@ kiara_app_dirs.user_data_dir, "context_locks" ) + KIARA_DEFAULT_STAGES_EXTRACTION_TYPE = "early" INIT_EXAMPLE_NAME = "init" @@ -79,6 +80,7 @@ "kiara", "callbacks", ] +INVALID_ALIAS_NAMES = ["kiara", "__default__", "alias", "value", "value_id"] """List of reserved names, inputs/outputs can't use those.""" DEFAULT_DATA_STORE_MARKER = "default_data_store" @@ -93,7 +95,7 @@ DEFAULT_WORKFLOW_STORE_MARKER = "default_workflow_store" """Name for the default context workflow store.""" -METADATA_DESTINY_STORE_MARKER = "metadata" +METADATA_PROPERTY_MARKER = "metadata" """Name for the default context destiny store.""" PIPELINE_PARENT_MARKER = "__pipeline__" @@ -121,6 +123,8 @@ KIARA_DEFAULT_ROOT_NODE_ID = "__self__" +KIARA_SQLITE_STORE_EXTENSION = "kiara" + VALUE_ATTR_DELIMITER = "::" VALID_VALUE_QUERY_CATEGORIES = ["data", "properties"] diff --git a/src/kiara/doc/__init__.py b/src/kiara/doc/__init__.py index 3646f3b18..3dfa10fb0 100644 --- a/src/kiara/doc/__init__.py +++ b/src/kiara/doc/__init__.py @@ -27,7 +27,6 @@ class FrklDocumentationPlugin(BasePlugin): - """ [mkdocs](https://www.mkdocs.org/) plugin to render API documentation for a project. diff --git a/src/kiara/doc/mkdocs_macros_kiara.py b/src/kiara/doc/mkdocs_macros_kiara.py index ee70450cb..fc6089b84 100644 --- a/src/kiara/doc/mkdocs_macros_kiara.py +++ b/src/kiara/doc/mkdocs_macros_kiara.py @@ -51,7 +51,7 @@ def get_src_of_object(obj: Union[str, Any]): src = inspect.getsource(_obj) return src except Exception as e: - return f"Can't render object source: {str(e)}" + return f"Can't render object source: {e}" @env.macro def get_context_info() -> KiaraContextInfo: diff --git a/src/kiara/doc/mkdocstrings/handler.py b/src/kiara/doc/mkdocstrings/handler.py index e8d2d599f..23506ceb9 100644 --- a/src/kiara/doc/mkdocstrings/handler.py +++ b/src/kiara/doc/mkdocstrings/handler.py @@ -20,7 +20,6 @@ class KiaraHandler(BaseHandler): - """ The kiara handler class. diff --git a/src/kiara/exceptions.py b/src/kiara/exceptions.py index f3c285827..0dbe6e756 100644 --- a/src/kiara/exceptions.py +++ b/src/kiara/exceptions.py @@ -89,7 +89,7 @@ def create_renderable(self, **config) -> "RenderableType": from rich.console import Group - rows: List[RenderableType] = [f"[red]Error[/red]: {str(self._msg)}"] + rows: List[RenderableType] = [f"[red]Error[/red]: {self._msg}"] root_details = self.root_details() if root_details: from rich.markdown import Markdown diff --git a/src/kiara/interfaces/__init__.py b/src/kiara/interfaces/__init__.py index 9a52bac52..b944c217f 100644 --- a/src/kiara/interfaces/__init__.py +++ b/src/kiara/interfaces/__init__.py @@ -10,7 +10,16 @@ import sys import uuid from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, Literal, Union +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Dict, + Iterable, + Iterator, + Literal, + Union, +) from kiara.defaults import KIARA_CONFIG_FILE_NAME, KIARA_MAIN_CONFIG_FILE from kiara.exceptions import KiaraException @@ -60,10 +69,9 @@ def create_console( ) class OptionHighlighter(RegexHighlighter): - """Highlights our special options.""" - highlights = [ + highlights: ClassVar = [ # type: ignore r"(^|\W)(?P\-\w+)(?![a-zA-Z0-9])", r"(^|\W)(?P