From 54ed7fa5d79f1e3eada360713c93de3d82fd10ec Mon Sep 17 00:00:00 2001 From: Markus Binsteiner Date: Wed, 14 Apr 2021 10:22:34 +0200 Subject: [PATCH] refactor: initial commit after python project template change --- .envrc.disabled | 3 + .git_archival.txt | 1 + .gitattributes | 1 + .github/workflows/build-darwin.yaml | 92 +++ .github/workflows/build-linux.yaml | 104 +++ .github/workflows/build-windows.yaml | 78 ++ .gitignore | 62 ++ .gitlab-ci.yml | 38 + .pre-commit-config.yaml | 70 ++ AUTHORS.rst | 5 + CHANGELOG.rst | 8 + LICENSE | 375 ++++++++++ MANIFEST.in | 15 + Makefile | 77 ++ README.md | 87 +++ ci/docker/Dockerfile | 10 + commitlint.config.js | 1 + docs/development/index.md | 1 + docs/index.md | 1 + docs/install.md | 35 + docs/modules/core_modules.md | 17 + docs/modules/index.md | 8 + docs/modules/pipeline_modules.md | 10 + docs/usage.md | 8 + mkdocs.yml | 58 ++ pyproject.toml | 27 + scripts/documentation/gen_schemas.py | 77 ++ setup.cfg | 228 ++++++ setup.py | 65 ++ src/kiara/__init__.py | 48 ++ src/kiara/_frkl/__init__.py | 4 + src/kiara/_frkl/_frkl.json | 11 + src/kiara/config.py | 196 +++++ src/kiara/data/__init__.py | 6 + src/kiara/data/registry.py | 441 +++++++++++ src/kiara/data/values.py | 699 ++++++++++++++++++ src/kiara/defaults.py | 37 + src/kiara/doc/__init__.py | 0 src/kiara/doc/mkdocs_macros_kiara.py | 121 +++ src/kiara/events.py | 62 ++ src/kiara/interfaces/__init__.py | 2 + src/kiara/interfaces/cli/__init__.py | 254 +++++++ src/kiara/kiara.py | 123 +++ src/kiara/metadata/__init__.py | 0 src/kiara/mgmt.py | 172 +++++ src/kiara/module.py | 375 ++++++++++ src/kiara/modules/__init__.py | 2 + src/kiara/modules/dev.py | 82 ++ src/kiara/modules/logic_gates.py | 111 +++ src/kiara/modules/pipelines/__init__.py | 37 + src/kiara/pipeline/__init__.py | 4 + src/kiara/pipeline/controller.py | 309 ++++++++ src/kiara/pipeline/module.py | 187 +++++ src/kiara/pipeline/pipeline.py | 376 ++++++++++ src/kiara/pipeline/structure.py | 619 ++++++++++++++++ src/kiara/py.typed | 0 src/kiara/resources/.gitkeep | 0 src/kiara/resources/pipelines/.gitkeep | 0 src/kiara/resources/pipelines/logic/nand.json | 24 + src/kiara/resources/pipelines/logic/nor.json | 24 + src/kiara/resources/pipelines/logic/xor.json | 31 + src/kiara/utils.py | 233 ++++++ src/kiara/workflow.py | 126 ++++ tests/__init__.py | 0 tests/conftest.py | 40 + tests/resources/pipelines/dummy/dummy_1.json | 26 + .../pipelines/dummy/dummy_1_delay.json | 30 + tests/resources/pipelines/logic/logic_1.json | 9 + tests/resources/pipelines/logic/logic_2.json | 24 + tests/resources/pipelines/logic/logic_3.json | 21 + tests/test_workflow_creation.py | 54 ++ tests/utils.py | 9 + 72 files changed, 6491 insertions(+) create mode 100644 .envrc.disabled create mode 100644 .git_archival.txt create mode 100644 .gitattributes create mode 100644 .github/workflows/build-darwin.yaml create mode 100644 .github/workflows/build-linux.yaml create mode 100644 .github/workflows/build-windows.yaml create mode 100644 .gitignore create mode 100644 .gitlab-ci.yml create mode 100644 .pre-commit-config.yaml create mode 100644 AUTHORS.rst create mode 100644 CHANGELOG.rst create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 README.md create mode 100644 ci/docker/Dockerfile create mode 100644 commitlint.config.js create mode 100644 docs/development/index.md create mode 100644 docs/index.md create mode 100644 docs/install.md create mode 100644 docs/modules/core_modules.md create mode 100644 docs/modules/index.md create 
mode 100644 docs/modules/pipeline_modules.md create mode 100644 docs/usage.md create mode 100644 mkdocs.yml create mode 100644 pyproject.toml create mode 100644 scripts/documentation/gen_schemas.py create mode 100644 setup.cfg create mode 100755 setup.py create mode 100644 src/kiara/__init__.py create mode 100644 src/kiara/_frkl/__init__.py create mode 100644 src/kiara/_frkl/_frkl.json create mode 100644 src/kiara/config.py create mode 100644 src/kiara/data/__init__.py create mode 100644 src/kiara/data/registry.py create mode 100644 src/kiara/data/values.py create mode 100644 src/kiara/defaults.py create mode 100644 src/kiara/doc/__init__.py create mode 100644 src/kiara/doc/mkdocs_macros_kiara.py create mode 100644 src/kiara/events.py create mode 100644 src/kiara/interfaces/__init__.py create mode 100644 src/kiara/interfaces/cli/__init__.py create mode 100644 src/kiara/kiara.py create mode 100644 src/kiara/metadata/__init__.py create mode 100644 src/kiara/mgmt.py create mode 100644 src/kiara/module.py create mode 100644 src/kiara/modules/__init__.py create mode 100644 src/kiara/modules/dev.py create mode 100644 src/kiara/modules/logic_gates.py create mode 100644 src/kiara/modules/pipelines/__init__.py create mode 100644 src/kiara/pipeline/__init__.py create mode 100644 src/kiara/pipeline/controller.py create mode 100644 src/kiara/pipeline/module.py create mode 100644 src/kiara/pipeline/pipeline.py create mode 100644 src/kiara/pipeline/structure.py create mode 100644 src/kiara/py.typed create mode 100644 src/kiara/resources/.gitkeep create mode 100644 src/kiara/resources/pipelines/.gitkeep create mode 100644 src/kiara/resources/pipelines/logic/nand.json create mode 100644 src/kiara/resources/pipelines/logic/nor.json create mode 100644 src/kiara/resources/pipelines/logic/xor.json create mode 100644 src/kiara/utils.py create mode 100644 src/kiara/workflow.py create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/resources/pipelines/dummy/dummy_1.json create mode 100644 tests/resources/pipelines/dummy/dummy_1_delay.json create mode 100644 tests/resources/pipelines/logic/logic_1.json create mode 100644 tests/resources/pipelines/logic/logic_2.json create mode 100644 tests/resources/pipelines/logic/logic_3.json create mode 100644 tests/test_workflow_creation.py create mode 100644 tests/utils.py diff --git a/.envrc.disabled b/.envrc.disabled new file mode 100644 index 000000000..362a0ca5c --- /dev/null +++ b/.envrc.disabled @@ -0,0 +1,3 @@ +layout python3 +# or, uncomment for specific 'base'-version of python: +# layout python "$HOME/.pyenv/versions/3.8.2/bin/python" diff --git a/.git_archival.txt b/.git_archival.txt new file mode 100644 index 000000000..95cb3eea4 --- /dev/null +++ b/.git_archival.txt @@ -0,0 +1 @@ +ref-names: $Format:%D$ diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..00a7b00c9 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +.git_archival.txt export-subst diff --git a/.github/workflows/build-darwin.yaml b/.github/workflows/build-darwin.yaml new file mode 100644 index 000000000..27f9bfbe3 --- /dev/null +++ b/.github/workflows/build-darwin.yaml @@ -0,0 +1,92 @@ +name: "darwin tests & binary build for 'kiara'" +# This workflow is triggered on pushes to the repository. 
+on: [push] +env: + DEVELOPER_DIR: /Applications/Xcode_12.4.app/Contents/Developer + MACOSX_DEPLOYMENT_TARGET: 10.13 + + +jobs: + test-darwin: + name: pytest on darwin + runs-on: macos-10.15 + strategy: + matrix: + python_version: [3.6, 3.7, 3.8, 3.9] + steps: + - name: "Set up Python ${{ matrix.python_version }}" + uses: actions/setup-python@v2 + with: + python-version: "${{ matrix.python_version }}" + - uses: actions/checkout@v2 + - name: install requirements + run: pip install -U -e '.[dev_testing,all]' + - name: Test with pytest + run: make test + + mypy-darwin: + name: mypy check on darwin + runs-on: macos-10.15 + strategy: + matrix: + python_version: [3.6, 3.7, 3.8, 3.9] + steps: + - name: "Set up Python ${{ matrix.python_version }}" + uses: actions/setup-python@v2 + with: + python-version: "${{ matrix.python_version }}" + - uses: actions/checkout@v2 + - name: install requirements + run: pip install -U -e '.[dev_testing,all]' + - name: Test with mypy + run: make mypy + + flake8-darwin: + name: flake8 on darwin + runs-on: macos-10.15 + steps: + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - uses: actions/checkout@v2 + - name: install requirements + run: pip install -U -e '.[dev_testing,all]' + - name: Test with flake8 + run: make flake + + + + build-darwin: + name: build darwin binary + runs-on: macos-10.15 + needs: + - test-darwin + - mypy-darwin + - flake8-darwin + steps: + - uses: actions/cache@v2 + with: + path: ~/.pyenv/ + key: ${{ runner.os }}-target-darwin-10.13-kiara-python-build + - run: brew install readline xz bzip2 lbzip2 lzlib openssl zlib + - uses: actions/checkout@v2 + - name: download build script + run: wget https://gitlab.com/frkl/frkl.project/-/raw/develop/scripts/build-binary/build.sh && chmod +x build.sh + - name: download spec file + run: wget https://gitlab.com/frkl/frkl.project/-/raw/develop/scripts/build-binary/onefile.spec + - name: build_binary + run: ./build.sh --python-type pyenv +# # uncomment to sign binary (needs certificate data in env) +# - name: Add MacOS certs +# run: chmod +x ./scripts/build-binary/add-osx-cert.sh && ./scripts/build-binary/add-osx-cert.sh +# env: +# CERTIFICATE_OSX_APPLICATION: ${{ secrets.CERTIFICATE_OSX_APPLICATION }} +# CERTIFICATE_PASSWORD: ${{ secrets.CERTIFICATE_PASSWORD }} +# - name: sign binary +# run: "codesign -f -s 'Developer ID Application: Markus Binsteiner (4Q559SZWTL)' dist/darwin19/kiara" + - name: Archive Mac OS X binary + uses: actions/upload-artifact@v1 + with: + name: kiara + path: dist/darwin19/kiara diff --git a/.github/workflows/build-linux.yaml b/.github/workflows/build-linux.yaml new file mode 100644 index 000000000..e537f6b1f --- /dev/null +++ b/.github/workflows/build-linux.yaml @@ -0,0 +1,104 @@ +name: "linux tests, binary- and documentation builds for 'kiara'" +# This workflow is triggered on pushes to the repository. 
+on: [push] + + +jobs: + test-linux: + name: pytest on linux + runs-on: ubuntu-latest + strategy: + matrix: + python_version: [3.6, 3.7, 3.8, 3.9] + steps: + - name: "Set up Python ${{ matrix.python_version }}" + uses: actions/setup-python@v2 + with: + python-version: "${{ matrix.python_version }}" + - uses: actions/checkout@v2 + - name: install requirements + run: pip install -U -e '.[dev_testing,all]' + - name: Test with pytest + run: make test + + mypy-linux: + name: mypy check on linux + runs-on: ubuntu-latest + strategy: + matrix: + python_version: [3.6, 3.7, 3.8, 3.9] + steps: + - name: "Set up Python ${{ matrix.python_version }}" + uses: actions/setup-python@v2 + with: + python-version: "${{ matrix.python_version }}" + - uses: actions/checkout@v2 + - name: install requirements + run: pip install -U -e '.[dev_testing,all]' + - name: Test with mypy + run: make mypy + + flake8-linux: + name: flake8 on linux + runs-on: ubuntu-latest + steps: + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - uses: actions/checkout@v2 + - name: install requirements + run: pip install -U -e '.[dev_testing,all]' + - name: Test with flake8 + run: make flake + + build-docs: + name: build documentation + runs-on: ubuntu-latest + needs: + - test-linux + - mypy-linux + - flake8-linux + steps: + - name: Set up Python 3.9.1 + uses: actions/setup-python@v2 + with: + python-version: 3.9.1 + - uses: actions/checkout@v2 + - name: install requirements + run: pip install --extra-index-url https://gitlab.com/api/v4/projects/25344049/packages/pypi/simple -e '.[dev_documentation,all]' + - name: create documentation + run: make docs + - name: deploy documentation to gh-pages + uses: JamesIves/github-pages-deploy-action@4.1.0 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BRANCH: gh-pages + FOLDER: build/site + + + + build-linux: + name: build linux binary + runs-on: ubuntu-16.04 + needs: + - test-linux + - mypy-linux + - flake8-linux + steps: + - uses: actions/cache@v2 + with: + path: ~/.pyenv/ + key: ${{ runner.os }}-target-ubuntu-kiara-python-build + - uses: actions/checkout@v2 + - name: download build script + run: wget https://gitlab.com/frkl/frkl.project/-/raw/develop/scripts/build-binary/build.sh && chmod +x build.sh + - name: download spec file + run: wget https://gitlab.com/frkl/frkl.project/-/raw/develop/scripts/build-binary/onefile.spec + - name: build_binary + run: "PATH=\"$(echo $PATH | tr : '\n' | grep -v linuxbrew | paste -s -d:)\" ./build.sh --python-type pyenv" + - name: Archive Linux binary + uses: actions/upload-artifact@v1 + with: + name: kiara + path: dist/linux-gnu/kiara diff --git a/.github/workflows/build-windows.yaml b/.github/workflows/build-windows.yaml new file mode 100644 index 000000000..1396c1ddc --- /dev/null +++ b/.github/workflows/build-windows.yaml @@ -0,0 +1,78 @@ +name: "windows tests & binary build for 'kiara'" +# This workflow is triggered on pushes to the repository. 
+on: [push] + + +jobs: + test-windows: + name: pytest on windows + runs-on: windows-latest + strategy: + matrix: + python_version: [3.6, 3.7, 3.8, 3.9] + steps: + - name: "Set up Python ${{ matrix.python_version }}" + uses: actions/setup-python@v2 + with: + python-version: "${{ matrix.python_version }}" + - uses: actions/checkout@v2 + - name: install requirements + run: pip install -U -e '.[dev_testing,all]' + - name: Test with pytest + run: make test + + mypy-windows: + name: mypy check on windows + runs-on: windows-latest + strategy: + matrix: + python_version: [3.6, 3.7, 3.8, 3.9] + steps: + - name: "Set up Python ${{ matrix.python_version }}" + uses: actions/setup-python@v2 + with: + python-version: "${{ matrix.python_version }}" + - uses: actions/checkout@v2 + - name: install requirements + run: pip install -U -e '.[dev_testing,all]' + - name: Test with mypy + run: make mypy + + flake8-windows: + name: flake8 on windows + runs-on: windows-latest + steps: + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + - uses: actions/checkout@v2 + - name: install requirements + run: pip install -U -e '.[dev_testing,all]' + - name: Test with flake8 + run: make flake + + build-windows: + name: build windows binary + runs-on: windows-latest + needs: + - test-windows + - mypy-windows + - flake8-windows + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: '3.9' # Version range or exact version of a Python version to use, using SemVer's version range syntax + architecture: 'x64' # optional x64 or x86. Defaults to x64 if not specified + - name: download build script + run: C:\msys64\usr\bin\wget.exe https://gitlab.com/frkl/frkl.project/-/raw/develop/scripts/build-binary/build.sh && chmod +x build.sh + - name: download spec file + run: C:\msys64\usr\bin\wget.exe https://gitlab.com/frkl/frkl.project/-/raw/develop/scripts/build-binary/onefile.spec + - name: build_binary + run: "bash ./build.sh --python-type system" + - name: Archive windows binary + uses: actions/upload-artifact@v1 + with: + name: kiara + path: dist/msys/kiara.exe diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..6012520e2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,62 @@ +# Temporary and binary files +*~ +*.py[cod] +*.so +*.cfg +!.isort.cfg +!setup.cfg +*.orig +*.log +*.pot +__pycache__/* +.cache/* +.*.swp +*/.ipynb_checkpoints/* + +# Project files +.ropeproject +.project +.pydevproject +.settings +.idea +tags + +# Package files +*.egg +*.eggs/ +.installed.cfg +*.egg-info + +# Unittest and coverage +htmlcov/* +.coverage +.coverage.* +.tox +junit.xml +coverage.xml +.pytest_cache/ + +# Build and docs folder/files +/build/* +/dist/* +sdist/* +cover/* +MANIFEST + +# Per-project virtualenvs +.venv*/ +pip-wheel-metadata/ +.python-version +src/kiara/version.txt +.direnv +public +site +.dephell_report +.direnv +.mypy_cache +.env +docs/api-documentation.md +.frkl +.envrc +build.sh +onefile.spec diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 000000000..c8e239a74 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,38 @@ +--- +variables: + GIT_STRATEGY: fetch + GIT_DEPTH: 0 + GIT_SUBMODULE_STRATEGY: recursive + LM_PYTHON_VERSION: "2" + DS_PYTHON_VERSION: "3" + PIP_EXTRA_INDEX_URL: "https://pkgs.frkl.io/frkl/dev" + +# include: +# - template: Dependency-Scanning.gitlab-ci.yml +# - template: License-Management.gitlab-ci.yml + +image: python:3.8 + +stages: + - test + - build + - build_windows + - release + +include: + - local: 
'/ci/gitlab/test/tox.yml' + - local: '/ci/gitlab/test/mypy.yml' + - local: '/ci/gitlab/test/flake8.yml' + - local: '/ci/gitlab/test/coverage.yml' + - local: '/ci/gitlab/test/safety.yml' + - local: '/ci/gitlab/test/commitlint.yml' + - local: '/ci/gitlab/build/docs.yml' + - local: '/ci/gitlab/build/pkg.yml' + - local: '/ci/gitlab/build/binary_linux.yml' + - local: '/ci/gitlab/build/binary_windows.yml' +# - local: '/ci/gitlab/release/binaries.yml' +# - local: '/ci/gitlab/release/container.yml' + + +pages: + extends: .kiara_build_docs diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..05cd0ef0e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,70 @@ +default_language_version: + python: python3 + +repos: + +- repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook + rev: 'v4.0.0' + hooks: + - id: commitlint + stages: [commit-msg] + additional_dependencies: ['@commitlint/config-conventional'] + +- repo: https://github.com/asottile/setup-cfg-fmt + rev: 'v1.16.0' + hooks: + - id: setup-cfg-fmt + +- repo: https://github.com/pycqa/isort + rev: 5.7.0 + hooks: + - id: isort + +- repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black + +- repo: https://github.com/myint/autoflake + rev: 'v1.4' + hooks: + - id: autoflake + args: ['--in-place', '--remove-all-unused-imports'] + # args: ['--in-place', '--remove-all-unused-imports', '--remove-unused-variable'] + +- repo: https://gitlab.com/pycqa/flake8 + rev: '3.8.4' # pick a git hash / tag to point to + hooks: + - id: flake8 + +- repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v0.812' # Use the sha / tag you want to point at + hooks: + - id: mypy + files: "^src/" + pass_filenames: true + args: ["--config-file", "setup.cfg"] + additional_dependencies: [pydantic>=1.8.0, rich>=10.0.0, ruamel.yaml] + + +- repo: git://github.com/pre-commit/pre-commit-hooks + rev: 'v3.4.0' + hooks: + - id: trailing-whitespace + exclude: 'setup.cfg' + - id: check-added-large-files + - id: check-ast + - id: check-json + - id: check-merge-conflict + - id: check-xml + - id: check-yaml + exclude: 'tests/\*' + - id: debug-statements + - id: end-of-file-fixer + exclude: '.*.json' + - id: requirements-txt-fixer + - id: fix-encoding-pragma + - id: mixed-line-ending + args: ['--fix=no'] + #- id: no-commit-to-branch + # args: [--branch, master] diff --git a/AUTHORS.rst b/AUTHORS.rst new file mode 100644 index 000000000..abb15a5bd --- /dev/null +++ b/AUTHORS.rst @@ -0,0 +1,5 @@ +============ +Contributors +============ + +* Markus Binsteiner diff --git a/CHANGELOG.rst b/CHANGELOG.rst new file mode 100644 index 000000000..368d21427 --- /dev/null +++ b/CHANGELOG.rst @@ -0,0 +1,8 @@ +========= +Changelog +========= + +Version 0.9 (Upcoming) +====================== + +- first release of *kiara* diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..f07237b1b --- /dev/null +++ b/LICENSE @@ -0,0 +1,375 @@ +Copyright 2021 DHARPA project + +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. 
"Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. 
Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. 
If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. 
If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..54567e2d5 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,15 @@ +include AUTHORS.rst +include CONTRIBUTING.rst +include HISTORY.rst +include LICENSE +include README.md + +include src/kiara/_frkl/_frkl.json +include src/kiara/py.typed + +recursive-include tests * +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + +recursive-include docs *.md *.jpg *.png *.gif +recursive-include src/kiara/resources * diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..da0345d9e --- /dev/null +++ b/Makefile @@ -0,0 +1,77 @@ +.PHONY: clean clean-test clean-pyc clean-build docs help +.DEFAULT_GOAL := help + +help: + @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) + +docs: ## build documentation + mkdocs build + +serve-docs: ## serve and watch documentation + mkdocs serve + +clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts + +clean-build: ## remove build artifacts + rm -fr build/ + rm -fr dist/ + rm -fr .eggs/ + find . -name '*.egg' -exec rm -f {} + + +clean-pyc: ## remove Python file artifacts + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + find . 
-name '__pycache__' -exec rm -fr {} + + +clean-test: ## remove test and coverage artifacts + rm -fr .tox/ + rm -f .coverage + rm -fr htmlcov/ + rm -fr .pytest_cache + rm -fr .mypy_cache + +init: clean ## initialize a development environment (to be run in virtualenv) + git init + git checkout -b develop || true + pip install -U pip + pip install --extra-index-url https://gitlab.com/api/v4/projects/25344049/packages/pypi/simple -U -e '.[all_dev]' + pre-commit install + setup-cfg-fmt setup.cfg || true + git add "*" ".*" + pre-commit run --all-files || true + git add "*" ".*" + +setup-cfg-fmt: # format setup.cfg + setup-cfg-fmt setup.cfg || true + +black: ## run black + black --config pyproject.toml setup.py src/kiara tests + +flake: ## check style with flake8 + flake8 src/kiara tests + +mypy: ## run mypy + mypy src/kiara + +test: ## run tests quickly with the default Python + py.test + +test-all: ## run tests on every Python version with tox + tox + +coverage: ## check code coverage quickly with the default Python + coverage run -m pytest tests + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html + +check: black flake mypy test ## run dev-related checks + +pre-commit: ## run pre-commit on all files + pre-commit run --all-files + +dist: clean ## build source and wheel packages + python setup.py sdist + python setup.py bdist_wheel + ls -l dist diff --git a/README.md b/README.md new file mode 100644 index 000000000..53987ecd3 --- /dev/null +++ b/README.md @@ -0,0 +1,87 @@ +[![PyPI status](https://img.shields.io/pypi/status/kiara.svg)](https://pypi.python.org/pypi/kiara/) +[![PyPI version](https://img.shields.io/pypi/v/kiara.svg)](https://pypi.python.org/pypi/kiara/) +[![PyPI pyversions](https://img.shields.io/pypi/pyversions/kiara.svg)](https://pypi.python.org/pypi/kiara/) +[![Build Status](https://img.shields.io/endpoint.svg?url=https%3A%2F%2Factions-badge.atrox.dev%2FDHARPA-Project%2Fkiara%2Fbadge%3Fref%3Ddevelop&style=flat)](https://actions-badge.atrox.dev/DHARPA-Project/kiara/goto?ref=develop) +[![Code style](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black) + +# kiara + +*A workflow management and execution engine for the DHARPA project.* + + - Documentation: [https://dharpa.org/kiara](https://dharpa.org/kiara) + - Code: [https://github.com/DHARPA-Project/kiara](https://github.com/DHARPA-Project/kiara) + +## Description + +Documentation still to be done. 
+
+## Downloads
+
+### Binaries
+
+Only snapshot binaries (for now):
+
+ - [Linux](https://github.com/DHARPA-Project/kiara/actions/workflows/build-linux.yaml)
+ - [Windows](https://github.com/DHARPA-Project/kiara/actions/workflows/build-windows.yaml)
+ - [Mac OS X](https://github.com/DHARPA-Project/kiara/actions/workflows/build-darwin.yaml)
+
+# Development
+
+## Requirements
+
+- Python (version >=3.6 -- some make targets only work for Python >=3.7, but *kiara* itself should work on 3.6)
+- pip, virtualenv
+- git
+- make
+- [direnv](https://direnv.net/) (optional)
+
+
+## Prepare development environment
+
+```console
+git clone https://github.com/DHARPA-Project/kiara.git
+cd kiara
+python3 -m venv .venv
+source .venv/bin/activate
+make init
+```
+
+If you use [direnv](https://direnv.net/), you can alternatively do:
+
+*Note*: you might want to adjust the Python version in ``.envrc`` (this should not be necessary in most cases, though)
+
+``` console
+git clone https://github.com/DHARPA-Project/kiara.git
+cd kiara
+cp .envrc.disabled .envrc
+direnv allow # if using direnv, otherwise activate virtualenv
+make init
+```
+
+## ``make`` targets
+
+- ``init``: initialize the development project (installs project & dev dependencies into the virtualenv, as well as the pre-commit git hook)
+- ``flake``: run *flake8* tests
+- ``mypy``: run mypy tests
+- ``test``: run unit tests
+- ``docs``: create static documentation pages
+- ``serve-docs``: serve documentation pages (incl. auto-reload)
+- ``clean``: clean build directories
+
+For details (and other, minor targets), check the ``Makefile``.
+
+
+## Running tests
+
+``` console
+> make test
+# or
+> make coverage
+```
+
+
+## Copyright & license
+
+This project is MPL v2.0 licensed; for the license text, please check the [LICENSE](/LICENSE) file in this repository.
+
+[Copyright (c) 2021 DHARPA project](https://dharpa.org)
diff --git a/ci/docker/Dockerfile b/ci/docker/Dockerfile
new file mode 100644
index 000000000..7c115a388
--- /dev/null
+++ b/ci/docker/Dockerfile
@@ -0,0 +1,10 @@
+FROM debian:stable
+
+RUN \
+    apt update -y && \
+    apt install -y wget git
+
+COPY kiara /usr/bin/kiara
+
+RUN \
+    chmod +x /usr/bin/kiara
diff --git a/commitlint.config.js b/commitlint.config.js
new file mode 100644
index 000000000..28fe5c5bf
--- /dev/null
+++ b/commitlint.config.js
@@ -0,0 +1 @@
+module.exports = {extends: ['@commitlint/config-conventional']}
diff --git a/docs/development/index.md b/docs/development/index.md
new file mode 100644
index 000000000..459110d34
--- /dev/null
+++ b/docs/development/index.md
@@ -0,0 +1 @@
+# Development
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 000000000..f8898b55a
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1 @@
+--8<-- "../README.md"
diff --git a/docs/install.md b/docs/install.md
new file mode 100644
index 000000000..5859e27e1
--- /dev/null
+++ b/docs/install.md
@@ -0,0 +1,34 @@
+# Installation
+
+There are three ways to install *kiara* on your machine: via a manual binary download, an install script, or installation of the Python package.
+
+## Binaries
+
+To install the `kiara` binary, download the appropriate binary from one of the links below, and set the downloaded file to be executable (``chmod +x kiara``):
+
+Only snapshot binaries for now, not for production use:
+
+ - [Linux](https://github.com/DHARPA-Project/kiara/actions/workflows/build-linux.yaml)
+ - [Windows](https://github.com/DHARPA-Project/kiara/actions/workflows/build-windows.yaml)
+ - [Mac OS X](https://github.com/DHARPA-Project/kiara/actions/workflows/build-darwin.yaml)
+
+
+## Python package
+
+*Note*: this does not work yet!
+
+The Python package is currently not available on [pypi](https://pypi.org), so you need to specify the ``--extra-index-url`` parameter for your pip command. If you choose this install method, I assume you know how to install Python packages manually, which is why I only show you an example way of getting *kiara* onto your machine:
+
+``` console
+> python3 -m venv ~/.venvs/kiara
+> source ~/.venvs/kiara/bin/activate
+> pip install kiara
+...
+...
+...
+Successfully installed ... ... ...
+> kiara --help
+Usage: kiara [OPTIONS] COMMAND [ARGS]...
+  ...
+  ...
+```
diff --git a/docs/modules/core_modules.md b/docs/modules/core_modules.md
new file mode 100644
index 000000000..89e8d3234
--- /dev/null
+++ b/docs/modules/core_modules.md
@@ -0,0 +1,17 @@
+# *Core* modules
+
+*Core modules* are implemented as Python classes which inherit from the abstract base class [KiaraModule][kiara.module.KiaraModule]. They need to implement three methods:
+
+ - [``create_input_schema``][kiara.module.KiaraModule.create_input_schema]: returns a description of the input(s) this module takes
+ - [``create_output_schema``][kiara.module.KiaraModule.create_output_schema]: returns a description of the output(s) this module produces
+ - [``process``][kiara.module.KiaraModule.process]: the actual processing step, to transform the inputs into outputs
+
+!!! note
+    Ideally, a module's function is [idempotent](https://en.wikipedia.org/wiki/Idempotence), but it is allowed to call functions that return
+    random objects internally, as long as it is acceptable for the resulting output to be cached and re-used.
+
+An example of such a module would be the [AndModule][kiara.modules.logic_gates.AndModule], which is a simple module that computes the logical 'and' operation:
+
+``` python
+{{ get_src_of_object('kiara.modules.logic_gates.AndModule') }}
+```
diff --git a/docs/modules/index.md b/docs/modules/index.md
new file mode 100644
index 000000000..4496b3d79
--- /dev/null
+++ b/docs/modules/index.md
@@ -0,0 +1,8 @@
+# *Kiara* modules
+
+Modules are the building blocks of *Kiara*. The central element of a *Kiara* module is a [pure](https://en.wikipedia.org/wiki/Pure_function) function which performs a defined piece of work. The module also contains type information/schemas for the input values the function takes, as well as for the output it produces.
+
+Currently, *Kiara* has two types of modules:
+
+- [*Core modules*](core_modules.md): Python objects that inherit from the common abstract base class [KiaraModule][kiara.module.KiaraModule]
+- [*Pipeline modules*](pipeline_modules.md): assemblies of other modules (*core* or *pipeline*), incl. descriptions of how those are connected. Usually expressed as ``json`` or ``yaml`` data structures.
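As an illustration of the three methods a *core module* implements (described in ``docs/modules/core_modules.md`` above), here is a minimal, hypothetical sketch of such a module. It is a sketch under simplified assumptions, not the actual ``KiaraModule`` API: the method names match the docs, but the signatures and the dict-based schemas are invented for illustration, and the real ``AndModule`` lives in ``kiara.modules.logic_gates``.

``` python
# Hypothetical sketch of a *core* module. The real abstract base class is
# kiara.module.KiaraModule; its method signatures and schema types differ
# from this simplified, dependency-free version.


class AndModuleSketch:
    """Compute the logical 'and' of two boolean inputs."""

    def create_input_schema(self):
        # Describes the inputs this module takes (simplified dict-based schema).
        return {
            "a": {"type": "boolean", "doc": "The first operand."},
            "b": {"type": "boolean", "doc": "The second operand."},
        }

    def create_output_schema(self):
        # Describes the single output this module produces.
        return {"y": {"type": "boolean", "doc": "The result of 'a and b'."}}

    def process(self, inputs, outputs):
        # The actual (idempotent) processing step: read the validated inputs,
        # write the result to the outputs.
        outputs["y"] = inputs["a"] and inputs["b"]
```

Calling ``process`` with ``{"a": True, "b": False}`` would set ``outputs["y"]`` to ``False``.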
diff --git a/docs/modules/pipeline_modules.md b/docs/modules/pipeline_modules.md
new file mode 100644
index 000000000..8a9cbb6df
--- /dev/null
+++ b/docs/modules/pipeline_modules.md
@@ -0,0 +1,10 @@
+# *Pipeline* modules
+
+*Pipeline modules* are assemblies of a number of other modules (which can be of either type, *core* or *pipeline*), including descriptions of how some modules' inputs
+are connected to other modules' outputs. Module inputs that are not connected to one or several other modules' outputs are expected to receive (external) user input.
+
+An example of a configuration for such a *pipeline module* would be the ``nand``-pipeline, which contains two *core modules* ([AndModule][kiara.modules.logic_gates.AndModule] and [NotModule][kiara.modules.logic_gates.NotModule]), where the latter's only input is connected to the former's output, and which performs, as you might have guessed, the [*nand*](https://en.wikipedia.org/wiki/NAND_logic) operation:
+
+```yaml
+{{ get_pipeline_config('nand') }}
+```
diff --git a/docs/usage.md b/docs/usage.md
new file mode 100644
index 000000000..7911622e6
--- /dev/null
+++ b/docs/usage.md
@@ -0,0 +1,8 @@
+# Usage
+
+
+## Getting help
+
+To get usage information for the `kiara` command, use the ``--help`` flag:
+
+{{ cli("kiara", "--help") }}
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 000000000..541851553
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,58 @@
+site_name: kiara
+repo_url: https://github.com/DHARPA-Project/kiara
+site_author: Markus Binsteiner
+docs_dir: docs
+site_dir: build/site
+
+theme:
+  name: material
+  features:
+  - navigation.instant
+  - navigation.tracking
+
+markdown_extensions:
+- attr_list
+- admonition
+- def_list
+- codehilite:
+    guess_lang: false
+- toc:
+    permalink: true
+    toc_depth: 3
+- pymdownx.snippets:
+    base_path: docs
+- mkdocs-click
+- pymdownx.highlight
+- pymdownx.superfences
+
+nav:
+- Home: index.md
+- Install: install.md
+- Usage: usage.md
+- Modules:
+  - Overview: modules/index.md
+  - Core modules: modules/core_modules.md
+  - Pipeline modules: modules/pipeline_modules.md
+- Development:
+  - Overview: development/index.md
+  - Entities:
+    - Overview: development/entities/index.md
+    - Values / Data: development/entities/values.md
+    - Modules / Pipelines: development/entities/modules.md
+    - Events: development/entities/events.md
+
+plugins:
+- search
+- mkdocstrings:
+    default_handler: python
+    watch:
+      - "src"
+- frkl-docgen:
+    main_module: "kiara"
+- macros:
+    modules:
+      - frkl.project.meta.documentation.mkdocs_macros_frkl
+      - kiara.doc.mkdocs_macros_kiara
+- gen-files:
+    scripts:
+      - scripts/documentation/gen_schemas.py
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..d342f7094
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,27 @@
+[build-system]
+requires = [
+    "setuptools",
+    "setuptools_scm",
+    "wheel",
+]
+build-backend = 'setuptools.build_meta'
+
+[tool.black]
+include = '\.pyi?$'
+exclude = '''
+/(
+    \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | _build
+  | buck-out
+  | build
+  | dist
+  | external
+)/
+'''
+
+[tool.dephell.main]
+from = {format = "setuppy", path = "setup.py"}
diff --git a/scripts/documentation/gen_schemas.py b/scripts/documentation/gen_schemas.py
new file mode 100644
index 000000000..8cb091096
--- /dev/null
+++ b/scripts/documentation/gen_schemas.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+import inspect
+import mkdocs_gen_files
+import os
+import typing
+
+from kiara.doc.mkdocs_macros_kiara import KIARA_MODEL_CLASSES
+
+
+def class_namespace(cls: typing.Type) -> str:
+
+    module = cls.__module__
+    if module is None or module == str.__class__.__module__:
+        return cls.__name__
+    else:
+        return module + "." + cls.__name__
+
+
+overview_file_path = os.path.join("development", "entities", "index.md")
+overview = """# Schema overview
+
+This page contains an overview of the available models and their associated schemas used in *kiara*.
+
+"""
+
+for category, classes in KIARA_MODEL_CLASSES.items():
+
+    overview = overview + f"## {category.capitalize()}\n\n"
+
+    file_path = os.path.join("development", "entities", f"{category}.md")
+
+    content = f"# {category.capitalize()}\n\n"
+
+    for cls in classes:
+
+        doc = cls.__doc__
+
+        if doc is None:
+            doc = ""
+
+        doc = inspect.cleandoc(doc)
+
+        doc_short = doc.split("\n")[0]
+        if doc_short:
+            doc_str = f": {doc_short}"
+        else:
+            doc_str = ""
+
+        overview = (
+            overview
+            + f" - [``{cls.__name__}``]({category}{os.path.sep}#{cls.__name__.lower()}){doc_str}\n"
+        )
+
+        namespace = class_namespace(cls)
+        download_link = f'{cls.__name__}.json'
+
+        # content = content + f"## {cls.__name__}\n\n" + "{{ get_schema_for_model('" + class_namespace(cls) + ") }}\n\n"
+        content = content + f"## {cls.__name__}\n\n"
+        content = content + doc + "\n\n"
+        content = content + "#### References\n\n"
+        content = (
+            content + f" - model class reference: [{cls.__name__}][{namespace}]\n"
+        )
+        content = content + f" - JSON schema file: {download_link}\n\n"
+        content = content + "#### JSON schema\n\n"
+        content = (
+            content
+            + "``` json\n{{ get_schema_for_model('"
+            + namespace
+            + "') }}\n```\n\n"
+        )
+
+    with mkdocs_gen_files.open(file_path, "w") as f:
+        f.write(content)
+
+with mkdocs_gen_files.open(overview_file_path, "w") as f:
+    f.write(overview)
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 000000000..8a7402867
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,228 @@
+[metadata]
+name = kiara
+description = A workflow management and execution engine for the DHARPA project.
+long_description = file: README.md +long_description_content_type = text/markdown +url = https://github.com/DHARPA-Project/kiara +author = Markus Binsteiner +author_email = markus.binsteiner@uni.lu +license = MPL-2.0 +license_file = LICENSE +platforms = any +classifiers = + Development Status :: 3 - Alpha + License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0) + Programming Language :: Python + Programming Language :: Python :: 3 + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + +[options] +packages = find_namespace: +install_requires = + appdirs>=1.4.4,<2.0.0 + deepdiff>=5.2.0,<6.0.0 + faker>=8.0.0,<9.0.0 + networkx>=2.5,<3.0 + pyarrow>=3.0.0,<4.0.0 + pydantic>=1.8.0,<2.0.0 + pyyaml>=5.4.0,<6.0.0 + rich>=9.0.0,<11.0.0 + ruamel.yaml>=0.17.0,<0.18.0 + stevedore>=3.3.0,<4.0.0 +python_requires = >=3.6 +include_package_data = True +package_dir = + =src +setup_requires = + setuptools_scm + setuptools_scm_git_archive +zip_safe = False + +[options.entry_points] +console_scripts = + kiara = kiara.interfaces.cli:cli +kiara.modules = + or = kiara.modules.logic_gates:OrModule + and = kiara.modules.logic_gates:AndModule + not = kiara.modules.logic_gates:NotModule + pipeline = kiara.pipeline.module:PipelineModule + dummy = kiara.modules.dev:DummyModule + +[options.extras_require] +cli = + asyncclick>=7.0.9,<8.0.0 +dev_build = + frkl.project[build] +dev_documentation = + deepdiff>=5.2.0 + devtools>=0.6.0 + frkl.project + mkdocs>=1.1.2 + mkdocs-macros-plugin>=0.5.0 + mkdocs-material>=6.2.5 + mkdocs-simple-hooks>=0.1.2 + mkdocstrings>=0.15.0 + pymdown-extensions>=8.1 + mkdocs-click>=0.3.0;python_version>'3.6' + mkdocs-gen-files>=0.3.1;python_version>'3.6' + pip-licenses>=3.3.0;python_version>'3.6' +dev_modules = + jupyter + jupyterlab + pandas + pyarrow +dev_testing = + flake8>=3.8.4 + mypy>=0.800 + pytest>=6.2.2 + pytest-cov>=2.11.1 + tox>=3.21.2 +dev_utils = + black + cruft>=2.6.0 + flake8>=3.8.4 + ipython + pip-licenses>=3.3.0 + pp-ez>=0.2.0 + pre-commit>=2.9.3 + setup-cfg-fmt>=1.16.0 + watchgod>=0.6 + wheel + +[options.packages.find] +where = src +exclude = + tests + +[aliases] +build = bdist_wheel +release = build upload + +[bdist_wheel] +universal = 1 + +[devpi:upload] +no-vcs = 1 +formats = sdist, bdist_wheel + +[tool:pytest] +addopts = + --verbose +norecursedirs = + dist + build + .tox +testpaths = tests + +[tox:tox] +envlist = py36, py37, py38, flake8 + +[testenv] +setenv = + PYTHONPATH = {toxinidir} +deps = + -e{toxinidir}[dev_testing,all] +install_command = pip install {opts} {packages} +commands = + pip install -U pip + py.test --basetemp={envtmpdir} + +[testenv:flake8] +basepython = python +deps = + -e{toxinidir}[dev_testing,all] + flake8 +install_command = pip install {opts} {packages} +commands = flake8 src + +[coverage:run] +branch = True +source = kiara + +[coverage:paths] +source = + src/ + */site-packages/ + +[coverage:report] +exclude_lines = + pragma: no cover + + def __repr__ + if self\.debug + + raise AssertionError + raise NotImplementedError + + if 0: + if __name__ == .__main__.: + +[flake8] +exclude = + .tox + build + dist + .eggs + docs/conf.py + .git + __pycache__ +ignore = F405, W503, E501 +max-line-length = 88 + +[isort] +profile = black +indent = ' ' +skip = .tox,.venv,build,dist +known_standard_library = setuptools,pkg_resources +known_test = pytest +known_first_party = kiara +sections = 
FUTURE,STDLIB,COMPAT,TEST,THIRDPARTY,FIRSTPARTY,LOCALFOLDER +default_section = THIRDPARTY +multi_line_output = 3 +include_trailing_comma = True +force_grid_wrap = 0 +combine_as_imports = True +line_length = 88 + +[mypy] +mypy_path = + src/ +namespace_packages = false +plugins = pydantic.mypy + +[mypy-appdirs] +ignore_missing_imports = true + +[mypy-asciinet.*] +ignore_missing_imports = true + +[mypy-asyncclick] +ignore_missing_imports = true + +[mypy-deepdiff] +ignore_missing_imports = true + +[mypy-devtools] +ignore_missing_imports = true + +[mypy-faker] +ignore_missing_imports = true + +[mypy-networkx] +ignore_missing_imports = true + +[mypy-pyarrow.*] +ignore_missing_imports = true + +[mypy-ruamel.*] +ignore_missing_imports = true + +[mypy-stevedore] +ignore_missing_imports = true + +[mypy-uvloop] +ignore_missing_imports = true diff --git a/setup.py b/setup.py new file mode 100755 index 000000000..b5210d1da --- /dev/null +++ b/setup.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + Setup file for kiara. + Use setup.cfg to configure your project. + + This file was generated with PyScaffold 3.1. + PyScaffold helps you to put up the scaffold of your new Python project. + Learn more under: https://pyscaffold.org/ +""" +from setuptools import setup + +import sys + +try: + from pkg_resources import VersionConflict, require + + require("setuptools>=38.3") +except VersionConflict: + print("Error: version of setuptools is too old (<38.3)!") + sys.exit(1) + + +def get_extra_requires(add_all=True, add_all_dev=True): + + from distutils.dist import Distribution + + dist = Distribution() + dist.parse_config_files() + dist.parse_command_line() + + extras = {} + extra_deps = dist.get_option_dict("options.extras_require") + + for extra_name, data in extra_deps.items(): + + _, dep_string = data + deps = [] + d = dep_string.split("\n") + for line in d: + if not line: + continue + deps.append(line) + extras[extra_name] = deps + + if add_all: + all = set() + for e_n, deps in extras.items(): + if not e_n.startswith("dev_"): + all.update(deps) + extras["all"] = all + + # add tag `all` at the end + if add_all_dev: + extras["all_dev"] = set(vv for v in extras.values() for vv in v) + extras["dev_all"] = extras["all_dev"] + + return extras + + +if __name__ in ["__main__", "builtins", "__builtin__"]: + setup( + use_scm_version={"write_to": "src/kiara/version.txt"}, + extras_require=get_extra_requires(), + ) diff --git a/src/kiara/__init__.py b/src/kiara/__init__.py new file mode 100644 index 000000000..2a83c98c5 --- /dev/null +++ b/src/kiara/__init__.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +# isort: skip_file + +import os + +from .kiara import Kiara # noqa +from .module import KiaraModule # noqa +from .pipeline.pipeline import Pipeline # noqa +from .pipeline.structure import PipelineStructure # noqa +from .pipeline.controller import PipelineController # noqa +from .pipeline.module import PipelineModule # noqa +from .data.registry import DataRegistry # noqa + +"""Top-level package for kiara.""" + + +__author__ = """Markus Binsteiner""" +"""The author of this package.""" +__email__ = "markus.binsteiner@uni.lu" +"""Email address of the author.""" + + +def get_version() -> str: + """Return the current version of *Kiara*.""" + from pkg_resources import DistributionNotFound, get_distribution + + try: + # Change here if project is renamed and does not equal the package name + dist_name = __name__ + __version__ = get_distribution(dist_name).version + except DistributionNotFound: + + try: + 
version_file = os.path.join(os.path.dirname(__file__), "version.txt")
+
+            if os.path.exists(version_file):
+                with open(version_file, encoding="utf-8") as vf:
+                    # strip the trailing newline a version file usually contains
+                    __version__ = vf.read().strip()
+            else:
+                __version__ = "unknown"
+
+        except Exception:
+            # make sure '__version__' is bound on every code path, otherwise the
+            # 'return' statement below would raise a NameError
+            __version__ = "unknown"
+
+    return __version__
diff --git a/src/kiara/_frkl/__init__.py b/src/kiara/_frkl/__init__.py
new file mode 100644
index 000000000..0b4e39511
--- /dev/null
+++ b/src/kiara/_frkl/__init__.py
@@ -0,0 +1,4 @@
+# -*- coding: utf-8 -*-
+from typing import Any, Dict
+
+build_properties: Dict[str, Any] = {"resources": [], "hidden_imports": []}
diff --git a/src/kiara/_frkl/_frkl.json b/src/kiara/_frkl/_frkl.json
new file mode 100644
index 000000000..43ab866f7
--- /dev/null
+++ b/src/kiara/_frkl/_frkl.json
@@ -0,0 +1,11 @@
+{
+  "project": {
+    "full_name": "Markus Binsteiner",
+    "email": "markus.binsteiner@uni.lu",
+    "project_name": "kiara",
+    "exe_name": "kiara",
+    "project_slug": "kiara",
+    "project_main_module": "kiara",
+    "project_short_description": "kiara"
+  }
+}
diff --git a/src/kiara/config.py b/src/kiara/config.py
new file mode 100644
index 000000000..deeacc53c
--- /dev/null
+++ b/src/kiara/config.py
@@ -0,0 +1,196 @@
+# -*- coding: utf-8 -*-
+
+"""Configuration models for the *Kiara* package."""
+
+import collections
+import deepdiff
+import typing
+from pathlib import Path
+from pydantic import BaseModel, Extra, Field, PrivateAttr, validator
+from rich import box
+from rich.console import Console, ConsoleOptions, RenderResult
+from rich.table import Table
+
+from kiara.data.values import StepValueAddress
+from kiara.utils import get_data_from_file
+
+if typing.TYPE_CHECKING:
+    from kiara import PipelineStructure
+
+
+class PipelineStepConfig(BaseModel):
+    """A class to hold the configuration of one module within a [PipelineModule][kiara.pipeline.module.PipelineModule]."""
+
+    class Config:
+        extra = Extra.forbid
+        validate_assignment = True
+
+    module_type: str = Field(description="The name of the module type.")
+    step_id: str = Field(description="The id of the step.")
+    module_config: typing.Dict = Field(
+        default_factory=dict,
+        description="The configuration for the module (module-type specific).",
+    )
+    input_links: typing.Dict[str, typing.List[StepValueAddress]] = Field(
+        default_factory=dict,
+        description="The map with the name of an input link as key, and the connected module output name(s) as value.",
+    )
+
+    @validator("input_links", pre=True)
+    def ensure_input_links_valid(cls, v):
+
+        result = {}
+        for input_name, output in v.items():
+
+            if isinstance(output, str):
+
+                tokens = output.split(".")
+                if len(tokens) == 1:
+                    step_id = output
+                    output_name = input_name
+                elif len(tokens) == 2:
+                    step_id = tokens[0]
+                    output_name = tokens[1]
+                else:
+                    raise NotImplementedError()
+
+            elif isinstance(output, collections.abc.Mapping):
+                step_id = output["step_id"]
+                output_name = output["output_name"]
+            elif isinstance(output, collections.abc.Sequence):
+                raise NotImplementedError()
+            else:
+                raise TypeError(
+                    f"Can't parse input map, invalid type for output: {output}"
+                )
+
+            input_link = StepValueAddress(
+                step_id=step_id, value_name=output_name, sub_value=None
+            )
+            result[input_name] = [input_link]
+
+        return result
+
+
+class KiaraModuleConfig(BaseModel):
+    """Base class that describes the configuration a [KiaraModule][kiara.module.KiaraModule] class accepts.
+
+    This is stored in the ``_config_cls`` class attribute in each ``KiaraModule`` class.
+    By default, such a ``KiaraModule`` is not configurable.
+    """
+
+    _config_hash: typing.Optional[str] = PrivateAttr(default=None)
+    constants: typing.Dict[str, typing.Any] = Field(
+        default_factory=dict, description="Value constants for this module."
+    )
+
+    class Config:
+        extra = Extra.forbid
+        validate_assignment = True
+
+    def get(self, key: str) -> typing.Any:
+
+        if key not in self.__fields__:
+            raise Exception(
+                f"No config value '{key}' in module config class '{self.__class__.__name__}'."
+            )
+
+        return getattr(self, key)
+
+    @property
+    def config_hash(self):
+
+        if self._config_hash is None:
+            _d = self.dict()
+            hashes = deepdiff.DeepHash(_d)
+            self._config_hash = hashes[_d]
+        return self._config_hash
+
+    def __eq__(self, other):
+
+        if self.__class__ != other.__class__:
+            return False
+
+        return self.dict() == other.dict()
+
+    def __hash__(self):
+
+        return hash(self.config_hash)
+
+    def __rich_console__(
+        self, console: Console, options: ConsoleOptions
+    ) -> RenderResult:
+
+        my_table = Table(box=box.MINIMAL, show_header=False)
+        my_table.add_column("Field name", style="i")
+        my_table.add_column("Value")
+        for field in self.__fields__:
+            # render the field value as a string, 'add_row' can't handle arbitrary objects
+            my_table.add_row(field, str(getattr(self, field)))
+
+        yield my_table
+
+
+KIARA_CONFIG = typing.TypeVar("KIARA_CONFIG", bound=KiaraModuleConfig)
+
+
+class PipelineModuleConfig(KiaraModuleConfig):
+    """A class to hold the configuration for a [PipelineModule][kiara.pipeline.module.PipelineModule]."""
+
+    class Config:
+        extra = Extra.allow
+        validate_assignment = True
+
+    steps: typing.List[PipelineStepConfig] = Field(
+        default_factory=list,
+        description="A list of steps/modules of this pipeline, and their connections.",
+    )
+    input_aliases: typing.Dict[str, str] = Field(
+        default_factory=dict,
+        description="A map of input aliases, with the calculated (``__`` -- double underscore!) name as key, and a string (the resulting workflow input alias) as value.",
+    )
+    output_aliases: typing.Dict[str, str] = Field(
+        default_factory=dict,
+        description="A map of output aliases, with the calculated (``__`` -- double underscore!) name as key, and a string (the resulting workflow output alias) as value.",
+    )
+    doc: str = Field(
+        default="-- n/a --", description="Documentation about what the pipeline does."
+    )
+
+    meta: typing.Dict[str, typing.Any] = Field(
+        default_factory=dict, description="Metadata for this workflow."
+    )
+
+    def create_structure(self, parent_id: str) -> "PipelineStructure":
+        from kiara import PipelineStructure
+
+        ps = PipelineStructure(
+            parent_id=parent_id,
+            steps=self.steps,
+            input_aliases=self.input_aliases,
+            output_aliases=self.output_aliases,
+        )
+        return ps
+
+
+class KiaraWorkflowConfig(BaseModel):
+    """The object to hold a configuration for a workflow."""
+
+    class Config:
+        extra = Extra.forbid
+        validate_assignment = True
+
+    @classmethod
+    def from_file(cls, path: typing.Union[str, Path]):
+
+        data = get_data_from_file(path)
+        return KiaraWorkflowConfig(module_type="pipeline", module_config=data)
+
+    module_type: str = Field(
+        description="The name of the 'root' module of this workflow.",
+        default="pipeline",
+    )
+    module_config: typing.Dict[str, typing.Any] = Field(
+        default_factory=dict,
+        description="The configuration for the 'root' module of this workflow.",
+    )
diff --git a/src/kiara/data/__init__.py b/src/kiara/data/__init__.py
new file mode 100644
index 000000000..ac4f47573
--- /dev/null
+++ b/src/kiara/data/__init__.py
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+# isort: skip_file
+
+"""Data and value related classes for *Kiara*."""
+
+from .values import Value, PipelineValues, ValueSet  # noqa
diff --git a/src/kiara/data/registry.py b/src/kiara/data/registry.py
new file mode 100644
index 000000000..6788976ec
--- /dev/null
+++ b/src/kiara/data/registry.py
@@ -0,0 +1,441 @@
+# -*- coding: utf-8 -*-
+
+import logging
+import typing
+import uuid
+
+from kiara.data.values import (
+    DataValue,
+    LinkedValue,
+    Value,
+    ValueField,
+    ValueSchema,
+    ValueType,
+    ValueUpdateHandler,
+)
+
+log = logging.getLogger("kiara")
+
+
+def generate_random_value_id():
+
+    return str(uuid.uuid4())
+
+
+class DataRegistry(object):
+    """Contains and manages all [Value][kiara.data.values.Value] objects for *Kiara*.
+
+    This is one of the central classes in *Kiara*, as it manages all data that is set by users or which results from
+    processing steps in [KiaraModule][kiara.module.KiaraModule]s. Basically, the registry keeps a record of every ``Value`` object that is produced
+    or consumed within *Kiara* by associating it with a unique id. This id can then be used to retrieve or set/replace the
+    current data (bytes) for a value, and to subscribe to events that happen on such ``Value`` objects (which is needed in
+    [PipelineController][kiara.pipeline.controller.PipelineController]s).
+
+    Note:
+        In the future, this will probably become an abstract base class, so it'll be possible to implement different
+        ways of storing/managing values and data.
+    """
+
+    def __init__(self):
+
+        self._id: str = str(uuid.uuid4())
+        self._value_items: typing.Dict[str, DataValue] = {}
+        """Values that have actual data associated with them. The key is the value id, the value is the value wrapper object."""
+        self._linked_value_items: typing.Dict[str, LinkedValue] = {}
+        """Values that track one or several other values. The key is the value id, the value is a dictionary with the tracked value id as key and an optional sub-value query string as value (if not the whole value is used)."""
+        self._linked_value_items_reverse: typing.Dict[str, typing.List[str]] = {}
+        self._values: typing.Dict[str, typing.Any] = {}
+        self._callbacks: typing.Dict[str, typing.List[ValueUpdateHandler]] = {}
+
+    def get_value_item(self, item: typing.Union[str, Value]) -> Value:
+        """Get the [Value][kiara.data.values.Value] object for an id.
+
+        If a string is provided, it is interpreted as a value id.
+        If a ``Value`` object is provided, the registry will check whether its id is registered,
+        and return the object that is registered under that id.
+
+        If the provided id can't be found, an Exception is raised.
+
+        Arguments:
+            item: a value id or ``Value`` object
+
+        Returns:
+            the ``Value`` object
+        """
+
+        if isinstance(item, str):
+            value_id = item
+        elif isinstance(item, Value):
+            value_id = item.id
+        else:
+            raise TypeError(
+                f"Invalid type '{type(item)}', need PipelineValue or string."
+            )
+
+        if value_id in self._value_items.keys():
+            return self._value_items[value_id]
+        elif value_id in self._linked_value_items.keys():
+            return self._linked_value_items[value_id]
+        else:
+            raise Exception(f"No value with id: {value_id}")
+
+    def register_value(
+        self,
+        value_schema: ValueSchema,
+        value_fields: typing.Union[
+            ValueField, typing.Iterable[ValueField], None
+        ] = None,
+        value_id: typing.Optional[str] = None,
+        callbacks: typing.Optional[typing.Iterable[ValueUpdateHandler]] = None,
+        initial_value: typing.Any = None,
+        origin: typing.Optional[str] = None,
+        is_constant: bool = False,
+    ) -> DataValue:
+        """Register a value in this registry.
+
+        This registers a unique id, along with a data schema and other metadata, which can then be 'filled' with actual
+        data.
+
+        Arguments:
+            value_schema: the allowed schema for the data that is held in this value
+            value_fields: the field(s) within a [PipelineStructure][kiara.pipeline.structure.PipelineStructure] that is associated with this value
+            value_id: the (unique) id for this value, if not provided one will be generated
+            callbacks: the callbacks to register for this value (can be added later too)
+            initial_value: if provided, this value will be set
+            origin: a string describing the type of field the value is coming from (e.g. user input, step output, ...)
+            is_constant: whether this value is a constant or not
+
+        Returns:
+            the newly created value object
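+
+        Example:
+            An illustrative sketch (not part of the original docstring; the schema and data are made up):
+
+            ```python
+            registry = DataRegistry()
+            schema = ValueSchema(type="string", doc="An example value.")
+
+            def on_change(*items):
+                print("changed:", [i.id for i in items])
+
+            item = registry.register_value(value_schema=schema, callbacks=[on_change])
+            registry.set_value(item, "hello")  # fires 'on_change'
+            ```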
+        """
+
+        if value_id is not None and value_id in self._values.keys():
+            raise Exception(f"Value id '{value_id}' already registered.")
+
+        if value_id is None:
+            value_id = generate_random_value_id()
+
+        if is_constant and initial_value is None:
+            raise Exception("Can't register constant, no initial value provided.")
+
+        if value_fields is None:
+            _value_fields: typing.Tuple[ValueField, ...] = tuple()
+        elif isinstance(value_fields, ValueField):
+            _value_fields = (value_fields,)
+        elif isinstance(value_fields, typing.Iterable):
+            for vf in value_fields:
+                assert isinstance(vf, ValueField)
+            _value_fields = tuple(value_fields)  # type: ignore
+        else:
+            raise TypeError(
+                f"Invalid type for 'value_fields' argument: {type(value_fields)}"
+            )
+
+        value_item = DataValue(  # type: ignore
+            id=value_id,
+            value_schema=value_schema,
+            value_fields=_value_fields,
+            registry=self,  # type: ignore
+            origin=origin,
+            is_constant=is_constant,
+        )
+
+        self._value_items[value_id] = value_item
+        self._values[value_id] = None
+
+        if callbacks:
+            for cb in callbacks:
+                self.register_callback(cb, value_item)
+
+        if initial_value is not None:
+            self.set_value(value_id, initial_value)
+
+        return value_item
+
+    def register_linked_value(
+        self,
+        linked_values: typing.Union[
+            typing.Dict[str, typing.Dict[str, str]],
+            str,
+            Value,
+            typing.Iterable[typing.Union[str, Value]],
+        ],
+        value_fields: typing.Union[
+            ValueField, typing.Iterable[ValueField], None
+        ] = None,
+        value_id: typing.Optional[str] = None,
+        callbacks: typing.Optional[typing.Iterable[ValueUpdateHandler]] = None,
+        origin: typing.Optional[str] = None,
+    ) -> LinkedValue:
+        """Register a linked value in this registry.
+
+        This registers a unique id, along with one or several other, already existing 'parent' ``Value`` objects. The
+        'value' of the resulting [LinkedValue][kiara.data.values.LinkedValue] and its schema are determined by those upstream objects.
+
+        Note:
+            Currently, only one-to-one mappings of ``Value``/``LinkedValue`` are allowed. This will become more flexible in the future.
+
+        Arguments:
+            linked_values: the 'parent' value(s) this value links to
+            value_fields: field(s) within a [PipelineStructure][kiara.pipeline.structure.PipelineStructure] that is associated with this value
+            value_id: the (unique) id for this value, if not provided one will be generated
+            callbacks: the callbacks to register for this value (can be added later too)
+            origin: a string describing the type of field the value is coming from (e.g. user input, step output, ...)
+
+        Returns:
+            the newly created value object
+        """
+
+        if value_id is not None and value_id in self._values.keys():
+            raise Exception(f"Value id '{value_id}' already registered.")
+
+        if value_id is not None and value_id in self._linked_value_items.keys():
+            raise Exception(f"Value id '{value_id}' already registered as a linked value.")
+
+        if value_id is None:
+            value_id = generate_random_value_id()
+
+        if value_fields is None:
+            _value_fields: typing.Tuple[ValueField, ...]
= tuple() + elif isinstance(value_fields, ValueField): + _value_fields = (value_fields,) + elif isinstance(value_fields, typing.Iterable): + for vf in value_fields: + assert isinstance(vf, ValueField) + _value_fields = tuple(value_fields) # type: ignore + else: + raise TypeError( + f"Invalid type for 'value_fields' argument: {type(value_fields)}" + ) + + _linked_values: typing.Dict[str, typing.Dict[str, str]] = {} + _linked_value_objs: typing.List[DataValue] = [] + # TODO: allow duplicate ids as long as subvalues are different + if isinstance(linked_values, str): + if linked_values in _linked_values.keys(): + raise Exception(f"Duplicate linked value id: {linked_values}") + _linked_values[linked_values] = {} + elif isinstance(linked_values, Value): + if linked_values.id in _linked_values.keys(): + raise Exception(f"Duplicate linked value id: {linked_values.id}") + _linked_values[linked_values.id] = {} + elif isinstance(linked_values, typing.Mapping): + for k, v in linked_values.items(): + if k in _linked_values.keys(): + raise Exception(f"Duplicate linked value id: {k}") + if not v: + _linked_values[k] = {} + else: + raise NotImplementedError() + elif isinstance(linked_values, typing.Iterable): + for linked_value in linked_values: + _v = self.get_value_item(linked_value) # type: ignore + if _v.id in _linked_values.keys(): + raise Exception(f"Duplicate linked value id: {_v.id}") + _linked_values[_v.id] = {} + else: + raise TypeError( + f"Invalid type '{type(linked_values)}' for linked values: {linked_values}" + ) + + if not _linked_values: + raise Exception("Can't create linked value without any links.") + for linked_value, details in _linked_values.items(): + if details: + raise NotImplementedError() + # make sure the value exists + _i = self.get_value_item(linked_value) + if not isinstance(_i, DataValue): + raise NotImplementedError() + _linked_value_objs.append(_i) + + # TODO: auto-generate doc string + schema = ValueSchema(type=ValueType.any, doc="-- linked value --") + + value_item = LinkedValue( # type: ignore + id=value_id, + value_schema=schema, + value_fields=_value_fields, + registry=self, # type: ignore + origin=origin, + links=_linked_values, + ) + + self._update_linked_value( + item=value_item, changed_upstream_values=_linked_value_objs + ) + self._linked_value_items[value_id] = value_item + for linked_value_id in _linked_values.keys(): + self._linked_value_items_reverse.setdefault(linked_value_id, []).append( + value_item.id + ) + + if callbacks: + for cb in callbacks: + self.register_callback(cb, value_item) + + return self._linked_value_items[value_id] + + def register_callback( + self, callback: ValueUpdateHandler, *items: typing.Union[str, Value] + ): + """Register a callback function that is called when one or several of the provided data items were changed. + + This callback needs to have a signature that takes in one or several objects of the class [Value][kiara.data.values.Value] + as positional parameters (``*args``). If the callback has keyword arguments ``(**kwargs)``, those will be ignored. + + Arguments: + callback: the callback + *items: the value items (or their ids) to get notified for + + """ + + for item in items: + item = self.get_value_item(item) + self._callbacks.setdefault(item.id, []).append(callback) + + def get_value_data(self, item: typing.Union[str, Value]) -> typing.Any: + """Request the actual data for a value item or its id. + + Arguments: + item: the value or its id + + Returns: + The data wrapped in a Python object. 
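+
+        Example:
+            An illustrative sketch (not part of the original docstring; names and data are made up):
+
+            ```python
+            registry = DataRegistry()
+            schema = ValueSchema(type="integer", doc="An example value.")
+            item = registry.register_value(value_schema=schema, initial_value=42)
+
+            assert registry.get_value_data(item) == 42
+            assert registry.get_value_data(item.id) == 42  # a value id works, too
+            ```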
+ """ + + item = self.get_value_item(item) + value: typing.Any = None + if item.id in self._value_items.keys(): + return self._values[item.id] + elif item.id in self._linked_value_items.keys(): + linked_item = self._linked_value_items[item.id] + if len(linked_item.links) != 1: + raise NotImplementedError() + for linked_id, details in linked_item.links.items(): + if details: + raise NotImplementedError() + value = self.get_value_data(linked_id) + + return value + + def set_value(self, item: typing.Union[str, Value], value: typing.Any) -> bool: + """Set a single value. + + In most cases, the [set_values][kiara.data.registry.DataRegistry.set_values] method will be used, which is + always recommended if multiple values are updated, since otherwise callbacks will be sent out seperately + which might be inefficient. + + + Arguments: + item: the value object or id to be set + value: the data (a Python object) + + Returns: + whether the value was changed (``True``) or not (``False``) + """ + + item = self.get_value_item(item) + + result = self.set_values({item: value}) # type: ignore + return result[item] + + def set_values( + self, values: typing.Mapping[typing.Union[str, DataValue], typing.Any] + ) -> typing.Dict[Value, bool]: + """Set data on values. + + Args: + values: a dict where the key is the value to set (or it's id), and the value is the data to set + + Returns: + a dict where the key is the value and the value a bool that indicates whether the + value was changed or not for that value + """ + + # ensure we are only dealing with values that can be set + for _item, value in values.items(): + + item: DataValue = self.get_value_item(_item) # type: ignore + if not isinstance(item, DataValue): + raise Exception(f"Can't set non-datavalue '{item.id}'.") + + if item.is_constant: + if self._values.get(item.id) is not None: + raise Exception(f"Can't set value '{item.id}', it's a constant.") + + if value is None: + raise ValueError("Value can't be None") + + result: typing.Dict[Value, bool] = {} + callbacks: typing.Dict[typing.Callable, typing.List[Value]] = {} + linked_values_to_update: typing.Dict[str, typing.List[DataValue]] = {} + + # set all values, record callbacks and downstream dependencies that also need to be changed + for _item, value in values.items(): + + item: DataValue = self.get_value_item(_item) # type:ignore + + old_value = self.get_value_data(item) + changed = True + if old_value == value: + changed = False + else: + # TODO: validate value + self._values[item.id] = value + self._value_items[item.id].is_valid = True + for cb in self._callbacks.get(item.id, []): + callbacks.setdefault(cb, []).append(item) + + _downstream_values = self._linked_value_items_reverse.get(item.id, None) + if _downstream_values: + for _up in _downstream_values: + linked_values_to_update.setdefault(_up, []).append(item) + result[item] = changed + + # now we need to re-compute all the linked values that are dependent on one or several of the changed items + for linked_value, upstream_values in linked_values_to_update.items(): + _i: LinkedValue = self.get_value_item(linked_value) # type: ignore + if not isinstance(_i, LinkedValue): + raise NotImplementedError() + self._update_linked_value(item=_i, changed_upstream_values=upstream_values) + for cb in self._callbacks.get(linked_value, []): + callbacks.setdefault(cb, []).append(_i) + + for cb, v in callbacks.items(): + cb(*v) + + return result + + def _update_linked_value( + self, item: LinkedValue, changed_upstream_values: typing.List[DataValue] + ): + 
"""Update metadata for a linked value after one or several of it's 'parents' changed. + + Arguments: + item: the value to update + changed_upstream_values: a list of parent values that were changed + """ + + assert isinstance(item, LinkedValue) + + valid = True + for value_id, details in item.links.items(): + linked_item = self.get_value_item(value_id) + if not linked_item.is_valid: + valid = False + break + + item.is_valid = valid + + def get_stats(self) -> typing.Dict: + + return self._values + + def __eq__(self, other): + + if not isinstance(other, DataRegistry): + return False + + return self._id == other._id + + def __hash__(self): + + return hash(self._id) diff --git a/src/kiara/data/values.py b/src/kiara/data/values.py new file mode 100644 index 000000000..ef639fff6 --- /dev/null +++ b/src/kiara/data/values.py @@ -0,0 +1,699 @@ +# -*- coding: utf-8 -*- + +"""A module that contains value-related classes for *Kiara*. + +A value in Kiara-speak is a pointer to actual data (aka 'bytes'). It contains metadata about that data (like whether it's +valid/set, what type/schema it has, when it was last modified, ...), but it does not contain the data itself. The reason for +that is that such data can be fairly large, and in a lot of cases it is not necessary for the code involved to have +access to it, access to the metadata is enough. + +Each Value has a unique id, which can be used to retrieve the data (whole, or parts of it) from a [DataRegistry][kiara.data.registry.DataRegistry]. In addition, that id can be used to subscribe to change events for a value (published +whenever the data that is associated with a value was changed). +""" + +import abc +import logging +import typing +import uuid +from datetime import datetime +from enum import Enum +from faker import Faker +from pydantic import BaseModel, Extra, Field, PrivateAttr, root_validator + +from kiara.defaults import INVALID_VALUE_NAMES, PIPELINE_PARENT_MARKER + +if typing.TYPE_CHECKING: + from kiara.data.registry import DataRegistry + +log = logging.getLogger("kiara") +fake = Faker() + +try: + + class ValueUpdateHandler(typing.Protocol): + """The call signature for callbacks that can be registered as value update handlers.""" + + def __call__(self, *items: "Value", **kwargs: typing.Any) -> typing.Any: + ... + + +except Exception: + # there is some issue with older Python versions, typing.Protocol, and Pydantic + ValueUpdateHandler = typing.Callable # type:ignore + + +class StepValueAddress(BaseModel): + """Small model to describe the address of a value of a step, within a Pipeline/PipelineStructure.""" + + class Config: + extra = Extra.forbid + + step_id: str = Field(description="The id of a step within a pipeline.") + value_name: str = Field( + description="The name of the value (output name or pipeline input name)." + ) + sub_value: typing.Optional[str] = Field( + default=None, + description="A reference to a subitem of a value (e.g. 
+    )
+
+    @property
+    def alias(self):
+        """An alias string for this address (in the form ``[step_id].[value_name]``)."""
+        return generate_step_alias(self.step_id, self.value_name)
+
+    def __eq__(self, other):
+
+        if not isinstance(other, StepValueAddress):
+            return False
+
+        return (self.step_id, self.value_name, self.sub_value) == (
+            other.step_id,
+            other.value_name,
+            other.sub_value,
+        )
+
+    def __hash__(self):
+
+        return hash((self.step_id, self.value_name, self.sub_value))
+
+    def __repr__(self):
+
+        if self.sub_value:
+            sub_value = f" sub_value={self.sub_value}"
+        else:
+            sub_value = ""
+        return f"StepValueAddress(step_id={self.step_id}, value_name={self.value_name}{sub_value})"
+
+    def __str__(self):
+        return self.__repr__()
+
+
+class ValueSchema(BaseModel):
+    """The schema of a value.
+
+    The schema contains the [ValueType][kiara.data.values.ValueType] of a value, as well as an optional default that
+    will be used if no user input was given (yet) for a value.
+
+    For more complex types like arrays and tables, a sub-schema will be available (e.g. columns of a table, type of
+    the array-items, ...). This bit is not implemented yet.
+    """
+
+    class Config:
+        use_enum_values = True
+
+    type: "ValueType"
+    doc: str = Field(
+        default="-- n/a --",
+        description="A description for the value of this input field.",
+    )
+    default: typing.Any = Field(description="A default value.", default=None)
+    sub_schema: typing.Union[
+        None, "ValueSchema", typing.Mapping[str, "ValueSchema"]
+    ] = Field(
+        description="In case this schema's type is a container type (list, dict, ...), this field specifies the schema of its content.",
+        default=None,
+    )
+
+    @property
+    def type_obj(self):
+        return ValueType[self.type]
+
+    def __eq__(self, other):
+
+        if not isinstance(other, ValueSchema):
+            return False
+
+        return (self.type, self.default) == (other.type, other.default)
+
+    def __hash__(self):
+
+        return hash((self.type, self.default))
+
+
+class Value(BaseModel, abc.ABC):
+    """A pointer to 'actual' data (bytes), along with metadata associated with this data.
+
+    This object is created by a [DataRegistry][kiara.data.registry.DataRegistry], and can be used to retrieve the associated data
+    from that registry. In addition, it can be used to subscribe to change events for that data, using the
+    [register_callback][kiara.data.registry.DataRegistry.register_callback] method.
+    The reason the data itself is not contained within this model is that the data could be very big,
+    and it might not be necessary to hold it in memory in a lot of cases.
+    """
+
+    class Config:
+        extra = Extra.forbid
+        use_enum_values = True
+
+    _registry: typing.Optional["DataRegistry"] = PrivateAttr()
+
+    id: str = Field(description="A unique id for this value.")
+    value_schema: ValueSchema = Field(description="The schema of this value.")
+    value_fields: typing.Tuple["ValueField", ...] = Field(
+        description="Value fields within a pipeline connected to this value.",
+        default_factory=set,
+    )
+    is_constant: bool = Field(
+        description="Whether this value is a constant.", default=False
+    )
+    origin: typing.Optional[str] = Field(
+        description="Description of how/where the value was set.", default="n/a"
+    )
+    last_update: typing.Optional[datetime] = Field(
+        default=None, description="The time the last update to this value happened."
+    )
+    is_streaming: bool = Field(
+        default=False,
+        description="Whether the value is currently streamed into this object.",
+    )
+    is_valid: bool = Field(
+        description="Whether the value is set and valid.", default=False
+    )
+    metadata: typing.Dict[str, typing.Any] = Field(
+        description="Metadata relating to the actual data (size, no. of rows, etc. -- depending on data type).",
+        default_factory=dict,
+    )
+
+    def __init__(self, **data):  # type: ignore
+        data["stage"] = "init"
+        registry = data.pop("registry", None)
+        if registry is None:
+            raise ValueError("No 'registry' provided.")
+        super().__init__(**data)
+        self._registry = registry
+
+    @property
+    def registry(self) -> "DataRegistry":
+        if self._registry is None:
+            raise Exception(f"Registry not set for value: {self}")
+        return self._registry
+
+    def register_callback(
+        self, callback: typing.Callable
+    ):  # this needs to implement ValueUpdateHandler, but can't add that type hint due to a pydantic error
+        self.registry.register_callback(callback, self)
+
+    def __eq__(self, other):
+
+        # TODO: compare all attributes if id is equal, just to make sure...
+
+        if not isinstance(other, Value):
+            return False
+        return self.id == other.id
+
+    def __hash__(self):
+        return hash(self.id)
+
+    def __repr__(self):
+        return f"{self.__class__.__name__}(id={str(self.id)} valid={self.is_valid})"
+
+    def __str__(self):
+        return self.__repr__()
+
+
+class DataValue(Value):
+    """An implementation of [Value][kiara.data.values.Value] that points to 'actual' data.
+
+    This is opposed to a [LinkedValue][kiara.data.values.LinkedValue], which points to one or several other ``Value``
+    objects, and is read-only.
+    """
+
+    @root_validator(pre=True)
+    def validate_input_fields(cls, values):
+
+        # TODO: validate against schema?
+
+        if values.get("last_update", None):
+            raise ValueError(
+                "Can't set 'last_update', this value will be set automatically."
+            )
+
+        is_init = True if values.pop("stage", None) == "init" else False
+
+        value_schema: ValueSchema = values.get("value_schema", None)
+        if value_schema is not None and not is_init:
+            raise ValueError(
+                "Can't set value_schema after initial construction of a Value object."
+            )
+        else:
+            if not isinstance(value_schema, ValueSchema):
+                raise TypeError(f"Invalid type for ValueSchema: {type(value_schema)}")
+
+        value_id: str = values.get("id", None)
+        if value_id and not is_init:
+            raise ValueError(
+                "Can't set value id after initial construction of a Value object."
+            )
+        else:
+            if not isinstance(value_id, str):
+                raise TypeError(f"Invalid type for value id: {type(value_id)}")
+
+        is_constant: bool = values.get("is_constant", None)
+        if is_constant and not is_init:
+            raise ValueError(
+                "Can't set 'is_constant' value after initial construction of a Value object."
+            )
+        else:
+            if not isinstance(is_constant, bool):
+                raise TypeError(f"Invalid type for 'is_constant': {type(is_constant)}")
+
+        if is_constant:
+            values["origin"] = "constant"
+
+        values["last_update"] = datetime.now()
+
+        return values
+
+    def get_value_data(self) -> typing.Any:
+        return self.registry.get_value_data(self)
+
+    def set_value_data(self, value: typing.Any) -> bool:
+
+        # TODO: validate against schema
+        changed: bool = self.registry.set_value(self, value)
+        return changed
+
+
+class LinkedValue(Value):
+    """An implementation of [Value][kiara.data.values.Value] that points to one or several other ``Value`` objects.
+
+    This is opposed to a [DataValue][kiara.data.values.DataValue], which points to 'actual' data, and is readable and writable.
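+
+    Example:
+        An illustrative sketch (not part of the original docstring): a linked value reads its data
+        through its parent value.
+
+        ```python
+        registry = DataRegistry()
+        schema = ValueSchema(type="boolean", doc="An example value.")
+        parent = registry.register_value(value_schema=schema, initial_value=True)
+
+        linked = registry.register_linked_value(parent)
+        assert linked.get_value_data() is True
+        ```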
+ """ + + links: typing.Dict[str, typing.Dict[str, str]] + + @root_validator(pre=True) + def validate_input_fields(cls, values): + + # TODO: validate against schema? + + if values.get("last_update", None): + raise ValueError( + "Can't set 'last_update', this value will be set automatically." + ) + + is_init = True if values.pop("stage", None) == "init" else False + + value_schema: ValueSchema = values.get("value_schema", None) + if value_schema is not None and not is_init: + raise ValueError( + "Can't set value_schema after initial construction of a Value object." + ) + else: + if not isinstance(value_schema, ValueSchema): + raise TypeError(f"Invalid type for ValueSchema: {type(value_schema)}") + + value_id: str = values.get("id", None) + if value_id and not is_init: + raise ValueError( + "Can't set value id after initial construction of a Value object." + ) + else: + if not isinstance(value_id, str): + raise TypeError(f"Invalid type for value id: {type(value_id)}") + + is_constant: bool = values.get("is_constant", None) + if is_constant is not None: + raise ValueError("Can't set 'is_constant' value in LinkedValue object.") + + values["last_update"] = datetime.now() + values["is_constant"] = False + + return values + + def get_value_data(self) -> typing.Any: + return self.registry.get_value_data(self) + + def set_value_data(self, value: typing.Any) -> bool: + raise Exception("Linked values can't be set.") + + +class ValueSet(typing.MutableMapping[str, Value]): + """A dict-like object that contains a set of value fields that belong together in some way (for example outputs of a step or pipeline).""" + + def __init__(self, items: typing.Mapping[str, Value]): + + for item, value in items.items(): + + if value is None: + raise Exception( + f"Can't create value set, item '{item}' does not have a value (yet)." + ) + + if item.startswith("_"): + raise ValueError(f"Value name can't start with '_': {item}") + if item in INVALID_VALUE_NAMES: + raise ValueError(f"Invalid value name '{item}'.") + super(ValueSet, self).__setattr__("_value_items", items) + # TODO: auto-generate doc + self._schema = ValueSchema(type="value_items", default=None, doc="-- n/a --") + + def __getattr__(self, item): + + # if item == "ALL": + if item == "_value_items": + raise KeyError() + + # if item == "ALL": + # return {k: v. for k, v in self.__dict__["_value_items"].items()} + elif item in self.__dict__["_value_items"].keys(): + return self.__dict__["_value_items"][item] + else: + return super().__getattribute__(item) + + def __setattr__(self, key, value): + + if key == "ALL": + self.set_values(**value) + elif key in self._value_items.keys(): + self.set_values(**{key: value}) + elif key.startswith("_") or key in INVALID_VALUE_NAMES: + self.__dict__[key] = value + else: + av = list(self._value_items.keys()) + raise Exception( + f"Can't set value, invalid field name '{key}'. 
Available fields: {', '.join(av)}"
+            )
+
+    def __getitem__(self, item: str) -> Value:
+
+        return self._value_items[item]
+
+    def __setitem__(self, key: str, value: typing.Any):
+
+        self.set_values(**{key: value})
+
+    def __delitem__(self, key: str):
+
+        raise Exception(f"Removing items not supported: {key}")
+
+    def __iter__(self) -> typing.Iterator[str]:
+        return iter(self._value_items)
+
+    def __len__(self):
+        return len(self._value_items)
+
+    @property
+    def items_are_valid(self) -> bool:
+
+        for item in self._value_items.values():
+            if item is None or not item.is_valid:
+                return False
+        return True
+
+    def dict(self):
+        result = {}
+        for k, v in self._value_items.items():
+            result[k] = v.get_value_data()
+        return result
+
+    def set_values(self, **values: typing.Any) -> typing.Dict[Value, bool]:
+
+        invalid: typing.List[str] = []
+        registries: typing.Dict[DataRegistry, typing.Dict[Value, typing.Any]] = {}
+
+        for k, v in values.items():
+
+            if isinstance(v, Value):
+                raise Exception(
+                    f"Invalid type for field '{k}': can't use a 'Value' object as data."
+                )
+
+            if k not in self._value_items.keys():
+                invalid.append(k)
+            else:
+                item: Value = self._value_items[k]
+                registries.setdefault(item.registry, {})[item] = v
+
+        if invalid:
+            raise ValueError(
+                f"No value item(s) with name(s) {', '.join(invalid)} available, valid names: {', '.join(self._value_items.keys())}"
+            )
+
+        result: typing.Dict[Value, bool] = {}
+
+        for registry, v in registries.items():
+            _r = registry.set_values(v)
+            result.update(_r)
+
+        return result
+
+    def to_details(self) -> "PipelineValues":
+
+        result = {}
+        for name, item in self._value_items.items():
+            result[name] = PipelineValue.from_value_obj(item)
+
+        return PipelineValues(values=result)
+
+    def to_dict(self) -> typing.Dict[str, typing.Any]:
+
+        return self.to_details().dict()
+
+    def to_json(self) -> str:
+
+        return self.to_details().json()
+
+    def __repr__(self):
+
+        return f"ValueSet(values={self._value_items} valid={self.items_are_valid})"
+
+
+class ValueType(Enum):
+    """Supported value types.
+
+    It's very early days, so this does not really do anything yet.
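+
+    Example:
+        An illustrative sketch (not part of the original docstring): each member carries its
+        associated metadata (``id``, ``python`` type, ...) as attributes.
+
+        ```python
+        vt = ValueType("integer")
+        assert vt.python is int
+        assert ValueType.string.id == "string"
+        ```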
+ """ + + def __new__(cls, *args, **kwds): + value = args[0]["id"] + obj = object.__new__(cls) + obj._value_ = value + return obj + + def __init__(self, type_map: typing.Mapping[str, typing.Any]): + + for k, v in type_map.items(): + setattr(self, k, v) + + any = {"id": "any", "python": object, "fake_value": fake.pydict} + integer = {"id": "integer", "python": int, "fake_value": fake.pyint} + string = {"id": "string", "python": str, "fake_value": fake.pystr} + dict = {"id": "dict", "python": dict, "fake_value": fake.pydict} + boolean = {"id": "boolean", "python": bool, "fake_value": fake.pybool} + table = { + "id": "table", + "python": typing.List[typing.Dict], + "fake_value": fake.pydict, + } + value_items = { + "id": "value_items", + "python": ValueSet, + "fake_value": NotImplemented, + } + + +ValueSchema.update_forward_refs() + + +class PipelineValue(BaseModel): + """Convenience wrapper to make the [PipelineState][kiara.pipeline.pipeline.PipelineState] json/dict export prettier.""" + + @classmethod + def from_value_obj(cls, value: Value): + + return PipelineValue( + id=value.id, + value_schema=value.value_schema, + is_valid=value.is_valid, + is_constant=value.is_constant, + origin=value.origin, + last_update=value.last_update, + is_streaming=value.is_streaming, + metadata=value.metadata, + ) + + class Config: + extra = Extra.forbid + allow_mutation = False + + id: str = Field(description="A unique id for this value.") + is_valid: bool = Field( + description="Whether the value is set and valid.", default=False + ) + value_schema: ValueSchema = Field(description="The schema of this value.") + is_constant: bool = Field( + description="Whether this value is a constant.", default=False + ) + origin: typing.Optional[str] = Field( + description="Description of how/where the value was set.", default="n/a" + ) + last_update: datetime = Field( + default=None, description="The time the last update to this value happened." + ) + is_streaming: bool = Field( + default=False, + description="Whether the value is currently streamed into this object.", + ) + metadata: typing.Dict[str, typing.Any] = Field( + description="Metadata relating to the actual data (size, no. of rows, etc. -- depending on data type).", + default_factory=dict, + ) + + +class PipelineValues(BaseModel): + """Convenience wrapper to make the [PipelineState][kiara.pipeline.pipeline.PipelineState] json/dict export prettier. + + This is basically just a simplified version of the [ValueSet][kiara.data.values.ValueSet] class that is using + pydantic, in order to make it easy to export to json. + """ + + @classmethod + def from_value_set(cls, value_set: ValueSet): + + values: typing.Dict[str, PipelineValue] = {} + for k, v in value_set.items(): + values[k] = PipelineValue.from_value_obj(v) + + return PipelineValues(values=values) + + values: typing.Dict[str, PipelineValue] = Field( + description="Field names are keys, and the data as values." + ) + + class Config: + use_enum_values = True + + +class ValueField(BaseModel): + """An object that holds information about the location of a value within a pipeline. + + This object does not contain the value itself. 
+
+    There are four different ``ValueField`` types:
+
+    - [kiara.data.values.StepInputField][]: an input to a step
+    - [kiara.data.values.StepOutputField][]: an output of a step
+    - [kiara.data.values.PipelineInputField][]: an input to a pipeline
+    - [kiara.data.values.PipelineOutputField][]: an output of a pipeline
+
+    Several of these field objects can target the same value; for example, a step output and a connected step input are
+    actually the same.
+    """
+
+    class Config:
+        allow_mutation = False
+        extra = Extra.forbid
+
+    _id: uuid.UUID = PrivateAttr(default_factory=uuid.uuid4)
+    value_name: str
+    value_schema: ValueSchema
+    pipeline_id: str
+
+    def __eq__(self, other):
+
+        if not isinstance(other, self.__class__):
+            return False
+
+        return self._id == other._id
+
+    def __hash__(self):
+        return hash(self._id)
+
+    def __repr__(self):
+        step_id = ""
+        if hasattr(self, "step_id"):
+            step_id = f" step_id='{self.step_id}'"
+        return f"{self.__class__.__name__}(value_name='{self.value_name}' pipeline_id='{self.pipeline_id}'{step_id})"
+
+    def __str__(self):
+        return self.__repr__()
+
+
+def generate_step_alias(step_id: str, value_name: str) -> str:
+    return f"{step_id}.{value_name}"
+
+
+class StepInputField(ValueField):
+    """An input to a step.
+
+    This object can either have a 'connected_outputs' set, or a 'connected_pipeline_input', not both.
+    """
+
+    step_id: str = Field(description="The step id.")
+    connected_outputs: typing.Optional[typing.List[StepValueAddress]] = Field(
+        default=None,
+        description="A potential connected list of one or several module outputs.",
+    )
+    connected_pipeline_input: typing.Optional[str] = Field(
+        default=None, description="A potential pipeline input."
+    )
+
+    @root_validator(pre=True)
+    def ensure_single_connected_item(cls, values):
+
+        if values.get("connected_outputs", None) and values.get(
+            "connected_pipeline_input"
+        ):
+            raise ValueError("Multiple connected items, only one allowed.")
+
+        return values
+
+    @property
+    def alias(self) -> str:
+        return generate_step_alias(self.step_id, self.value_name)
+
+    @property
+    def address(self) -> StepValueAddress:
+        return StepValueAddress(step_id=self.step_id, value_name=self.value_name)
+
+
+class StepOutputField(ValueField):
+    """An output of a step."""
+
+    class Config:
+        allow_mutation = True
+
+    step_id: str = Field(description="The step id.")
+    pipeline_output: typing.Optional[str] = Field(
+        description="The connected pipeline output."
+    )
+    connected_inputs: typing.List[StepValueAddress] = Field(
+        description="The step inputs that are connected to this step output",
+        default_factory=list,
+    )
+
+    @property
+    def alias(self) -> str:
+        return generate_step_alias(self.step_id, self.value_name)
+
+    @property
+    def address(self) -> StepValueAddress:
+        return StepValueAddress(step_id=self.step_id, value_name=self.value_name)
+
+
+class PipelineInputField(ValueField):
+    """An input to a pipeline."""
+
+    connected_inputs: typing.List[StepValueAddress] = Field(
+        description="The step inputs that are connected to this pipeline input",
+        default_factory=list,
+    )
+
+    @property
+    def alias(self) -> str:
+        return generate_step_alias(PIPELINE_PARENT_MARKER, self.value_name)
+
+
+class PipelineOutputField(ValueField):
+    """An output of a pipeline."""
+
+    connected_output: StepValueAddress = Field(description="The connected step output.")
+
+    @property
+    def alias(self) -> str:
+        return generate_step_alias(PIPELINE_PARENT_MARKER, self.value_name)
+
+
+Value.update_forward_refs()
+DataValue.update_forward_refs()
+LinkedValue.update_forward_refs()
+StepInputField.update_forward_refs()
+StepOutputField.update_forward_refs()
diff --git a/src/kiara/defaults.py b/src/kiara/defaults.py
new file mode 100644
index 000000000..84d299d9a
--- /dev/null
+++ b/src/kiara/defaults.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+import os
+import sys
+from appdirs import AppDirs
+
+kiara_app_dirs = AppDirs("kiara", "DHARPA")
+
+if not hasattr(sys, "frozen"):
+    KIARA_MODULE_BASE_FOLDER = os.path.dirname(__file__)
+    """Marker to indicate the base folder for the `kiara` module."""
+else:
+    KIARA_MODULE_BASE_FOLDER = os.path.join(sys._MEIPASS, "kiara")  # type: ignore
+    """Marker to indicate the base folder for the `kiara` module."""
+
+KIARA_RESOURCES_FOLDER = os.path.join(KIARA_MODULE_BASE_FOLDER, "resources")
+"""Default resources folder for this package."""
+
+MODULE_TYPE_KEY = "module_type"
+"""The key to specify the type of a module."""
+
+STEP_ID_KEY = "step_id"
+"""The key to specify the step id."""
+
+INVALID_VALUE_NAMES = ["dict", "items_are_valid", "set_values", "set_value", "ALL"]
+"""List of reserved names; inputs/outputs can't use those."""
+
+PIPELINE_PARENT_MARKER = "__pipeline__"
+"""Marker string in the pipeline structure that indicates a parent pipeline element."""
+
+DEFAULT_EXCLUDE_DIRS = [".git", ".tox", ".cache"]
+"""List of directory names to exclude by default when walking a folder recursively."""
+
+VALID_PIPELINE_FILE_EXTENSIONS = ["yaml", "yml", "json"]
+"""File extensions a kiara pipeline/workflow file can have."""
+
+MODULE_TYPE_NAME_KEY = "module_type_name"
+"""The string for the module type name in a module configuration dict."""
diff --git a/src/kiara/doc/__init__.py b/src/kiara/doc/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/kiara/doc/mkdocs_macros_kiara.py b/src/kiara/doc/mkdocs_macros_kiara.py
new file mode 100644
index 000000000..4651b266e
--- /dev/null
+++ b/src/kiara/doc/mkdocs_macros_kiara.py
@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+import inspect
+import os
+import typing
+from pydantic import BaseModel
+from pydoc import locate
+
+from kiara.config import KiaraModuleConfig, KiaraWorkflowConfig, PipelineModuleConfig
+from kiara.data.values import (
+    PipelineInputField,
+    PipelineOutputField,
+    PipelineValue,
+    PipelineValues,
+    StepInputField,
+    StepOutputField,
+    StepValueAddress,
+    Value,
+    ValueSchema,
+)
+from kiara.mgmt import PipelineModuleManager
+from kiara.pipeline.pipeline import (
+    PipelineInputEvent,
+    PipelineOutputEvent,
+    PipelineState,
+    PipelineStep,
+    PipelineStructureDesc,
+    StepInputEvent,
+    StepOutputEvent,
+)
+from kiara.utils import StringYAML
+
+KIARA_MODEL_CLASSES: typing.Mapping[str, typing.List[typing.Type[BaseModel]]] = {
+    "values": [
+        ValueSchema,
+        Value,
+        PipelineValue,
+        PipelineValues,
+        StepValueAddress,
+        StepInputField,
+        StepOutputField,
+        PipelineInputField,
+        PipelineOutputField,
+    ],
+    "modules": [
+        KiaraModuleConfig,
+        PipelineModuleConfig,
+        PipelineStep,
+        PipelineStructureDesc,
+        PipelineState,
+        KiaraWorkflowConfig,
+    ],
+    "events": [
+        StepInputEvent,
+        StepOutputEvent,
+        PipelineInputEvent,
+        PipelineOutputEvent,
+    ],
+}
+
+
+yaml = StringYAML()
+
+
+def define_env(env):
+    """
+    This is the hook for defining variables, macros and filters.
+
+    - variables: the dictionary that contains the environment variables
+    - macro: a decorator function, to declare a macro
+    """
+
+    # env.variables["baz"] = "John Doe"
+
+    @env.macro
+    def get_schema_for_model(model_class: typing.Union[str, typing.Type[BaseModel]]):
+
+        if isinstance(model_class, str):
+            _class: typing.Type[BaseModel] = locate(model_class)  # type: ignore
+        else:
+            _class = model_class
+
+        schema_json = _class.schema_json(indent=2)
+
+        return schema_json
+
+    @env.macro
+    def get_src_of_object(obj: typing.Union[str, typing.Any]):
+
+        if isinstance(obj, str):
+            _obj: typing.Type[BaseModel] = locate(obj)  # type: ignore
+        else:
+            _obj = obj
+
+        src = inspect.getsource(_obj)
+        return src
+
+    @env.macro
+    def get_pipeline_config(pipeline_name: str):
+
+        pmm = PipelineModuleManager()
+        desc = pmm.pipeline_descs[pipeline_name]["data"]
+
+        desc_str = yaml.dump(desc)
+        return desc_str
+
+
+def on_post_build(env):
+    """Post-build actions."""
+
+    site_dir = env.conf["site_dir"]
+
+    for category, classes in KIARA_MODEL_CLASSES.items():
+
+        for cls in classes:
+            schema_json = cls.schema_json(indent=2)
+
+            file_path = os.path.join(
+                site_dir, "development", "entities", category, f"{cls.__name__}.json"
+            )
+            os.makedirs(os.path.dirname(file_path), exist_ok=True)
+            with open(file_path, "w") as f:
+                f.write(schema_json)
diff --git a/src/kiara/events.py b/src/kiara/events.py
new file mode 100644
index 000000000..d8bd9186b
--- /dev/null
+++ b/src/kiara/events.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+import typing
+from pydantic import BaseModel, Field
+
+
+class StepEvent(BaseModel):
+    class Config:
+        allow_mutation = False
+
+    pipeline_id: str
+
+    def __repr__(self):
+        d = self.dict()
+        d.pop("pipeline_id")
+        return f"{self.__class__.__name__}(pipeline_id={self.pipeline_id} data={d})"
+
+    def __str__(self):
+        return self.__repr__()
+
+
+class StepInputEvent(StepEvent):
+    """Event that gets fired when one or several inputs for steps within a pipeline have changed."""
+
+    updated_step_inputs: typing.Dict[str, typing.List[str]] = Field(
+        description="steps (keys) with updated inputs which need re-processing (value is list of updated input names)"
+    )
+
+    @property
+    def newly_stale_steps(self) -> typing.List[str]:
+        """Convenience method to display the steps that have been rendered 'stale' by this event."""
+        return list(self.updated_step_inputs.keys())
+
+
+class StepOutputEvent(StepEvent):
+    """Event that gets fired when one or several outputs for steps within a pipeline have changed."""
+
+    updated_step_outputs: typing.Dict[str, typing.List[str]] = Field(
+        description="steps (keys) that finished processing of one, several or all outputs (values are list of 'finished' output fields)"
+    )
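+
+
+# An illustrative sketch (not part of the original module) of how such an event might be
+# constructed and inspected; the pipeline/step ids are made up:
+#
+#     event = StepInputEvent(
+#         pipeline_id="example_pipeline",
+#         updated_step_inputs={"and_1": ["a", "b"]},
+#     )
+#     event.newly_stale_steps  # -> ["and_1"]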
+
+
+class PipelineInputEvent(StepEvent):
+    """Event that gets fired when one or several inputs for the pipeline itself have changed."""
+
+    updated_pipeline_inputs: typing.List[str] = Field(
+        description="list of pipeline input names that were changed"
+    )
+
+
+class PipelineOutputEvent(StepEvent):
+    """Event that gets fired when one or several outputs for the pipeline itself have changed."""
+
+    updated_pipeline_outputs: typing.List[str] = Field(
+        description="list of pipeline output names that were changed"
+    )
+
+
+class OtherEvent(StepEvent):
+
+    new_streaming_input: typing.Dict[str, typing.List[str]] = Field(
+        description="steps (keys) where there was new data streamed to one or more inputs (values are list of those input names)"
+    )
diff --git a/src/kiara/interfaces/__init__.py b/src/kiara/interfaces/__init__.py
new file mode 100644
index 000000000..aa32cb423
--- /dev/null
+++ b/src/kiara/interfaces/__init__.py
@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+"""Implementation of interfaces for *Kiara*."""
diff --git a/src/kiara/interfaces/cli/__init__.py b/src/kiara/interfaces/cli/__init__.py
new file mode 100644
index 000000000..6903554da
--- /dev/null
+++ b/src/kiara/interfaces/cli/__init__.py
@@ -0,0 +1,254 @@
+# -*- coding: utf-8 -*-
+
+"""A command-line interface for *Kiara*."""
+import asyncclick as click
+import sys
+import typing
+from rich import print as rich_print
+
+from kiara import Kiara
+from kiara.module import ModuleInfo
+from kiara.pipeline.module import PipelineModuleInfo
+from kiara.utils import module_config_from_cli_args
+from kiara.workflow import KiaraWorkflow
+
+# from importlib.metadata import entry_points
+
+
+# from asciinet import graph_to_ascii
+
+
+try:
+    import uvloop
+
+    uvloop.install()
+except Exception:
+    pass
+
+click.anyio_backend = "asyncio"
+
+
+@click.group()
+@click.pass_context
+def cli(ctx):
+    """Main cli entry-point, contains all the sub-commands."""
+
+    # test_pipelines_folder = os.path.abspath(
+    #     os.path.join(
+    #         os.path.dirname(__file__),
+    #         "..",
+    #         "..",
+    #         "..",
+    #         "..",
+    #         "tests/resources/pipelines",
+    #     )
+    # )
+    # test_pipeline_module_manager = PipelineModuleManager(test_pipelines_folder)
+    # Kiara.instance().add_module_manager(test_pipeline_module_manager)
+
+
+@cli.group()
+@click.pass_context
+def module(ctx):
+    pass
+
+
+@module.command(name="list")
+@click.option(
+    "--only-pipeline-modules", "-p", is_flag=True, help="Only list pipeline modules."
+)
+@click.option(
+    "--only-core-modules",
+    "-c",
+    is_flag=True,
+    help="Only list core (aka 'Python') modules.",
+)
+@click.pass_context
+def list_modules(ctx, only_pipeline_modules: bool, only_core_modules: bool):
+    """List available (Python) module types."""
+
+    if only_pipeline_modules and only_core_modules:
+        rich_print()
+        rich_print(
+            "Please provide either '--only-core-modules' or '--only-pipeline-modules', not both."
+        )
+        sys.exit(1)
+
+    if only_pipeline_modules:
+        m_list = Kiara.instance().available_pipeline_module_types
+    elif only_core_modules:
+        m_list = Kiara.instance().available_non_pipeline_module_types
+    else:
+        m_list = Kiara.instance().available_module_types
+
+    for name in m_list:
+        rich_print(name)
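+
+
+# Example invocations (illustrative; based on the commands defined in this module and the
+# module types registered in 'setup.cfg'):
+#
+#   kiara module list
+#   kiara module list --only-pipeline-modules
+#   kiara module describe and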
+
+
+@module.command(name="describe")
+@click.argument("module_type", nargs=1, required=True)
+@click.pass_context
+def describe_module_type(ctx, module_type: str):
+    """Print details of a (Python) module."""
+
+    m_cls = Kiara.instance().get_module_class(module_type)
+    if module_type == "pipeline" or not m_cls.is_pipeline():
+        info = ModuleInfo(module_type=module_type)
+    else:
+        info = PipelineModuleInfo(module_type=module_type)
+    rich_print()
+    rich_print(info)
+
+
+@cli.group()
+@click.pass_context
+def pipeline(ctx):
+    """Pipeline-related sub-commands."""
+
+
+@pipeline.command()
+@click.argument("pipeline_module_type", nargs=1)
+@click.option(
+    "--full",
+    "-f",
+    is_flag=True,
+    help="Display full data-flow graph, incl. intermediate input/output connections.",
+)
+@click.pass_context
+def data_flow_graph(ctx, pipeline_module_type: str, full: bool):
+    """Print the data-flow graph for a pipeline module type."""
+
+    m_cls = Kiara.instance().get_module_class(pipeline_module_type)
+    if not m_cls.is_pipeline():
+        rich_print()
+        rich_print(f"Module '{pipeline_module_type}' is not a pipeline-type module.")
+        sys.exit(1)
+
+    info = PipelineModuleInfo(module_type=pipeline_module_type)
+
+    info.print_data_flow_graph(simplified=not full)
+
+
+@pipeline.command()
+@click.argument("pipeline_module_type", nargs=1)
+@click.pass_context
+def execution_graph(ctx, pipeline_module_type: str):
+    """Print the execution graph for a pipeline module type."""
+
+    m_cls = Kiara.instance().get_module_class(pipeline_module_type)
+    if not m_cls.is_pipeline():
+        rich_print()
+        rich_print(f"Module '{pipeline_module_type}' is not a pipeline-type module.")
+        sys.exit(1)
+
+    info = PipelineModuleInfo(module_type=pipeline_module_type)
+    info.print_execution_graph()
+
+
+@cli.group()
+@click.pass_context
+def step(ctx):
+    """Display instantiated module details."""
+
+
+@step.command("describe")
+@click.option("--module-type", "-t", nargs=1)
+@click.option(
+    "--config",
+    "-c",
+    multiple=True,
+    required=False,
+    help="Configuration values for module initialization.",
+)
+@click.pass_context
+def describe_step(ctx, module_type: str, config: typing.Iterable[typing.Any]):
+    """Print details of an instantiated module."""
+
+    config = module_config_from_cli_args(*config)
+
+    module_obj = Kiara.instance().create_module(
+        id=module_type, module_type=module_type, module_config=config
+    )
+    rich_print()
+    rich_print(module_obj)
+
+
+@cli.command()
+@click.pass_context
+def dev(ctx):
+
+    # main_module = "kiara"
+
+    # md_obj: ProjectMetadata = ProjectMetadata(project_main_module=main_module)
+    #
+    # md_json = json.dumps(
+    #     md_obj.to_dict(), sort_keys=True, indent=2, separators=(",", ": ")
+    # )
+    # print(md_json)
+
+    # for entry_point_group, eps in entry_points().items():
+    #     print(entry_point_group)
+    #     print(eps)
+
+    # pc = get_data_from_file(
+    #     "/home/markus/projects/dharpa/kiara/tests/resources/workflows/logic_1.json"
+    # )
+    # wc = KiaraWorkflowConfig(module_config=pc)
+
+    # kiara = Kiara.instance()
+    # print(kiara)
+
+    # wf = KiaraWorkflow(
+    #     "/home/markus/projects/dharpa/kiara/tests/resources/workflows/logic/logic_2.json"
+    # )
+
+    # wf = KiaraWorkflow(
+    #     "/home/markus/projects/dharpa/kiara/tests/resources/workflows/dummy/dummy_1_delay.json"
+    # )
+
+    wf = KiaraWorkflow("xor")
+
+    # pp(wf.pipeline.get_current_state().__dict__)
+    print(wf.pipeline.get_current_state().json())
+
+    # wf = KiaraWorkflow("logic_1")
KiaraWorkflow("logic_1") + # wf = KiaraWorkflow("and") + # import pp + # pp(wf._workflow_config.__dict__) + # print("XXXXXXXXXXX") + # print(wf.structure.data_flow_graph.nodes) + # print(graph_to_ascii(wf.structure.data_flow_graph)) + # pp(wf.__dict__) + + # cls = kiara.get_module_class("logic_1") + # print(cls) + + # m = cls(id="test") + # print(wf.input_names) + # print(wf.output_names) + + # wc = KiaraWorkflowConfig.from_file( + # "/home/markus/projects/dharpa/kiara/tests/resources/workflows/logic_2.json" + # ) + # # wc = KiaraWorkflowConfig(module_type="and") + # wf = KiaraWorkflow(workflow_config=wc) + # + # # print_ascii_graph(wf.structure.data_flow_graph_simple) + + # wf.inputs.and_1__a = True + # wf.inputs.and_1__b = True + # wf.inputs.and_2__b = True + # wf.inputs.and_1__a = True + wf.inputs.a = True + wf.inputs.b = False + + # print(wf.inputs) + # + # print(wf.state) + # + # print(wf.outputs.dict()) + + # print(wf.outputs.and_2__y.get_value()) + # print(Kiara().instance().data_registry.get_stats()) + + +if __name__ == "__main__": + cli() diff --git a/src/kiara/kiara.py b/src/kiara/kiara.py new file mode 100644 index 000000000..1a4c4a7c5 --- /dev/null +++ b/src/kiara/kiara.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- + +"""Main module.""" + +import logging +import typing + +from kiara.data.registry import DataRegistry +from kiara.mgmt import ModuleManager, PipelineModuleManager, PythonModuleManager + +if typing.TYPE_CHECKING: + from kiara.module import KiaraModule + +log = logging.getLogger("kiara") + + +class Kiara(object): + _instance = None + + @classmethod + def instance(cls): + if cls._instance is None: + cls._instance = Kiara() + return cls._instance + + def __init__( + self, module_managers: typing.Optional[typing.Iterable[ModuleManager]] = None + ): + + self._default_python_mgr = PythonModuleManager() + self._default_pipeline_mgr = PipelineModuleManager() + module_managers = [self._default_python_mgr, self._default_pipeline_mgr] + + self._module_mgrs: typing.List[ModuleManager] = [] + self._modules: typing.Dict[str, ModuleManager] = {} + + self._data_registry: DataRegistry = DataRegistry() + + for mm in module_managers: + self.add_module_manager(mm) + + def add_module_manager(self, module_manager: ModuleManager): + + for module_type in module_manager.get_module_types(): + if module_type in self._modules.keys(): + log.warning( + f"Duplicate module name '{module_type}'. Ignoring all but the first." + ) + self._modules[module_type] = module_manager + + self._module_mgrs.append(module_manager) + + @property + def data_registry(self) -> DataRegistry: + return self._data_registry + + def get_module_class(self, module_type: str) -> typing.Type["KiaraModule"]: + + mm = self._modules.get(module_type, None) + if mm is None: + raise Exception(f"No module '{module_type}' available.") + + cls = mm.get_module_class(module_type) + if hasattr(cls, "_module_type_id") and cls._module_type_id != module_type: # type: ignore + raise Exception( + f"Can't create module class '{cls}', it already has a _module_type_id attribute and it's different to the module name '{module_type}'." 
+ ) + setattr(cls, "_module_type_id", module_type) + return cls + + @property + def available_module_types(self) -> typing.List[str]: + """Return the names of all available module types.""" + return sorted(set(self._modules.keys())) + + @property + def available_non_pipeline_module_types(self) -> typing.List[str]: + """Return the names of all available non-pipeline module types.""" + + return [ + module_type + for module_type in self.available_module_types + if not self.get_module_class(module_type).is_pipeline() + ] + + @property + def available_pipeline_module_types(self) -> typing.List[str]: + """Return the names of all available pipeline-type modules.""" + + return [ + module_type + for module_type in self.available_module_types + if module_type != "pipeline" + and self.get_module_class(module_type).is_pipeline() + ] + + def is_pipeline_module(self, module_type: str) -> bool: + + cls = self.get_module_class(module_type=module_type) + return cls.is_pipeline() + + def create_module( + self, + id: str, + module_type: str, + module_config: typing.Mapping[str, typing.Any], + parent_id: typing.Optional[str] = None, + ) -> "KiaraModule": + + mm = self._modules.get(module_type, None) + if mm is None: + raise Exception(f"No module '{module_type}' available.") + + _ = self.get_module_class( + module_type + ) # just to make sure the _module_type_id attribute is added + + return mm.create_module( + id=id, + parent_id=parent_id, + module_type=module_type, + module_config=module_config, + ) diff --git a/src/kiara/metadata/__init__.py b/src/kiara/metadata/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/kiara/mgmt.py b/src/kiara/mgmt.py new file mode 100644 index 000000000..2947ca9af --- /dev/null +++ b/src/kiara/mgmt.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +import abc +import logging +import os +import typing +from pathlib import Path + +from kiara.defaults import ( + DEFAULT_EXCLUDE_DIRS, + KIARA_RESOURCES_FOLDER, + MODULE_TYPE_NAME_KEY, + VALID_PIPELINE_FILE_EXTENSIONS, +) +from kiara.modules.pipelines import create_pipeline_class +from kiara.utils import find_kiara_modules, get_data_from_file + +if typing.TYPE_CHECKING: + from kiara.config import KiaraModuleConfig + from kiara.module import KiaraModule + from kiara.pipeline.module import PipelineModule + +log = logging.getLogger("kiara") + + +# extensions +# ------------------------------------------------------------------------ + + +class ModuleManager(abc.ABC): + @abc.abstractmethod + def get_module_types(self) -> typing.Iterable[str]: + pass + + @abc.abstractmethod + def get_module_class(self, module_type: str) -> typing.Type["KiaraModule"]: + pass + + def create_module_config( + self, module_type: str, module_config: typing.Mapping[str, typing.Any] + ) -> "KiaraModuleConfig": + + cls = self.get_module_class(module_type) + config = cls._config_cls(**module_config) + + return config + + def create_module( + self, + id: str, + module_type: str, + module_config: typing.Optional[typing.Mapping[str, typing.Any]] = None, + parent_id: typing.Optional[str] = None, + ) -> "KiaraModule": + + module_cls = self.get_module_class(module_type) + + module = module_cls(id=id, parent_id=parent_id, module_config=module_config) + return module
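A custom manager only needs to implement the two abstract methods above; here is a minimal sketch (the class name is hypothetical), which could then be registered via ``Kiara.instance().add_module_manager(...)``:

from kiara.mgmt import ModuleManager


class SingleModuleManager(ModuleManager):
    """Hypothetical manager that serves exactly one pre-registered module class."""

    def __init__(self, module_type: str, module_cls):
        self._module_type = module_type
        self._module_cls = module_cls

    def get_module_types(self):
        return [self._module_type]

    def get_module_class(self, module_type: str):
        if module_type != self._module_type:
            raise ValueError(f"No module of type '{module_type}' available.")
        return self._module_cls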
+ + +class PythonModuleManager(ModuleManager): + def __init__(self, **module_classes: typing.Type["KiaraModule"]): + + if not module_classes: + module_classes = find_kiara_modules() + + self._module_classes: typing.Mapping[ + str, typing.Type[KiaraModule] + ] = module_classes + + def get_module_class(self, module_type: str) -> typing.Type["KiaraModule"]: + + cls = self._module_classes.get(module_type, None) + if cls is None: + raise ValueError(f"No module of type '{module_type}' available.") + return cls + + def get_module_types(self) -> typing.Iterable[str]: + return self._module_classes.keys() + + +class PipelineModuleManager(ModuleManager): + def __init__(self, *folders: typing.Union[str, Path]): + + if not folders: + folders = (os.path.join(KIARA_RESOURCES_FOLDER, "pipelines"),) + + self._pipeline_desc_folders: typing.List[Path] = [] + self._pipeline_descs: typing.Dict[str, typing.Mapping[str, typing.Any]] = {} + self._cached_classes: typing.Dict[str, typing.Type[PipelineModule]] = {} + + for folder in folders: + self.add_pipelines_folder(folder) + + def add_pipelines_folder(self, folder: typing.Union[str, Path]): + + if isinstance(folder, str): + folder = Path(os.path.expanduser(folder)) + if not folder.is_dir(): + raise Exception(f"Pipeline folder path not a directory: {folder}") + + files: typing.Dict[str, typing.Mapping[str, typing.Any]] = {} + for root, dirnames, filenames in os.walk(folder, topdown=True): + + dirnames[:] = [d for d in dirnames if d not in DEFAULT_EXCLUDE_DIRS] + + for filename in [ + f + for f in filenames + if os.path.isfile(os.path.join(root, f)) + and any(f.endswith(ext) for ext in VALID_PIPELINE_FILE_EXTENSIONS) + ]: + + try: + + path = os.path.join(root, filename) + data = get_data_from_file(path) + + if not data: + raise Exception("No content.") + if not isinstance(data, typing.Mapping): + raise Exception("Not a dictionary type.") + name = data.get(MODULE_TYPE_NAME_KEY, None) + if name is None: + name = filename.split(".", maxsplit=1)[0] + + if name in files.keys(): + raise Exception(f"Duplicate workflow name: {name}") + if name in self._pipeline_descs.keys(): + raise Exception(f"Duplicate workflow name: {name}") + files[name] = {"data": data, "source": path, "source_type": "file"} + except Exception as e: + log.warning(f"Ignoring invalid pipeline file '{path}': {e}") + + self._pipeline_descs.update(files) + + @property + def pipeline_descs(self) -> typing.Mapping[str, typing.Mapping[str, typing.Any]]: + return self._pipeline_descs + + def get_module_class(self, module_type: str) -> typing.Type["PipelineModule"]: + + if module_type in self._cached_classes.keys(): + return self._cached_classes[module_type] + + desc = self._pipeline_descs.get(module_type, None) + if desc is None: + raise Exception(f"No pipeline with name '{module_type}' available.") + + cls_name = "".join(x.capitalize() or "_" for x in module_type.split("_")) + cls = create_pipeline_class(cls_name, desc["data"]) + + self._cached_classes[module_type] = cls + return self._cached_classes[module_type] + + def get_module_types(self) -> typing.Iterable[str]: + return self._pipeline_descs.keys() + + +class WorkflowManager(object): + def __init__(self, module_manager: PythonModuleManager): + + self._module_mgr: PythonModuleManager = module_manager + + def create_workflow( + self, + workflow_id: str, + config: typing.Union[str, typing.Mapping[str, typing.Any]], + ): + + if isinstance(config, typing.Mapping): + raise NotImplementedError() diff --git a/src/kiara/module.py b/src/kiara/module.py new file mode 100644 index 000000000..ecc1e837c --- /dev/null +++ b/src/kiara/module.py @@ -0,0 +1,375 @@ +# -*- coding: utf-8 -*- +import inspect +import textwrap +import typing +from abc import abstractmethod +from pydantic import BaseModel, Extra, Field, root_validator +from rich import box +from rich.console import Console, 
ConsoleOptions, RenderResult +from rich.syntax import Syntax +from rich.table import Table + +from kiara import Kiara +from kiara.config import KIARA_CONFIG, KiaraModuleConfig +from kiara.data.values import ValueSchema, ValueSet +from kiara.utils import ( + StringYAML, + create_table_from_config_class, + get_doc_for_module_class, +) + +yaml = StringYAML() + + +class StepInputs(object): + """Wrapper class to hold a set of inputs for a pipeline processing step. + + This is necessary because we can't assume the processing will be done on the same machine (or in the same process) + as the pipeline controller. By disconnecting the value from the processing code, we can react appropriately to + those circumstances. + + Arguments: + inputs (ValueSet): the input values of a pipeline step + """ + + def __init__(self, inputs: ValueSet): + self._inputs: ValueSet = inputs + + def __getattr__(self, key): + + if key == "_inputs": + raise KeyError() + elif key in self.__dict__["_inputs"].keys(): + return self.__dict__["_inputs"][key].get_value_data() + else: + return super().__getattribute__(key) + + +class StepOutputs(object): + """Wrapper class to hold a set of outputs for a pipeline processing step. + + This is necessary because we can't assume the processing will be done on the same machine (or in the same process) + as the pipeline controller. By disconnecting the value from the processing code, we can react appropriately to + those circumstances. + + Arguments: + outputs (ValueSet): the output values of a pipeline step + """ + + def __init__(self, outputs: ValueSet): + super().__setattr__("_outputs", outputs) + + def __getattr__(self, key): + + if key == "_outputs": + raise KeyError() + elif key in self.__dict__["_outputs"].keys(): + return self.__dict__["_outputs"][key].get_value_data() + else: + return super().__getattribute__(key) + + def __setattr__(self, key, value): + + self.set_values(**{key: value}) + + def set_values(self, **values: typing.Any): + + wrong = [] + for key in values.keys(): + if key not in self._outputs.keys(): + wrong.append(key) + + if wrong: + av = ", ".join(self._outputs.keys()) + raise Exception( + f"Can't set output value(s), invalid key name(s): {', '.join(wrong)}. Available: {av}" + ) + + self._outputs.update(values) + + +class KiaraModule(typing.Generic[KIARA_CONFIG]): + """The base class that every custom module in *Kiara* needs to inherit from. + + The core of every ``KiaraModule`` is the [``process``][kiara.module.KiaraModule.process] method, which needs to be + a pure, (ideally, but not strictly) idempotent function that creates one or several output values from the given + input(s). + + Examples: + + A simple example would be an 'addition' module, with ``a`` and ``b`` configured as inputs, and ``z`` as the output field name. 
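A minimal implementing class for that addition example might look like the following sketch (the original docstring leaves this as a TODO; ``ValueType.integer`` is an assumption, as only ``ValueType.boolean`` appears elsewhere in this patch):

import typing

from kiara.data.values import ValueSchema, ValueType
from kiara.module import KiaraModule, StepInputs, StepOutputs


class AdditionModule(KiaraModule):
    """Adds two numbers."""

    def create_input_schema(self) -> typing.Mapping[str, ValueSchema]:
        return {
            "a": ValueSchema(type=ValueType.integer, doc="The first number."),
            "b": ValueSchema(type=ValueType.integer, doc="The second number."),
        }

    def create_output_schema(self) -> typing.Mapping[str, ValueSchema]:
        return {
            "z": ValueSchema(type=ValueType.integer, doc="The sum of 'a' and 'b'."),
        }

    def process(self, inputs: StepInputs, outputs: StepOutputs) -> None:
        # read the inputs via attribute access, store the result the same way
        outputs.z = inputs.a + inputs.b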
+ + Arguments: + id (str): the id for this module (needs to be unique within a pipeline) + parent_id (typing.Optional[str]): the id of the parent, in case this module is part of a pipeline + module_config (typing.Any): the configuration for this module + meta (typing.Mapping[str, typing.Any]): metadata for this module (not implemented yet) + """ + + # TODO: not quite sure about this generic type here, mypy doesn't seem to like it + _config_cls: typing.Type[KIARA_CONFIG] = KiaraModuleConfig # type: ignore + + @classmethod + def is_pipeline(cls) -> bool: + return False + + def __init__( + self, + id: str, + parent_id: typing.Optional[str] = None, + module_config: typing.Union[ + None, KIARA_CONFIG, typing.Mapping[str, typing.Any] + ] = None, + meta: typing.Optional[typing.Mapping[str, typing.Any]] = None, + ): + + self._id: str = id + self._parent_id = parent_id + + if isinstance(module_config, KiaraModuleConfig): + self._config: KIARA_CONFIG = module_config # type: ignore + elif module_config is None: + self._config = self.__class__._config_cls() + elif isinstance(module_config, typing.Mapping): + self._config = self.__class__._config_cls(**module_config) + else: + raise TypeError(f"Invalid type for module config: {type(module_config)}") + + if meta is None: + meta = {} + self._meta = meta + + self._input_schemas: typing.Mapping[str, ValueSchema] = None # type: ignore + self._output_schemas: typing.Mapping[str, ValueSchema] = None # type: ignore + + @property + def id(self) -> str: + """The id of this module. + + This is only unique within a pipeline. + """ + return self._id + + @property + def parent_id(self) -> typing.Optional[str]: + """The id of the parent of this module (if part of a pipeline).""" + return self._parent_id + + @property + def full_id(self) -> str: + """The full id for this module.""" + + if self.parent_id: + return f"{self.parent_id}.{self.id}" + else: + return self.id + + @property + def config(self) -> KIARA_CONFIG: + """Retrieve the configuration object for this module. + + Returns: + the module-class-specific config object + """ + return self._config
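As the constructor above shows, ``module_config`` may be passed as a plain mapping, which is coerced into the module's ``_config_cls``. A usage sketch, with the ``AndModule``/``LogicProcessingModuleConfig`` pair defined later in this patch:

from kiara.modules.logic_gates import AndModule

# the mapping is coerced into LogicProcessingModuleConfig, which defines 'delay'
m = AndModule(id="and_1", module_config={"delay": 0.5})
print(m.config.delay)               # 0.5, via the config object
print(m.get_config_value("delay"))  # same value, via the helper defined just below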
+ + def get_config_value(self, key: str) -> typing.Any: + """Retrieve the value for a specific configuration option. + + Arguments: + key: the config key + + Returns: + the value for the provided key + """ + + return self.config.get(key) + + @abstractmethod + def create_input_schema(self) -> typing.Mapping[str, ValueSchema]: + """Abstract method to be implemented by child classes; returns a description of the input schema of this module.""" + + @abstractmethod + def create_output_schema(self) -> typing.Mapping[str, ValueSchema]: + """Abstract method to be implemented by child classes; returns a description of the output schema of this module.""" + + @property + def input_schemas(self) -> typing.Mapping[str, ValueSchema]: + """The input schema for this module.""" + + if self._input_schemas is None: + self._input_schemas = self.create_input_schema() + if not self._input_schemas: + raise Exception( + f"Invalid module implementation for '{self.__class__.__name__}': empty input schema" + ) + return self._input_schemas + + @property + def output_schemas(self) -> typing.Mapping[str, ValueSchema]: + """The output schema for this module.""" + + if self._output_schemas is None: + self._output_schemas = self.create_output_schema() + if not self._output_schemas: + raise Exception( + f"Invalid module implementation for '{self.__class__.__name__}': empty output schema" + ) + return self._output_schemas + + @property + def input_names(self) -> typing.Iterable[str]: + """A list of input field names for this module.""" + return self.input_schemas.keys() + + @property + def output_names(self) -> typing.Iterable[str]: + """A list of output field names for this module.""" + return self.output_schemas.keys() + + def process_step(self, inputs: ValueSet, outputs: ValueSet) -> None: + """Kick off processing for a specific set of input/outputs. + + This method calls the implemented [process][kiara.module.KiaraModule.process] method of the inheriting class, + as well as wrapping input/output-data related functionality. + + Arguments: + inputs: the input value set + outputs: the output value set + """ + + input_wrap: StepInputs = StepInputs(inputs=inputs) + output_wrap: StepOutputs = StepOutputs(outputs=outputs) + + self.process(inputs=input_wrap, outputs=output_wrap) + + @abstractmethod + def process(self, inputs: StepInputs, outputs: StepOutputs) -> None: + """Abstract method to implement by child classes, should be a pure, idempotent function that uses the values from ``inputs``, and stores results in the provided ``outputs`` object. + + Arguments: + inputs: the input value set + outputs: the output value set + """ + + def __eq__(self, other): + if self.__class__ != other.__class__: + return False + return (self.full_id, self.config) == (other.full_id, other.config) + + def __hash__(self): + return hash((self.__class__, self.full_id, self.config)) + + def __repr__(self): + return f"{self.__class__.__name__}(input_names={list(self.input_names)} output_names={list(self.output_names)})" + + def __rich_console__( + self, console: Console, options: ConsoleOptions + ) -> RenderResult: + + if not hasattr(self.__class__, "_module_type_id"): + raise Exception( + "Invalid module class, no '_module_type_id' attribute added. 
This is a bug" + ) + + data = { + # "module id": self.full_id, + "module type": self.__class__._module_type_id, # type: ignore + "module_config": self.config.dict(), + "inputs": {}, + "outputs": {}, + } + + for field_name, schema in self.input_schemas.items(): + d = "-- no default --" if schema.default is None else str(schema.default) + data["inputs"][field_name] = { + "type": schema.type, + "doc": schema.doc, + "default": d, + } + for field_name, schema in self.output_schemas.items(): + data["outputs"][field_name] = {"type": schema.type, "doc": schema.doc} + + yaml_str = yaml.dump(data) + yield Syntax(yaml_str, "yaml", background_color="default") + + +class ModuleInfo(BaseModel): + """A simple model class to hold and display information about a module. + + This is not used in processing at all; it is only there to make it easier to communicate module characteristics. + """ + + class Config: + extra = Extra.forbid + allow_mutation = False + + module_type: str = Field(description="The name the module is registered under.") + module_cls: typing.Type[KiaraModule] = Field(description="The module to describe.") + doc: str = Field(description="The documentation of the module.") + process_doc: str = Field( + description="In-depth documentation of the processing step of this module.", + default="-- n/a --", + ) + process_src: str = Field( + description="The source code of the processing method of this module." + ) + config_cls: typing.Type[KiaraModuleConfig] = Field( + description="The configuration class for this module." + ) + + @root_validator(pre=True) + def ensure_type(cls, values): + + module_type = values.pop("module_type", None) + assert module_type is not None + + if values: + raise ValueError( + f"Only 'module_type' allowed in constructor, not: {values.keys()}" + ) + + module_cls = Kiara.instance().get_module_class(module_type) + values["module_type"] = module_type + values["module_cls"] = module_cls + + doc = get_doc_for_module_class(module_cls) + + values["doc"] = doc + proc_doc = module_cls.process.__doc__ + if not proc_doc: + proc_doc = "-- n/a --" + else: + proc_doc = inspect.cleandoc(proc_doc) + values["process_doc"] = proc_doc + + proc_src = inspect.getsource(module_cls.process) + values["process_src"] = textwrap.dedent(proc_src) + values["config_cls"] = module_cls._config_cls + + return values + + def __rich_console__( + self, console: Console, options: ConsoleOptions + ) -> RenderResult: + yield f"[i]Module[/i]: [b]{self.module_type}[/b]" + my_table = Table(box=box.SIMPLE, show_lines=True, show_header=False) + my_table.add_column("Property", style="i") + my_table.add_column("Value") + my_table.add_row( + "class", f"{self.module_cls.__module__}.{self.module_cls.__qualname__}" + ) + my_table.add_row("doc", self.doc) + my_table.add_row( + "config class", + f"{self.config_cls.__module__}.{self.config_cls.__qualname__}", + ) + my_table.add_row("config", create_table_from_config_class(self.config_cls)) + syn_src = Syntax(self.process_src, "python") + my_table.add_row("src", syn_src) + + yield my_table diff --git a/src/kiara/modules/__init__.py b/src/kiara/modules/__init__.py new file mode 100644 index 000000000..8c2c7f883 --- /dev/null +++ b/src/kiara/modules/__init__.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +"""Base module under which the 'official' [KiaraModule][kiara.module.KiaraModule] implementations live.""" diff --git a/src/kiara/modules/dev.py b/src/kiara/modules/dev.py new file mode 100644 index 000000000..fd2bec526 --- /dev/null +++ b/src/kiara/modules/dev.py @@ 
-0,0 +1,82 @@ +# -*- coding: utf-8 -*- + +"""Modules that are useful for kiara and pipeline development, as well as for testing.""" + +import time +import typing +from pydantic import Field + +from kiara import KiaraModule +from kiara.config import KiaraModuleConfig +from kiara.data.values import ValueSchema +from kiara.module import StepInputs, StepOutputs + + +class DummyProcessingModuleConfig(KiaraModuleConfig): + """Configuration for the 'dummy' processing module.""" + + doc: typing.Optional[str] = None + + input_schema: typing.Mapping[str, typing.Mapping] = Field( + description="The input schema for this module." + ) + output_schema: typing.Mapping[str, typing.Mapping] = Field( + description="The output schema for this module." + ) + outputs: typing.Mapping[str, typing.Any] = Field( + description="The (dummy) output for this module.", default_factory=dict + ) + delay: float = Field( + description="The delay in seconds from processing start to when the (dummy) outputs are returned.", + default=0, + ) + + +class DummyModule(KiaraModule): + """Module that simulates processing, but uses hard-coded outputs as a result.""" + + _config_cls = DummyProcessingModuleConfig + + def create_input_schema(self) -> typing.Mapping[str, ValueSchema]: + """The input schema for the ``dummy`` module is created at object creation time from the ``input_schema`` config parameter.""" + + result = {} + for k, v in self.config.get("input_schema").items(): # type: ignore + result[k] = ValueSchema(**v) + return result + + def create_output_schema(self) -> typing.Mapping[str, ValueSchema]: + """The output schema for the ``dummy`` module is created at object creation time from the ``output_schema`` config parameter.""" + + result = {} + for k, v in self.config.get("output_schema").items(): # type: ignore + result[k] = ValueSchema(**v) + return result
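For reference, constructing a configured dummy module might look like the sketch below; the schema dicts mirror the ``ValueSchema`` fields used throughout this patch, and the string ``"boolean"`` is assumed to coerce into the corresponding ``ValueType``:

from kiara.modules.dev import DummyModule

dummy = DummyModule(
    id="dummy_1",
    module_config={
        "input_schema": {"a": {"type": "boolean", "doc": "A dummy input."}},
        "output_schema": {"y": {"type": "boolean", "doc": "A dummy output."}},
        "outputs": {"y": True},  # the hard-coded result returned by process()
        "delay": 0.1,            # simulate 100ms of processing time
    },
)
print(list(dummy.input_names))   # ['a']
print(list(dummy.output_names))  # ['y']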
+ """ + + time.sleep(self.config.get("delay")) # type: ignore + + output_values: typing.Mapping = self.config.get("outputs") # type: ignore + + value_dict = {} + for output_name in self.output_names: + if output_name not in output_values.keys(): + v = self.output_schemas[output_name].type_obj.fake_value() + value_dict[output_name] = v + else: + value_dict[output_name] = output_values[output_name] + outputs.set_values(**value_dict) + + # def _get_doc(self) -> str: + # + # doc = self.config.get("doc", None) + # + # if doc: + # return self.config["doc"] + # else: + # return super()._get_doc() diff --git a/src/kiara/modules/logic_gates.py b/src/kiara/modules/logic_gates.py new file mode 100644 index 000000000..e3128399a --- /dev/null +++ b/src/kiara/modules/logic_gates.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +import time +import typing +from pydantic import Field + +from kiara.config import KiaraModuleConfig +from kiara.data.values import ValueSchema, ValueType +from kiara.module import KiaraModule, StepInputs, StepOutputs + + +class LogicProcessingModuleConfig(KiaraModuleConfig): + """Config class for all the 'logic'-related modules.""" + + delay: float = Field( + default=0, + description="the delay in seconds from processing start to when the output is returned.", + ) + + +class LogicProcessingModule(KiaraModule): + + _config_cls = LogicProcessingModuleConfig + + +class NotModule(LogicProcessingModule): + """Negates the input.""" + + def create_input_schema(self) -> typing.Mapping[str, ValueSchema]: + """The not module only has one input, a boolean that will be negated by the module.""" + + return { + "a": ValueSchema( + type=ValueType.boolean, doc="A boolean describing this input state." + ), + } + + def create_output_schema(self) -> typing.Mapping[str, ValueSchema]: + """The output of this module is a single boolean, the negated input.""" + + return { + "y": ValueSchema( + type=ValueType.boolean, + doc="A boolean describing the module output state.", + ) + } + + def process(self, inputs: StepInputs, outputs: StepOutputs) -> None: + """Negates the input boolean.""" + + time.sleep(self.config.get("delay")) # type: ignore + + outputs.y = not inputs.a + + +class AndModule(LogicProcessingModule): + """Returns 'True' if both inputs are 'True'.""" + + def create_input_schema(self) -> typing.Mapping[str, ValueSchema]: + + return { + "a": ValueSchema( + type=ValueType.boolean, doc="A boolean describing this input state." + ), + "b": ValueSchema( + type=ValueType.boolean, doc="A boolean describing this input state." + ), + } + + def create_output_schema(self) -> typing.Mapping[str, ValueSchema]: + + return { + "y": ValueSchema( + type=ValueType.boolean, + doc="A boolean describing the module output state.", + ) + } + + def process(self, inputs: StepInputs, outputs: StepOutputs) -> None: + + time.sleep(self.config.delay) # type: ignore + + outputs.y = inputs.a and inputs.b + + +class OrModule(LogicProcessingModule): + """Returns 'True' if one of the inputs is 'True'.""" + + def create_input_schema(self) -> typing.Mapping[str, ValueSchema]: + + return { + "a": ValueSchema( + type=ValueType.boolean, doc="A boolean describing this input state." + ), + "b": ValueSchema( + type=ValueType.boolean, doc="A boolean describing this input state." 
+ ), + } + + def create_output_schema(self) -> typing.Mapping[str, ValueSchema]: + + return { + "y": ValueSchema( + type=ValueType.boolean, + doc="A boolean describing the module output state.", + ) + } + + def process(self, inputs: StepInputs, outputs: StepOutputs) -> None: + + time.sleep(self.config.get("delay")) # type: ignore + outputs.y = inputs.a or inputs.b diff --git a/src/kiara/modules/pipelines/__init__.py b/src/kiara/modules/pipelines/__init__.py new file mode 100644 index 000000000..b98c98222 --- /dev/null +++ b/src/kiara/modules/pipelines/__init__.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- + +"""Base module that holds [PipelineModule][kiara.pipeline.module.PipelineModule] classes that are auto-generated +from pipeline descriptions in the ``resources/pipelines`` folder.""" + +import typing + +# TODO: add classloader for those classes to runtime + + +def create_pipeline_class( + cls_name: str, pipeline_desc: typing.Mapping[str, typing.Any] +): + + from kiara.config import PipelineModuleConfig + from kiara.pipeline.module import PipelineModule + + pmc = PipelineModuleConfig(**pipeline_desc) + + def init(self, id: str, **kwargs): + # TODO: merge config + if kwargs.get("module_config", None): + raise Exception( + f"Can't dynamically create PipelineModuleClass, 'module_config' provided externally: {pipeline_desc}" + ) + kwargs["module_config"] = pipeline_desc + super(self.__class__, self).__init__(id=id, **kwargs) + + attrs = { + "__init__": init, + "_config_cls": PipelineModuleConfig, + "_base_pipeline_config": pmc, + } + # TODO: add pydoc + + cls = type(cls_name, (PipelineModule,), attrs) + return cls diff --git a/src/kiara/pipeline/__init__.py b/src/kiara/pipeline/__init__.py new file mode 100644 index 000000000..035d4ef78 --- /dev/null +++ b/src/kiara/pipeline/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +from .module import PipelineModule # noqa +from .pipeline import Pipeline # noqa +from .structure import PipelineStructure # noqa diff --git a/src/kiara/pipeline/controller.py b/src/kiara/pipeline/controller.py new file mode 100644 index 000000000..200639ddc --- /dev/null +++ b/src/kiara/pipeline/controller.py @@ -0,0 +1,309 @@ +# -*- coding: utf-8 -*- +import abc +import logging +import typing + +from kiara.data import Value, ValueSet +from kiara.pipeline.structure import PipelineStep + +if typing.TYPE_CHECKING: + from kiara.events import ( + PipelineInputEvent, + PipelineOutputEvent, + StepInputEvent, + StepOutputEvent, + ) + from kiara.pipeline.pipeline import Pipeline, PipelineState + +log = logging.getLogger("kiara") + + +class PipelineController(abc.ABC): + """An object that controls how a [Pipeline][kiara.pipeline.pipeline.Pipeline] should react to events related to its inputs/outputs. + + This is the base for the central controller class that needs to be implemented by a *Kiara* frontend. The default implementation + that is used if no ``PipelineController`` is provided in a [Pipeline][kiara.pipeline.pipeline.Pipeline] constructor + is the [BatchController][kiara.pipeline.controller.BatchController], which basically waits until all required inputs are + set, and then processes all pipeline steps in one go (in the right order). + + The pipeline object to control can be set either in the constructor, or via the ``set_pipeline`` method. But only once: + every subsequent attempt to set a pipeline will raise an Exception. 
+ + If you want to implement your own controller, you have to override at least one of the (empty) event hook methods: + + - [``pipeline_inputs_changed``][kiara.pipeline.controller.PipelineController.pipeline_inputs_changed] + - [``pipeline_outputs_changed``][kiara.pipeline.controller.PipelineController.pipeline_outputs_changed] + - [``step_inputs_changed``][kiara.pipeline.controller.PipelineController.step_inputs_changed] + - [``step_outputs_changed``][kiara.pipeline.controller.PipelineController.step_outputs_changed] + + Arguments: + pipeline (Pipeline): the pipeline object to control + + """ + + def __init__(self, pipeline: typing.Optional["Pipeline"] = None): + self._pipeline: typing.Optional[Pipeline] = None + self._running_steps: typing.Mapping[str, str] = {} + """A map of all currently running steps, and their job id.""" + + if pipeline is not None: + self.set_pipeline(pipeline) + + @property + def pipeline(self) -> "Pipeline": + """Return the pipeline this controller, well, ...controls...""" + + if self._pipeline is None: + raise Exception("Pipeline not set yet.") + return self._pipeline + + def set_pipeline(self, pipeline: "Pipeline"): + """Set the pipeline object for this controller. + + A pipeline can only be set once. + + Arguments: + pipeline: the pipeline object + """ + if self._pipeline is not None: + raise Exception("Pipeline already set.") + self._pipeline = pipeline + + @property + def processing_stages(self) -> typing.List[typing.List[str]]: + """Return the processing stage order of the pipeline. + + Returns: + a list of lists of step ids + """ + + return self.pipeline.structure.processing_stages + + def get_step(self, step_id: str) -> PipelineStep: + """Return the step object for the provided id. + + Arguments: + step_id: the step id + Returns: + the step object + """ + + return self.pipeline.get_step(step_id) + + def get_step_inputs(self, step_id: str) -> ValueSet: + """Return the inputs object for the pipeline.""" + + return self.pipeline.get_step_inputs(step_id) + + def get_step_outputs(self, step_id: str) -> ValueSet: + """Return the outputs object for the pipeline.""" + + return self.pipeline.get_step_outputs(step_id) + + def get_step_input(self, step_id: str, input_name: str) -> Value: + """Get the (current) input value for a specified step and input field name.""" + + item = self.get_step_inputs(step_id).get(input_name) + assert item is not None + return item + + def get_step_output(self, step_id: str, output_name: str) -> Value: + """Get the (current) output value for a specified step and output field name.""" + + item = self.get_step_outputs(step_id).get(output_name) + assert item is not None + return item + + def get_current_pipeline_state(self) -> "PipelineState": + """Return a description of the current pipeline state. + + This method creates a new [PipelineState][kiara.pipeline.pipeline.PipelineState] object when called, containing + the pipeline structure, as well as metadata about the pipeline and its step inputs and outputs. 
+ + Returns: + an object outlining the current pipeline state + """ + + return self.pipeline.get_current_state() + + @property + def pipeline_inputs(self) -> ValueSet: + """Return the inputs object for this pipeline.""" + + return self.pipeline._pipeline_inputs + + @pipeline_inputs.setter + def pipeline_inputs(self, inputs: typing.Mapping[str, typing.Any]) -> None: + """Set one, several or all inputs for this pipeline.""" + + self.set_pipeline_inputs(**inputs) + + @property + def pipeline_outputs(self) -> ValueSet: + """Return the (current) pipeline outputs object for this pipeline.""" + + return self.pipeline._pipeline_outputs + + def process_step(self, step_id: str): + """Kick off processing for the step with the provided id. + + Arguments: + step_id: the id of the step that should be started + """ + + step_inputs = self.get_step_inputs(step_id) + + # if the inputs are not valid, ignore this step + if not step_inputs.items_are_valid: + raise Exception( + f"Can't execute step '{step_id}': it does not have a valid set of inputs." + ) + + # get the output 'holder' objects, which we'll need to pass to the module + step_outputs = self.get_step_outputs(step_id) + # get the module object that holds the code that will do the processing + step = self.get_step(step_id) + + # finally, kick off processing + # print('========') + # print(step_inputs) + # print('-') + # print(step_outputs) + step.module.process_step(inputs=step_inputs, outputs=step_outputs) + + def step_is_ready(self, step_id: str) -> bool: + """Return whether the step with the provided id is ready to be processed. + + A ``True`` result means that all input fields are currently set with valid values. + + Arguments: + step_id: the id of the step to check + + Returns: + whether the step is ready (``True``) or not (``False``) + """ + return self.get_step_inputs(step_id).items_are_valid + + def step_is_valid(self, step_id: str) -> bool: + """Return whether the step with the provided id has been processed successfully. + + A ``True`` result means that all output fields are currently set with valid values, and the inputs haven't changed + since the last time processing was done. + + Arguments: + step_id: the id of the step to check + + Returns: + whether the step result is valid (``True``) or not (``False``) + """ + + return self.get_step_outputs(step_id).items_are_valid + + def pipeline_is_ready(self) -> bool: + """Return whether the pipeline is ready to be processed. + + A ``True`` result means that all pipeline inputs are set with valid values, and therefore every step within the + pipeline can be processed. + + Returns: + whether the pipeline can be processed as a whole (``True``) or not (``False``) + """ + return self.pipeline.inputs.items_are_valid + + def pipeline_is_valid(self) -> bool: + """Return whether the pipeline has been processed successfully. + + A ``True`` result means that every step of the pipeline has been processed successfully, and no pipeline input + has changed since that happened. + + Returns: + whether the pipeline was processed successfully (``True``) or not (``False``) + """ + return self.pipeline.outputs.items_are_valid + + def set_pipeline_inputs(self, **inputs: typing.Any): + """Set one, several or all inputs for this pipeline. + + Arguments: + **inputs: the input values to set + """ + + _inputs = self._pipeline_input_hook(**inputs) + self.pipeline_inputs.set_values(**_inputs)
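The input hook defined just below lets a controller transform or validate inputs before they are set; a minimal sketch of such an override (the subclass name is hypothetical):

from kiara.pipeline.controller import PipelineController


class CoercingController(PipelineController):
    """Hypothetical controller that normalizes string inputs before they are set."""

    def _pipeline_input_hook(self, **inputs):
        # e.g. turn 'true'/'false' strings into proper booleans
        return {
            k: (v.lower() == "true" if isinstance(v, str) else v)
            for k, v in inputs.items()
        }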
+ + def _pipeline_input_hook(self, **inputs: typing.Any): + """Hook that runs before pipeline inputs are set. + + Can be overridden by child controller classes to prevent, transform, validate or queue inputs. + """ + + log.debug(f"Inputs for pipeline '{self.pipeline.id}' set: {inputs}") + return inputs + + def step_inputs_changed(self, event: "StepInputEvent"): + """Method to override if the implementing controller needs to react to events where one or several step inputs have changed. + + Arguments: + event: the step input event + """ + + def step_outputs_changed(self, event: "StepOutputEvent"): + """Method to override if the implementing controller needs to react to events where one or several step outputs have changed. + + Arguments: + event: the step output event + """ + + def pipeline_inputs_changed(self, event: "PipelineInputEvent"): + """Method to override if the implementing controller needs to react to events where one or several pipeline inputs have changed. + + !!! note + Whenever pipeline inputs change, the connected step inputs also change and an (extra) event will be fired for those, which means + you can choose to only implement the ``step_inputs_changed`` method if you want to. This behaviour might change in the future. + + Arguments: + event: the pipeline input event + """ + + def pipeline_outputs_changed(self, event: "PipelineOutputEvent"): + """Method to override if the implementing controller needs to react to events where one or several pipeline outputs have changed. + + Arguments: + event: the pipeline output event + """
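To illustrate the hook API above, a minimal custom controller might look like this (the subclass name is hypothetical; ``event.updated_step_inputs`` is the mapping of step ids to changed input names that the pipeline emits, as shown in pipeline.py later in this patch):

from kiara.pipeline.controller import PipelineController


class EagerController(PipelineController):
    """Hypothetical controller that processes each step as soon as its inputs are valid."""

    def step_inputs_changed(self, event):
        for step_id in event.updated_step_inputs.keys():
            if self.step_is_ready(step_id):
                self.process_step(step_id)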
+ """ + + def __init__(self, pipeline: typing.Optional["Pipeline"] = None): + + self._is_running: bool = False + super().__init__(pipeline=pipeline) + + def step_inputs_changed(self, event: "StepInputEvent"): + + if self._is_running: + log.debug("Pipeline running, doing nothing.") + return + + if not self.pipeline_is_ready(): + log.debug(f"Pipeline not ready after input event: {event}") + return + + self._is_running = True + + for stage in self.processing_stages: + + for step_id in stage: + self.process_step(step_id) + + def pipeline_outputs_changed(self, event: "PipelineOutputEvent"): + + if self.pipeline_is_valid(): + # TODO: check if soemthing is running + self._is_running = False diff --git a/src/kiara/pipeline/module.py b/src/kiara/pipeline/module.py new file mode 100644 index 000000000..67551cc3b --- /dev/null +++ b/src/kiara/pipeline/module.py @@ -0,0 +1,187 @@ +# -*- coding: utf-8 -*- +import typing +from pydantic import Extra +from rich import box +from rich.console import Console, ConsoleOptions, RenderResult +from rich.syntax import Syntax +from rich.table import Table + +from kiara import Kiara +from kiara.config import PipelineModuleConfig +from kiara.data.values import ValueField, ValueSchema +from kiara.module import KiaraModule, ModuleInfo, StepInputs, StepOutputs +from kiara.pipeline.structure import PipelineStructure +from kiara.utils import ( + StringYAML, + create_table_from_config_class, + get_doc_for_module_class, + print_ascii_graph, +) + +yaml = StringYAML() + + +class PipelineModule(KiaraModule[PipelineModuleConfig]): + """A [KiaraModule][kiara.module.KiaraModule] that contains a collection of interconnected other modules.""" + + _config_cls: typing.Type[PipelineModuleConfig] = PipelineModuleConfig # type: ignore + + @classmethod + def is_pipeline(cls) -> bool: + return True + + def __init__( + self, + id: str, + parent_id: typing.Optional[str] = None, + module_config: typing.Union[ + None, PipelineModuleConfig, typing.Mapping[str, typing.Any] + ] = None, + meta: typing.Optional[typing.Mapping[str, typing.Any]] = None, + ): + + self._pipeline_structure: typing.Optional[PipelineStructure] = None + super().__init__( + id=id, parent_id=parent_id, module_config=module_config, meta=meta + ) + + @property + def structure(self) -> PipelineStructure: + """The ``PipelineStructure`` of this module.""" + + if self._pipeline_structure is None: + self._pipeline_structure = PipelineStructure( + parent_id=self.full_id, + steps=self._config.steps, + input_aliases=self._config.input_aliases, + output_aliases=self._config.output_aliases, + ) + return self._pipeline_structure + + def create_input_schema(self) -> typing.Mapping[str, ValueSchema]: + return self.structure.pipeline_input_schema + + def create_output_schema(self) -> typing.Mapping[str, ValueSchema]: + return self.structure.pipeline_output_schema + + def process(self, inputs: StepInputs, outputs: StepOutputs) -> None: + + from kiara import Pipeline + + pipeline = Pipeline(structure=self.structure) + inps = inputs._inputs + pipeline.inputs.set_values(**inps.dict()) + + outputs.set_values(**pipeline.outputs.dict()) + + +class PipelineModuleInfo(ModuleInfo): + class Config: + extra = Extra.forbid + allow_mutation = False + + def create_structure(self) -> "PipelineStructure": + base_conf: PipelineModuleConfig = self.module_cls._base_pipeline_config # type: ignore + return base_conf.create_structure(parent_id=self.module_type) + + def print_data_flow_graph(self, simplified: bool = True) -> None: + + structure = 
+ + def print_data_flow_graph(self, simplified: bool = True) -> None: + + structure = self.create_structure() + + if simplified: + graph = structure.data_flow_graph_simple + else: + graph = structure.data_flow_graph + + print_ascii_graph(graph) + + def print_execution_graph(self) -> None: + + structure = self.create_structure() + print_ascii_graph(structure.execution_graph) + + def __rich_console__( + self, console: Console, options: ConsoleOptions + ) -> RenderResult: + + yield f"[i]PipelineModule[/i]: [b]{self.module_type}[/b]" + my_table = Table(box=box.SIMPLE, show_lines=True, show_header=False) + my_table.add_column("Property", style="i") + my_table.add_column("Value") + my_table.add_row( + "class", f"{self.module_cls.__module__}.{self.module_cls.__qualname__}" + ) + my_table.add_row("doc", self.doc) + my_table.add_row( + "config class", + f"{self.config_cls.__module__}.{self.config_cls.__qualname__}", + ) + my_table.add_row( + "config", + create_table_from_config_class( + self.config_cls, remove_pipeline_config=True + ), + ) + + structure = self.create_structure() + + p_inputs = {} + for input_name, schema in structure.pipeline_input_schema.items(): + p_inputs[input_name] = {"type": schema.type, "doc": schema.doc} + inputs_str = yaml.dump(p_inputs) + _inputs_txt = Syntax(inputs_str, "yaml", background_color="default") + my_table.add_row("pipeline inputs", _inputs_txt) + + outputs = {} + for output_name, schema in structure.pipeline_output_schema.items(): + outputs[output_name] = {"type": schema.type, "doc": schema.doc} + outputs_str = yaml.dump(outputs) + _outputs_txt = Syntax(outputs_str, "yaml", background_color="default") + my_table.add_row("pipeline outputs", _outputs_txt) + + stages: typing.Dict[str, typing.Dict[str, typing.Any]] = {} + for nr, stage in enumerate(structure.processing_stages): + for s_id in stage: + step = structure.get_step(s_id) + mc = Kiara.instance().get_module_class(step.module_type) + desc = get_doc_for_module_class(mc) + inputs: typing.Dict[ValueField, typing.List[str]] = {} + for inp in structure.steps_inputs.values(): + if inp.step_id != s_id: + continue + if inp.connected_outputs: + for co in inp.connected_outputs: + inputs.setdefault(inp, []).append(co.alias) + else: + inputs.setdefault(inp, []).append( + f"__pipeline__.{inp.connected_pipeline_input}" + ) + + inp_str = [] + for k, v in inputs.items(): + s = f"{k.value_name} ← {', '.join(v)}" + inp_str.append(s) + + outp_str = [] + for outp in structure.steps_outputs.values(): + if outp.step_id != s_id: + continue + if outp.pipeline_output: + outp_str.append( + f"{outp.value_name} → __pipeline__.{outp.pipeline_output}" + ) + else: + outp_str.append(outp.value_name) + + stages.setdefault(f"stage {nr}", {})[s_id] = { + "module": step.module_type, + "desc": desc, + "inputs": inp_str, + "outputs": outp_str, + } + + stages_str = yaml.dump(stages) + _stages_txt = Syntax(stages_str, "yaml", background_color="default") + my_table.add_row("processing stages", _stages_txt) + + yield my_table diff --git a/src/kiara/pipeline/pipeline.py b/src/kiara/pipeline/pipeline.py new file mode 100644 index 000000000..25970c02e --- /dev/null +++ b/src/kiara/pipeline/pipeline.py @@ -0,0 +1,376 @@ +# -*- coding: utf-8 -*- +import logging +import typing +import uuid +from enum import Enum +from pydantic import BaseModel, Field + +from kiara.data.registry import DataRegistry +from kiara.data.values import ( + PipelineInputField, + PipelineOutputField, + PipelineValues, + StepInputField, + StepOutputField, + Value, + ValueSet, +) +from kiara.events import ( + PipelineInputEvent, + PipelineOutputEvent, + 
StepInputEvent, + StepOutputEvent, +) +from kiara.kiara import Kiara +from kiara.pipeline.controller import BatchController, PipelineController +from kiara.pipeline.structure import ( + PipelineStep, + PipelineStructure, + PipelineStructureDesc, +) + +log = logging.getLogger("kiara") + + +class StepStatus(Enum): + """Enum to describe the state of a workflow.""" + + STALE = "stale" + INPUTS_READY = "inputs_ready" + RESULTS_INCOMING = "processing" + RESULTS_READY = "results_ready" + + +class Pipeline(object): + """An instance of a [PipelineStructure][kiara.pipeline.structure.PipelineStructure] that holds state for all of the inputs/outputs of the steps within.""" + + def __init__( + self, + structure: PipelineStructure, + constants: typing.Optional[typing.Mapping[str, typing.Any]] = None, + controller: typing.Optional[PipelineController] = None, + data_registry: typing.Optional[DataRegistry] = None, + ): + + self._id: str = str(uuid.uuid4()) + self._structure: PipelineStructure = structure + + self._pipeline_inputs: ValueSet = None # type: ignore + self._pipeline_outputs: ValueSet = None # type: ignore + + self._step_inputs: typing.Mapping[str, ValueSet] = None # type: ignore + self._step_outputs: typing.Mapping[str, ValueSet] = None # type: ignore + + self._state: StepStatus = StepStatus.STALE + + if constants is None: + constants = {} + self._constants: typing.Mapping[str, typing.Any] = constants + + if data_registry is None: + data_registry = Kiara.instance().data_registry + self._data_registry: DataRegistry = data_registry + + self._init_values() + + if controller is None: + controller = BatchController(self) + self._controller: PipelineController = controller + + self._update_status() + + def __eq__(self, other): + + if not isinstance(other, Pipeline): + return False + + return self._id == other._id + + def __hash__(self): + + return hash(self._id) + + @property + def id(self) -> str: + return self._id + + @property + def structure(self) -> PipelineStructure: + return self._structure + + @property + def inputs(self) -> ValueSet: + return self._pipeline_inputs + + @property + def outputs(self) -> ValueSet: + return self._pipeline_outputs + + # def set_pipeline_inputs(self, **inputs: typing.Any): + # self._controller.set_pipeline_inputs(**inputs) + + def get_step(self, step_id: str) -> PipelineStep: + return self._structure.get_step(step_id) + + def get_step_inputs(self, step_id: str) -> ValueSet: + return self._step_inputs[step_id] + + def get_step_outputs(self, step_id: str) -> ValueSet: + return self._step_outputs[step_id] + + @property + def status(self) -> StepStatus: + return self._state + + def _update_status(self): + + if not self.inputs.items_are_valid: + new_state = StepStatus.STALE + elif not self.outputs.items_are_valid: + new_state = StepStatus.INPUTS_READY + else: + new_state = StepStatus.RESULTS_READY + + self._state = new_state
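A usage sketch, mirroring what the ``dev`` CLI command earlier in this patch does via ``KiaraWorkflow("xor")``; the input names ``a``/``b`` come from that example:

from kiara.pipeline import Pipeline
from kiara.pipeline.module import PipelineModuleInfo

# build the 'xor' pipeline and drive it through the default BatchController
structure = PipelineModuleInfo(module_type="xor").create_structure()
pipeline = Pipeline(structure=structure)

pipeline.inputs.set_values(a=True, b=False)  # triggers processing of all stages
print(pipeline.status)                       # e.g. StepStatus.RESULTS_READY
print(pipeline.get_current_state().json())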
+ """ + + pipeline_inputs: typing.Dict[str, Value] = {} + pipeline_outputs: typing.Dict[str, Value] = {} + + all_step_inputs: typing.Dict[str, typing.Dict[str, Value]] = {} + all_step_outputs: typing.Dict[str, typing.Dict[str, Value]] = {} + + # create the value objects that are associated with step outputs + # all pipeline outputs are created here too, since the only place + # those can be associated are step outputs + for step_id, step_details in self._structure.steps_details.items(): + + step_outputs: typing.Mapping[str, StepOutputField] = step_details["outputs"] + + for output_name, output_point in step_outputs.items(): + + output_value_item = self._data_registry.register_value( + value_schema=output_point.value_schema, + value_fields=output_point, + origin=f"step_output:{self.structure.pipeline_id}.{output_point.alias}", + is_constant=False, + ) + self._data_registry.register_callback( + self.values_updated, output_value_item + ) + all_step_outputs.setdefault(step_id, {})[ + output_name + ] = output_value_item + + # not all step outputs necessarily need to be connected to a pipeline output + if output_point.pipeline_output: + po = self._structure.pipeline_outputs[output_point.pipeline_output] + pv = self._data_registry.register_linked_value( + output_value_item, value_fields=po + ) + self._data_registry.register_callback(self.values_updated, pv) + pipeline_outputs[output_point.pipeline_output] = pv + + # create the value objects that are associated with step inputs + for step_id, step_details in self._structure.steps_details.items(): + + step_inputs: typing.Mapping[str, StepInputField] = step_details["inputs"] + + for input_name, input_point in step_inputs.items(): + + # if this step input gets fed from a pipeline_input (meaning user input in most cases), + # we need to create a DataValue for that pipeline input + if input_point.connected_pipeline_input: + connected_pipeline_input_name = input_point.connected_pipeline_input + pipeline_input_field = self._structure.pipeline_inputs[ + connected_pipeline_input_name + ] + pipeline_input = pipeline_inputs.get( + connected_pipeline_input_name, None + ) + + if pipeline_input is None: + # if the pipeline input wasn't created by another step input before, + # we need to take care of it here + + constant = self._constants.get( + connected_pipeline_input_name, None + ) + pipeline_input = self._data_registry.register_value( + value_schema=input_point.value_schema, + value_fields=pipeline_input_field, + is_constant=False if constant is None else True, + initial_value=constant, + origin=f"pipeline_input:{self.structure.pipeline_id}.{input_name}", + ) + self._data_registry.register_callback( + self.values_updated, pipeline_input + ) + + pipeline_inputs[connected_pipeline_input_name] = pipeline_input + # TODO: create input field value + else: + # TODO: compare schemas of multiple inputs + log.warning( + "WARNING: not comparing schemas of pipeline inputs with links to more than one step input" + ) + # raise NotImplementedError() + + step_input = self._data_registry.register_linked_value( + linked_values=pipeline_input, + value_fields=input_point, + origin=f"step_input:{self.structure.pipeline_id}.{input_point.alias}", + ) + self._data_registry.register_callback( + self.values_updated, step_input + ) + + all_step_inputs.setdefault(step_id, {})[input_name] = step_input + + elif input_point.connected_outputs: + + if len(input_point.connected_outputs) > 1: + raise NotImplementedError() + for co in input_point.connected_outputs: + output_value = 
all_step_outputs[co.step_id][co.value_name] + step_input = self._data_registry.register_linked_value( + linked_values=output_value, + value_fields=input_point, + origin=f"step_input:{self.structure.pipeline_id}.{input_point.alias}", + ) + self._data_registry.register_callback( + self.values_updated, step_input + ) + all_step_inputs.setdefault(input_point.step_id, {})[ + input_point.value_name + ] = step_input + + else: + raise Exception( + f"Invalid value point type for this location: {input_point}" + ) + + self._pipeline_inputs = ValueSet(items=pipeline_inputs) + self._pipeline_outputs = ValueSet(items=pipeline_outputs) + self._step_inputs = {} + for step_id, inputs in all_step_inputs.items(): + self._step_inputs[step_id] = ValueSet(items=inputs) + self._step_outputs = {} + for step_id, outputs in all_step_outputs.items(): + self._step_outputs[step_id] = ValueSet(items=outputs) + + def values_updated(self, *items: Value): + + updated_inputs: typing.Dict[str, typing.List[str]] = {} + updated_outputs: typing.Dict[str, typing.List[str]] = {} + updated_pipeline_inputs: typing.List[str] = [] + updated_pipeline_outputs: typing.List[str] = [] + + # print("===================================================") + # for item in items: + # print(item) + # print("===================================================") + + self._update_status() + + for item in items: + + # TODO: multiple value fields, also check pipeline id + ps = item.value_fields + if len(ps) != 1: + raise NotImplementedError() + + p = list(ps)[0] + + if isinstance(p, StepInputField): + updated_inputs.setdefault(p.step_id, []).append(p.value_name) + elif isinstance(p, StepOutputField): + updated_outputs.setdefault(p.step_id, []).append(p.value_name) + elif isinstance(p, PipelineInputField): + updated_pipeline_inputs.append(p.value_name) + elif isinstance(p, PipelineOutputField): + updated_pipeline_outputs.append(p.value_name) + else: + raise TypeError(f"Can't update, invalid type: {type(p)}") + + if updated_pipeline_inputs: + event_pi = PipelineInputEvent( + pipeline_id=self._structure.pipeline_id, + updated_pipeline_inputs=updated_pipeline_inputs, + ) + self._controller.pipeline_inputs_changed(event_pi) + + if updated_inputs: + event_si = StepInputEvent( + pipeline_id=self._structure.pipeline_id, + updated_step_inputs=updated_inputs, + ) + self._controller.step_inputs_changed(event_si) + + if updated_outputs: + event_so = StepOutputEvent( + pipeline_id=self._structure.pipeline_id, + updated_step_outputs=updated_outputs, + ) + self._controller.step_outputs_changed(event_so) + + if updated_pipeline_outputs: + event_po = PipelineOutputEvent( + pipeline_id=self._structure.pipeline_id, + updated_pipeline_outputs=updated_pipeline_outputs, + ) + self._controller.pipeline_outputs_changed(event_po) + + def get_current_state(self) -> "PipelineState": + + step_inputs = {} + for k, v in self._step_inputs.items(): + step_inputs[k] = PipelineValues.from_value_set(v) + + step_outputs = {} + for k, v in self._step_outputs.items(): + step_outputs[k] = PipelineValues.from_value_set(v) + + state = PipelineState( + structure=self.structure.to_details(), + pipeline_inputs=self._pipeline_inputs.to_details(), + pipeline_outputs=self._pipeline_outputs.to_details(), + step_inputs=step_inputs, + step_outputs=step_outputs, + status=self.status, + ) + return state + + +class PipelineState(BaseModel): + """Describes the current state of a pipeline. 
+ + This includes the structure of the pipeline (how the internal modules/steps are connected to each other), as well + as all current input/output values for the pipeline itself, as well as for all internal steps. + + Use the ``dict`` or ``json`` methods to convert this object into a generic data structure. + """ + + structure: PipelineStructureDesc = Field( + description="The structure (interconnections of modules/steps) of the pipeline." + ) + pipeline_inputs: PipelineValues = Field( + description="The current (externally facing) input values of this pipeline." + ) + pipeline_outputs: PipelineValues = Field( + description="The current (externally facing) output values of this pipeline." + ) + step_inputs: typing.Dict[str, PipelineValues] = Field( + description="The current (internal) input values of each step of this pipeline." + ) + step_outputs: typing.Dict[str, PipelineValues] = Field( + description="The current (internal) output values of each step of this pipeline." + ) + status: StepStatus = Field(description="The current overall status of the pipeline.") diff --git a/src/kiara/pipeline/structure.py b/src/kiara/pipeline/structure.py new file mode 100644 index 000000000..cc9343e0c --- /dev/null +++ b/src/kiara/pipeline/structure.py @@ -0,0 +1,619 @@ +# -*- coding: utf-8 -*- +import networkx as nx +import typing +from deepdiff import DeepHash +from functools import lru_cache +from pydantic import BaseModel, Extra, Field, PrivateAttr + +from kiara.data.values import ( + PipelineInputField, + PipelineOutputField, + StepInputField, + StepOutputField, + StepValueAddress, + ValueSchema, + generate_step_alias, +) +from kiara.defaults import PIPELINE_PARENT_MARKER +from kiara.kiara import Kiara +from kiara.module import KiaraModule + +if typing.TYPE_CHECKING: + from kiara.config import PipelineStepConfig + + +class PipelineStep(BaseModel): + """A step within a pipeline structure; includes information about its connection(s) and other metadata.""" + + class Config: + validate_assignment = True + + @classmethod + def create_steps( + cls, parent_id: str, *steps: "PipelineStepConfig" + ) -> typing.List["PipelineStep"]: + + result: typing.List[PipelineStep] = [] + for step in steps: + + _s = PipelineStep( + step_id=step.step_id, + parent_id=parent_id, + module_type=step.module_type, + module_config=step.module_config, + input_links=step.input_links, + ) + result.append(_s) + + return result + + _module: typing.Optional[KiaraModule] = PrivateAttr(default=None) + + step_id: str + parent_id: str + module_type: str = Field(description="The module type.") + module_config: typing.Mapping[str, typing.Any] = Field( + description="The module config.", default_factory=dict + ) + processing_stage: typing.Optional[int] = Field( + default=None, + description="The number of the stage within the pipeline at which this step is executed.", + ) + input_links: typing.Mapping[str, typing.List[StepValueAddress]] = Field( + description="The links that connect to inputs of the module.", + default_factory=dict, + ) + + @property + def module(self) -> KiaraModule: + + if self._module is None: + + self._module = Kiara.instance().create_module( + id=self.step_id, + module_type=self.module_type, + module_config=self.module_config, + ) + return self._module + + def __eq__(self, other): + + if not isinstance(other, PipelineStep): + return False + + eq = (self.step_id, self.parent_id, self.module, self.processing_stage,) == ( + other.step_id, + other.parent_id, + other.module, + other.processing_stage, + ) + + if not eq: + return False + + 
+        hs = DeepHash(self.input_links)
+        ho = DeepHash(other.input_links)
+
+        return hs[self.input_links] == ho[other.input_links]
+
+    def __hash__(self):
+
+        # TODO: figure out whether that can be made to work without deephash
+        hs = DeepHash(self.input_links)
+        return hash(
+            (
+                self.step_id,
+                self.parent_id,
+                self.module,
+                self.processing_stage,
+                hs[self.input_links],
+            )
+        )
+
+    def __repr__(self):
+
+        return f"{self.__class__.__name__}(step_id={self.step_id} parent={self.parent_id} module_type={self.module_type} processing_stage={self.processing_stage})"
+
+    def __str__(self):
+        return self.__repr__()
+
+
+def generate_pipeline_endpoint_name(step_id: str, value_name: str):
+
+    return f"{step_id}__{value_name}"
+
+
+class PipelineStructure(object):
+    """An object that holds one or several steps, and describes the connections between them."""
+
+    def __init__(
+        self,
+        parent_id: str,
+        steps: typing.Iterable["PipelineStepConfig"],
+        input_aliases: typing.Optional[typing.Mapping[str, str]] = None,
+        output_aliases: typing.Optional[typing.Mapping[str, str]] = None,
+        add_all_workflow_outputs: bool = False,
+    ):
+
+        if not steps:
+            raise Exception("No steps provided.")
+
+        self._steps: typing.List[PipelineStep] = PipelineStep.create_steps(
+            parent_id, *steps
+        )
+        self._pipeline_id: str = parent_id
+
+        if input_aliases is None:
+            input_aliases = {}
+        self._input_aliases: typing.Mapping[str, str] = input_aliases
+        if output_aliases is None:
+            output_aliases = {}
+        self._output_aliases: typing.Mapping[str, str] = output_aliases
+
+        self._add_all_workflow_outputs: bool = add_all_workflow_outputs
+
+        self._execution_graph: nx.DiGraph = None  # type: ignore
+        self._data_flow_graph: nx.DiGraph = None  # type: ignore
+        self._data_flow_graph_simple: nx.DiGraph = None  # type: ignore
+
+        self._processing_stages: typing.List[typing.List[str]] = None  # type: ignore
+
+        self._steps_details: typing.Dict[str, typing.Any] = None  # type: ignore
+        """Holds details about the (current) processing steps contained in this workflow."""
+
+    @property
+    def pipeline_id(self) -> str:
+        return self._pipeline_id
+
+    @property
+    def steps(self) -> typing.Iterable[PipelineStep]:
+        return self._steps
+
+    @property
+    def modules(self) -> typing.Iterable[KiaraModule]:
+        return (s.module for s in self.steps)
+
+    @property
+    def steps_details(self) -> typing.Mapping[str, typing.Any]:
+
+        if self._steps_details is None:
+            self._process_steps()
+        return self._steps_details
+
+    def get_step(self, step_id: str) -> PipelineStep:
+
+        d = self.steps_details.get(step_id, None)
+        if d is None:
+            raise Exception(f"No step with id: {step_id}")
+
+        return d["step"]
+
+    def get_step_inputs(self, step_id: str) -> typing.Iterable[StepInputField]:
+
+        d = self.steps_details.get(step_id, None)
+        if d is None:
+            raise Exception(f"No step with id: {step_id}")
+
+        return d["inputs"]
+
+    def get_step_outputs(self, step_id: str) -> typing.Iterable[StepOutputField]:
+
+        d = self.steps_details.get(step_id, None)
+        if d is None:
+            raise Exception(f"No step with id: {step_id}")
+
+        return d["outputs"]
+
+    def get_step_details(self, step_id: str) -> typing.Mapping[str, typing.Any]:
+
+        d = self.steps_details.get(step_id, None)
+        if d is None:
+            raise Exception(f"No step with id: {step_id}")
+
+        return d
+
+    @property
+    def execution_graph(self) -> nx.DiGraph:
+        if self._execution_graph is None:
+            self._process_steps()
+        return self._execution_graph
+
+    @property
+    def data_flow_graph(self) -> nx.DiGraph:
+        if self._data_flow_graph is None:
+            self._process_steps()
+        return
self._data_flow_graph + + @property + def data_flow_graph_simple(self) -> nx.DiGraph: + if self._data_flow_graph_simple is None: + self._process_steps() + return self._data_flow_graph_simple + + @property + def processing_stages(self) -> typing.List[typing.List[str]]: + if self._steps_details is None: + self._process_steps() + return self._processing_stages + + @lru_cache() + def _get_node_of_type(self, node_type: str): + if self._steps_details is None: + self._process_steps() + + return [ + node + for node, attr in self._data_flow_graph.nodes(data=True) + if attr["type"] == node_type + ] + + @property + def steps_inputs(self) -> typing.Dict[str, StepInputField]: + return { + node.alias: node + for node in self._get_node_of_type(node_type=StepInputField.__name__) + } + + @property + def steps_outputs(self) -> typing.Dict[str, StepOutputField]: + return { + node.alias: node + for node in self._get_node_of_type(node_type=StepOutputField.__name__) + } + + @property + def pipeline_inputs(self) -> typing.Dict[str, PipelineInputField]: + return { + node.value_name: node + for node in self._get_node_of_type(node_type=PipelineInputField.__name__) + } + + @property + def pipeline_outputs(self) -> typing.Dict[str, PipelineOutputField]: + return { + node.value_name: node + for node in self._get_node_of_type(node_type=PipelineOutputField.__name__) + } + + @property + def pipeline_input_schema(self) -> typing.Mapping[str, ValueSchema]: + + return { + input_name: w_in.value_schema + for input_name, w_in in self.pipeline_inputs.items() + } + + @property + def pipeline_output_schema(self) -> typing.Mapping[str, ValueSchema]: + return { + output_name: w_out.value_schema + for output_name, w_out in self.pipeline_outputs.items() + } + + def _process_steps(self): + """The core method of this class, it connects all the processing modules, their inputs and outputs.""" + + steps_details: typing.Dict[str, typing.Any] = {} + execution_graph = nx.DiGraph() + execution_graph.add_node("__root__") + data_flow_graph = nx.DiGraph() + data_flow_graph_simple = nx.DiGraph() + processing_stages = [] + + # temp variable, to hold all outputs + outputs: typing.Dict[str, StepOutputField] = {} + + # process all pipeline and step outputs first + _temp_steps_map: typing.Dict[str, PipelineStep] = {} + for step in self._steps: + + _temp_steps_map[step.step_id] = step + + if step.step_id in steps_details.keys(): + raise Exception( + f"Can't process steps: duplicate step_id '{step.step_id}'" + ) + + steps_details[step.step_id] = { + "step": step, + "outputs": {}, + "inputs": {}, + } + + data_flow_graph.add_node(step, type="step") + + # go through all the module outputs, create points for them and connect them to pipeline outputs + for output_name, schema in step.module.output_schemas.items(): + + step_output = StepOutputField( + value_name=output_name, + value_schema=schema, + step_id=step.step_id, + pipeline_id=self._pipeline_id, + ) + + steps_details[step.step_id]["outputs"][output_name] = step_output + step_alias = generate_step_alias(step.step_id, output_name) + outputs[step_alias] = step_output + + step_output_name = generate_pipeline_endpoint_name( + step_id=step.step_id, value_name=output_name + ) + if self._output_aliases: + if step_output_name in self._output_aliases.keys(): + step_output_name = self._output_aliases[step_output_name] + else: + if not self._add_all_workflow_outputs: + # this output is not interesting for the workflow + step_output_name = None + + if step_output_name: + step_output_address = StepValueAddress( + 
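+                        # NOTE: a StepValueAddress identifies one concrete output by
+                        # its (step_id, value_name) pair, so the pipeline output
+                        # created below can point back to the step that produces it.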
+                        step_id=step.step_id, value_name=output_name
+                    )
+                    pipeline_output = PipelineOutputField(
+                        pipeline_id=self._pipeline_id,
+                        value_name=step_output_name,
+                        connected_output=step_output_address,
+                        value_schema=schema,
+                    )
+                    step_output.pipeline_output = pipeline_output.value_name
+
+                    data_flow_graph.add_node(
+                        pipeline_output, type=PipelineOutputField.__name__
+                    )
+                    data_flow_graph.add_edge(step_output, pipeline_output)
+
+                    data_flow_graph_simple.add_node(
+                        pipeline_output, type=PipelineOutputField.__name__
+                    )
+                    data_flow_graph_simple.add_edge(step, pipeline_output)
+
+                data_flow_graph.add_node(step_output, type=StepOutputField.__name__)
+                data_flow_graph.add_edge(step, step_output)
+
+        # now process inputs, and connect them to the appropriate output/pipeline-input points
+        existing_pipeline_input_points: typing.Dict[str, PipelineInputField] = {}
+        for step in self._steps:
+
+            other_step_dependency: typing.Set = set()
+            # go through all the inputs of a module, create input points and connect them to either
+            # other module outputs, or pipeline inputs (which need to be created)
+            for input_name, schema in step.module.input_schemas.items():
+
+                matching_input_links: typing.List[StepValueAddress] = []
+                for value_name, input_links in step.input_links.items():
+                    if value_name == input_name:
+                        for input_link in input_links:
+                            if input_link in matching_input_links:
+                                raise Exception(f"Duplicate input link: {input_link}")
+                            matching_input_links.append(input_link)
+
+                if matching_input_links:
+                    # this means we connect to another step's output
+
+                    connected_output_points: typing.List[StepOutputField] = []
+                    connected_outputs: typing.List[StepValueAddress] = []
+
+                    for input_link in matching_input_links:
+                        output_id = generate_step_alias(
+                            input_link.step_id, input_link.value_name
+                        )
+
+                        if output_id not in outputs.keys():
+                            raise Exception(
+                                f"Can't connect input '{input_name}' for step '{step.step_id}': no output '{output_id}' available."
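+                                # NOTE: all step outputs were registered in the first
+                                # pass above, so a miss here means the input link
+                                # references a step/output that does not exist.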
+ ) + connected_output_points.append(outputs[output_id]) + connected_outputs.append(input_link) + + other_step_dependency.add(input_link.step_id) + + step_input_point = StepInputField( + step_id=step.step_id, + pipeline_id=self._pipeline_id, + value_name=input_name, + value_schema=schema, + connected_pipeline_input=None, + connected_outputs=connected_outputs, + ) + + for op in connected_output_points: + op.connected_inputs.append(step_input_point.address) + data_flow_graph.add_edge(op, step_input_point) + data_flow_graph_simple.add_edge( + _temp_steps_map[op.step_id], step_input_point + ) # TODO: name edge + data_flow_graph_simple.add_edge( + step_input_point, step + ) # TODO: name edge + + else: + # this means we connect to pipeline input + pipeline_input_name = generate_pipeline_endpoint_name( + step_id=step.step_id, value_name=input_name + ) + if self._input_aliases: + if pipeline_input_name in self._input_aliases.keys(): + # this means we use the pipeline alias + pipeline_input_name = self._input_aliases[ + pipeline_input_name + ] + + if pipeline_input_name in existing_pipeline_input_points.keys(): + # we already created a pipeline input with this name + # TODO: check whether schema fits + connected_pipeline_input = existing_pipeline_input_points[ + pipeline_input_name + ] + else: + # we need to create the pipeline input + connected_pipeline_input = PipelineInputField( + value_name=pipeline_input_name, + value_schema=schema, + pipeline_id=self._pipeline_id, + ) + + existing_pipeline_input_points[ + pipeline_input_name + ] = connected_pipeline_input + + data_flow_graph.add_node( + connected_pipeline_input, type=PipelineInputField.__name__ + ) + data_flow_graph_simple.add_node( + connected_pipeline_input, type=PipelineInputField.__name__ + ) + + step_input_point = StepInputField( + step_id=step.step_id, + pipeline_id=self._pipeline_id, + value_name=input_name, + value_schema=schema, + connected_pipeline_input=connected_pipeline_input.value_name, + connected_outputs=None, + ) + connected_pipeline_input.connected_inputs.append( + step_input_point.address + ) + data_flow_graph.add_edge(connected_pipeline_input, step_input_point) + data_flow_graph_simple.add_edge(connected_pipeline_input, step) + + data_flow_graph.add_node(step_input_point, type=StepInputField.__name__) + + steps_details[step.step_id]["inputs"][input_name] = step_input_point + + data_flow_graph.add_edge(step_input_point, step) + + if other_step_dependency: + for module_id in other_step_dependency: + execution_graph.add_edge(module_id, step.step_id) + else: + execution_graph.add_edge("__root__", step.step_id) + + # calculate execution order + path_lengths: typing.Dict[str, int] = {} + + for step in self._steps: + + step_id = step.step_id + + paths = list(nx.all_simple_paths(execution_graph, "__root__", step_id)) + max_steps = max(paths, key=lambda x: len(x)) + path_lengths[step_id] = len(max_steps) - 1 + + max_length = max(path_lengths.values()) + + for i in range(1, max_length + 1): + stage: typing.List[str] = [ + m for m, length in path_lengths.items() if length == i + ] + processing_stages.append(stage) + for _step_id in stage: + steps_details[_step_id]["processing_stage"] = i + steps_details[_step_id]["step"].processing_stage = i + + self._steps_details = steps_details + self._execution_graph = execution_graph + self._data_flow_graph = data_flow_graph + self._data_flow_graph_simple = data_flow_graph_simple + self._processing_stages = processing_stages + + self._get_node_of_type.cache_clear() + + def to_details(self) -> 
"PipelineStructureDesc": + + steps = {} + workflow_inputs: typing.Dict[str, typing.List[str]] = {} + workflow_outputs: typing.Dict[str, str] = {} + + for m_id, details in self.steps_details.items(): + + step = details["step"] + + input_connections: typing.Dict[str, typing.List[str]] = {} + for k, v in details["inputs"].items(): + + if v.connected_pipeline_input is not None: + connected_item = v.connected_pipeline_input + input_connections[k] = [ + generate_step_alias(PIPELINE_PARENT_MARKER, connected_item) + ] + workflow_inputs.setdefault(f"{connected_item}", []).append(v.alias) + elif v.connected_outputs is not None: + assert len(v.connected_outputs) > 0 + for co in v.connected_outputs: + input_connections.setdefault(k, []).append(co.alias) + else: + raise TypeError(f"Invalid connection type: {type(connected_item)}") + + output_connections: typing.Dict[str, typing.Any] = {} + for k, v in details["outputs"].items(): + for connected_item in v.connected_inputs: + + output_connections.setdefault(k, []).append( + generate_step_alias( + connected_item.step_id, connected_item.value_name + ) + ) + if v.pipeline_output: + output_connections.setdefault(k, []).append( + generate_step_alias(PIPELINE_PARENT_MARKER, v.pipeline_output) + ) + workflow_outputs[v.pipeline_output] = v.alias + + steps[step.step_id] = StepDesc( + step=step, + processing_stage=details["processing_stage"], + input_connections=input_connections, + output_connections=output_connections, + ) + + return PipelineStructureDesc( + pipeline_id=self._pipeline_id, + steps=steps, + processing_stages=self.processing_stages, + pipeline_input_connections=workflow_inputs, + pipeline_output_connections=workflow_outputs, + ) + + +class StepDesc(BaseModel): + """Details of a single [PipelineStep][kiara.pipeline.structure.PipelineStep] (which lives within a [Pipeline][kiara.pipeline.pipeline.Pipeline]""" + + class Config: + allow_mutation = False + extra = Extra.forbid + + step: PipelineStep = Field(description="Attributes of the step itself.") + processing_stage: int = Field( + description="The processing stage of this step within a Pipeline." + ) + input_connections: typing.Dict[str, typing.List[str]] = Field( + description="A map that explains what elements connect to this steps inputs. A connection could either be a Pipeline input (indicated by the '__pipeline__' token), or another steps output." + ) + output_connections: typing.Dict[str, typing.List[str]] = Field( + description="A map that explains what elemnts connect to this steps outputs. A connection could be either a Pipeline output, or another steps input." + ) + + +class PipelineStructureDesc(BaseModel): + """Outlines the internal structure of a [Pipeline][kiara.pipeline.pipeline.Pipeline].""" + + class Config: + allow_mutation = False + extra = Extra.forbid + + pipeline_id: str = Field(description="The (unique) pipeline id.") + steps: typing.Dict[str, StepDesc] = Field( + description="The steps contained in this pipeline, with the 'step_id' as key." + ) + processing_stages: typing.List[typing.List[str]] = Field( + description="The order in which this pipeline has to be processed (basically the dependencies of each step on other steps, if any)." + ) + pipeline_input_connections: typing.Dict[str, typing.List[str]] = Field( + description="The connections of this pipelines input fields. One input field can be connected to one or several step input fields." + ) + pipeline_output_connections: typing.Dict[str, str] = Field( + description="The connections of this pipelines output fields. 
+    )
diff --git a/src/kiara/py.typed b/src/kiara/py.typed
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/kiara/resources/.gitkeep b/src/kiara/resources/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/kiara/resources/pipelines/.gitkeep b/src/kiara/resources/pipelines/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/kiara/resources/pipelines/logic/nand.json b/src/kiara/resources/pipelines/logic/nand.json
new file mode 100644
index 000000000..3814a33f6
--- /dev/null
+++ b/src/kiara/resources/pipelines/logic/nand.json
@@ -0,0 +1,24 @@
+{
+  "module_type_name": "nand",
+  "doc": "Returns 'False' if both inputs are 'True'.",
+  "steps": [
+    {
+      "module_type": "and",
+      "step_id": "and"
+    },
+    {
+      "module_type": "not",
+      "step_id": "not",
+      "input_links": {
+        "a": "and.y"
+      }
+    }
+  ],
+  "input_aliases": {
+    "and__a": "a",
+    "and__b": "b"
+  },
+  "output_aliases": {
+    "not__y": "y"
+  }
+}
diff --git a/src/kiara/resources/pipelines/logic/nor.json b/src/kiara/resources/pipelines/logic/nor.json
new file mode 100644
index 000000000..f461c91a4
--- /dev/null
+++ b/src/kiara/resources/pipelines/logic/nor.json
@@ -0,0 +1,24 @@
+{
+  "module_type_name": "nor",
+  "doc": "Returns 'True' if both inputs are 'False'.",
+  "steps": [
+    {
+      "module_type": "or",
+      "step_id": "or"
+    },
+    {
+      "module_type": "not",
+      "step_id": "not",
+      "input_links": {
+        "a": "or.y"
+      }
+    }
+  ],
+  "input_aliases": {
+    "or__a": "a",
+    "or__b": "b"
+  },
+  "output_aliases": {
+    "not__y": "y"
+  }
+}
diff --git a/src/kiara/resources/pipelines/logic/xor.json b/src/kiara/resources/pipelines/logic/xor.json
new file mode 100644
index 000000000..d5ef18081
--- /dev/null
+++ b/src/kiara/resources/pipelines/logic/xor.json
@@ -0,0 +1,31 @@
+{
+  "module_type_name": "xor",
+  "doc": "Returns 'True' if exactly one of its two inputs is 'True'.",
+  "steps": [
+    {
+      "module_type": "or",
+      "step_id": "or"
+    },
+    {
+      "module_type": "nand",
+      "step_id": "nand"
+    },
+    {
+      "module_type": "and",
+      "step_id": "and",
+      "input_links": {
+        "a": "or.y",
+        "b": "nand.y"
+      }
+    }
+  ],
+  "input_aliases": {
+    "or__a": "a",
+    "or__b": "b",
+    "nand__a": "a",
+    "nand__b": "b"
+  },
+  "output_aliases": {
+    "and__y": "y"
+  }
+}
diff --git a/src/kiara/utils.py b/src/kiara/utils.py
new file mode 100644
index 000000000..450ab491e
--- /dev/null
+++ b/src/kiara/utils.py
@@ -0,0 +1,233 @@
+# -*- coding: utf-8 -*-
+import inspect
+import json
+import logging
+import os
+import sys
+import typing
+import yaml
+from io import StringIO
+from networkx import Graph
+from pathlib import Path
+from pydantic.schema import (
+    get_flat_models_from_model,
+    get_model_name_map,
+    model_process_schema,
+)
+from rich import box
+from rich.table import Table
+from ruamel.yaml import YAML
+from stevedore import ExtensionManager
+from typing import Union
+
+from kiara.data.values import ValueSchema
+
+if typing.TYPE_CHECKING:
+    from kiara.config import KiaraModuleConfig, PipelineModuleConfig
+    from kiara.module import KiaraModule
+
+log = logging.getLogger("kiara")
+
+
+def get_data_from_file(path: Union[str, Path]) -> typing.Any:
+
+    if isinstance(path, str):
+        path = Path(os.path.expanduser(path))
+
+    content = path.read_text()
+
+    if path.name.endswith(".json"):
+        content_type = "json"
+    elif path.name.endswith(".yaml") or path.name.endswith(".yml"):
+        content_type = "yaml"
+    else:
+        raise ValueError(
+            "Invalid data format, only 'json' or 'yaml' are supported currently."
+        )
+
+    if content_type == "json":
+        data = json.loads(content)
+    else:
+        data = yaml.safe_load(content)
+
+    return data
+
+
+def print_ascii_graph(graph: Graph):
+
+    try:
+        from asciinet._libutil import check_java
+
+        check_java("Java ")
+    except Exception:
+        print(
+            "\nJava is currently necessary to print ascii graphs. This might change in the future, but to use this functionality please install a JRE."
+        )
+        return
+
+    try:
+        from asciinet import graph_to_ascii
+    except Exception:  # noqa
+        print(
+            "\nCan't print graph on terminal, package 'asciinet' not available. Please install it into the current virtualenv using:\n\npip install 'git+https://github.com/cosminbasca/asciinet.git#egg=asciinet&subdirectory=pyasciinet'"
+        )
+        return
+
+    print(graph_to_ascii(graph))
+
+
+_AUTO_MODULE_ID: typing.Dict[str, int] = {}
+
+
+def get_auto_workflow_alias(module_type: str, use_incremental_ids: bool = False) -> str:
+    """Return an id for a workflow obj of a provided module class.
+
+    If 'use_incremental_ids' is set to True, a unique id is returned.
+
+    Args:
+        module_type (str): the name of the module type
+        use_incremental_ids (bool): whether to return a unique (incremental) id
+
+    Returns:
+        str: a module id
+    """
+
+    if not use_incremental_ids:
+        return module_type
+
+    nr = _AUTO_MODULE_ID.setdefault(module_type, 0)
+    _AUTO_MODULE_ID[module_type] = nr + 1
+
+    return f"{module_type}_{nr}"
+
+
+def find_kiara_modules() -> typing.Dict[str, typing.Type["KiaraModule"]]:
+
+    log2 = logging.getLogger("stevedore")
+    out_hdlr = logging.StreamHandler(sys.stdout)
+    out_hdlr.setFormatter(
+        logging.Formatter("kiara module plugin error -> %(message)s")
+    )
+    out_hdlr.setLevel(logging.INFO)
+    log2.addHandler(out_hdlr)
+    log2.setLevel(logging.INFO)
+
+    log.debug("Loading kiara modules...")
+
+    mgr = ExtensionManager(
+        namespace="kiara.modules", invoke_on_load=False, propagate_map_exceptions=True
+    )
+
+    result = {}
+    for plugin in mgr:
+        name = plugin.name
+        ep = plugin.entry_point
+        module_cls = ep.load()
+        result[name] = module_cls
+
+    return result
+
+
+def create_table_from_config_class(
+    config_cls: typing.Type["KiaraModuleConfig"], remove_pipeline_config: bool = False
+) -> Table:
+
+    table = Table(box=box.HORIZONTALS, show_header=False)
+    table.add_column("Field name", style="i")
+    table.add_column("Type")
+    table.add_column("Description")
+    flat_models = get_flat_models_from_model(config_cls)
+    model_name_map = get_model_name_map(flat_models)
+    m_schema, _, _ = model_process_schema(config_cls, model_name_map=model_name_map)
+    fields = m_schema["properties"]
+
+    for field_name, details in fields.items():
+        if remove_pipeline_config and field_name in [
+            "steps",
+            "input_aliases",
+            "output_aliases",
+            "doc",
+        ]:
+            continue
+
+        table.add_row(
+            field_name, details["type"], details.get("description", "-- n/a --")
+        )
+
+    return table
+
+
+def create_table_from_field_schemas(**fields: ValueSchema):
+
+    table = Table(box=box.SIMPLE, show_header=False)
+    table.add_column("Field name", style="i")
+    table.add_column("Value type")
+    table.add_column("Default")
+
+    for field_name, schema in fields.items():
+        d = "-- no default --" if schema.default is None else str(schema.default)
+        table.add_row(field_name, schema.type, d)  # type: ignore
+
+    return table
+
+
+def module_config_from_cli_args(*args: str) -> typing.Dict[str, typing.Any]:
+
+    if not args:
+        return {}
+
+    config: typing.Dict[str, typing.Any] = {}
+    for arg in args:
+        if "=" in arg:
+            key, value = arg.split("=", maxsplit=1)
+            try:
+                _v =
json.loads(value) + except Exception: + _v = value + part_config = {key: _v} + elif os.path.isfile(os.path.realpath(os.path.expanduser(arg))): + path = os.path.realpath(os.path.expanduser(arg)) + part_config = get_data_from_file(path) + assert isinstance(part_config, typing.Mapping) + else: + try: + part_config = json.loads(arg) + assert isinstance(part_config, typing.Mapping) + except Exception: + raise Exception(f"Could not parse argument into module config: {arg}") + + for k, v in part_config.items(): + if k in config.keys(): + log.warning( + f"Duplicate config key '{k}', overwriting old value with: {v}" + ) + config[k] = v + return config + + +def get_doc_for_module_class(module_cls: typing.Type["KiaraModule"]): + + from kiara import PipelineModule + + if module_cls == PipelineModule or not module_cls.is_pipeline(): + + doc = module_cls.__doc__ + if not doc: + doc = "-- n/a --" + else: + doc = inspect.cleandoc(doc) + else: + bpc: "PipelineModuleConfig" = module_cls._base_pipeline_config # type: ignore + doc = bpc.doc + return doc + + +class StringYAML(YAML): + def dump(self, data, stream=None, **kw): + inefficient = False + if stream is None: + inefficient = True + stream = StringIO() + YAML.dump(self, data, stream, **kw) + if inefficient: + return stream.getvalue() diff --git a/src/kiara/workflow.py b/src/kiara/workflow.py new file mode 100644 index 000000000..3196a2086 --- /dev/null +++ b/src/kiara/workflow.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +import os +import typing + +from kiara.config import KiaraWorkflowConfig +from kiara.data.values import ValueSet +from kiara.kiara import Kiara +from kiara.pipeline.module import PipelineModule +from kiara.pipeline.pipeline import Pipeline, StepStatus +from kiara.pipeline.structure import PipelineStructure +from kiara.utils import get_auto_workflow_alias, get_data_from_file + + +class KiaraWorkflow(object): + """A thin wrapper class around a [PipelineModule][kiara.pipeline.PipelineModule], mostly handling initialization from simplified configuration data.""" + + def __init__( + self, + config: typing.Union[KiaraWorkflowConfig, typing.Mapping[str, typing.Any], str], + workflow_id: str = None, + ): + + if isinstance(config, typing.Mapping): + self._workflow_config: KiaraWorkflowConfig = KiaraWorkflowConfig(**config) + + elif isinstance(config, str): + if config == "pipeline": + raise Exception( + "Can't create workflow from 'pipeline' module type without further configuration." + ) + + if config in Kiara.instance().available_module_types: + self._workflow_config = KiaraWorkflowConfig(module_type=config) + + elif os.path.isfile(os.path.expanduser(config)): + path = os.path.expanduser(config) + workflow_config_data = get_data_from_file(path) + self._workflow_config = KiaraWorkflowConfig( + module_config=workflow_config_data, module_type="pipeline" + ) + else: + raise Exception( + f"Can't create workflow config from string: {config}. Value either needs to be a (registered) module type name, or a path to a file." + ) + elif isinstance(config, KiaraWorkflowConfig): + self._workflow_config = config + else: + # raise TypeError(f"Invalid type '{type(workflow_config)}' for workflow configuration: {workflow_config}") + raise TypeError( + f"Invalid type '{type(config)}' for workflow configuration." 
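+                # NOTE: at this point 'config' was neither a mapping, a registered
+                # module type name, a path to a pipeline description file, nor a
+                # KiaraWorkflowConfig instance.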
+ ) + + if not workflow_id: + workflow_id = get_auto_workflow_alias( + self._workflow_config.module_type, use_incremental_ids=True + ) + + self._workflow_id: str = workflow_id + + root_module_args: typing.Dict[str, typing.Any] = {"id": self._workflow_id} + if self._workflow_config.module_type == "pipeline": + root_module_args["module_type"] = "pipeline" + root_module_args["module_config"] = self._workflow_config.module_config + elif Kiara.instance().is_pipeline_module(self._workflow_config.module_type): + root_module_args["module_type"] = self._workflow_config.module_type + root_module_args["module_config"] = self._workflow_config.module_config + else: + # means it's a python module, and we wrap it into a single-module pipeline + root_module_args["module_type"] = "pipeline" + steps_conf = { + "steps": [ + { + "module_type": self._workflow_config.module_type, + "step_id": self._workflow_config.module_type, + "module_config": self._workflow_config.module_config, + } + ] + } + root_module_args["module_config"] = steps_conf + + self._root_module: PipelineModule = Kiara.instance().create_module(**root_module_args) # type: ignore + assert isinstance(self._root_module, PipelineModule) + self._pipeline: typing.Optional[Pipeline] = None + + @property + def structure(self) -> PipelineStructure: + return self._root_module.structure + + @property + def pipeline(self) -> Pipeline: + + if self._pipeline is None: + self._pipeline = Pipeline(self.structure) + return self._pipeline + + @property + def state(self) -> StepStatus: + return self.pipeline.status + + @property + def inputs(self) -> ValueSet: + return self.pipeline.inputs + + # @inputs.setter + # def inputs(self, inputs: typing.Mapping[str, typing.Any]): + # self.pipeline.set_pipeline_inputs(**inputs) + + @property + def outputs(self) -> ValueSet: + return self.pipeline.outputs + + @property + def input_names(self) -> typing.List[str]: + return list(self.inputs.keys()) + + @property + def output_names(self) -> typing.List[str]: + return list(self.outputs.keys()) + + @property + def workflow_id(self) -> str: + return self._workflow_id + + def __repr__(self): + + return f"{self.__class__.__name__}(workflow_id={self.workflow_id}, root_module={self._root_module})" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 000000000..18ee5a083 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + Dummy conftest.py for kiara. + + If you don't know what this is for, just leave it empty. 
+ Read more about conftest.py under: + https://pytest.org/latest/plugins.html +""" +import pytest + +import os +import typing + +from kiara.config import PipelineModuleConfig + +from .utils import PIPELINES_FOLDER + + +@pytest.fixture +def workflow_paths(): + + result = {} + for root, dirnames, filenames in os.walk(PIPELINES_FOLDER, topdown=True): + + for f in filenames: + full = os.path.join(root, f) + if os.path.isfile(full) and f.endswith(".json"): + result[os.path.splitext(f)[0]] = full + + return result + + +@pytest.fixture +def workflow_configs(workflow_paths) -> typing.Mapping[str, PipelineModuleConfig]: + + return { + name: PipelineModuleConfig.parse_file(path) + for name, path in workflow_paths.items() + } diff --git a/tests/resources/pipelines/dummy/dummy_1.json b/tests/resources/pipelines/dummy/dummy_1.json new file mode 100644 index 000000000..90beb85a3 --- /dev/null +++ b/tests/resources/pipelines/dummy/dummy_1.json @@ -0,0 +1,26 @@ +{ + "steps": [ + { + "module_type": "dummy", + "step_id": "and_1", + "module_config": { + "input_schema": { + "a": { + "type": "integer" + } + }, + "output_schema": { + "x": { + "type": "integer" + }, + "y": { + "type": "string" + }, + "z": { + "type": "boolean" + } + } + } + } + ] +} diff --git a/tests/resources/pipelines/dummy/dummy_1_delay.json b/tests/resources/pipelines/dummy/dummy_1_delay.json new file mode 100644 index 000000000..b86ce0247 --- /dev/null +++ b/tests/resources/pipelines/dummy/dummy_1_delay.json @@ -0,0 +1,30 @@ +{ + "steps": [ + { + "module_type": "dummy", + "step_id": "and_1", + "module_config": { + "input_schema": { + "a": { + "type": "integer" + } + }, + "output_schema": { + "x": { + "type": "integer" + }, + "y": { + "type": "string" + }, + "z": { + "type": "boolean" + } + }, + "delay": 2, + "outputs": { + "x": 2 + } + } + } + ] +} diff --git a/tests/resources/pipelines/logic/logic_1.json b/tests/resources/pipelines/logic/logic_1.json new file mode 100644 index 000000000..278919d45 --- /dev/null +++ b/tests/resources/pipelines/logic/logic_1.json @@ -0,0 +1,9 @@ +{ + "steps": [ + { + "module_type": "and", + "step_id": "and_1" + } + ], + "doc": "Simple example pipeline, wrapping a single 'and' module." +} diff --git a/tests/resources/pipelines/logic/logic_2.json b/tests/resources/pipelines/logic/logic_2.json new file mode 100644 index 000000000..f648200f6 --- /dev/null +++ b/tests/resources/pipelines/logic/logic_2.json @@ -0,0 +1,24 @@ +{ + "steps": [ + { + "module_type": "and", + "step_id": "and_1" + }, + { + "module_type": "and", + "step_id": "and_2", + "input_links": { + "a": "and_1.y" + } + } + ], + "input_aliases": { + "and_1__a": "a", + "and_1__b": "b", + "and_2__b": "c" + }, + "output_aliases": { + "and_2__y": "y" + }, + "doc": "Returns 'true' only if all 3 inputs are also 'true'." +} diff --git a/tests/resources/pipelines/logic/logic_3.json b/tests/resources/pipelines/logic/logic_3.json new file mode 100644 index 000000000..fbbdd92ab --- /dev/null +++ b/tests/resources/pipelines/logic/logic_3.json @@ -0,0 +1,21 @@ +{ + "steps": [ + { + "module_type": "and", + "step_id": "and_1_1" + }, + { + "module_type": "and", + "step_id": "and_1_2" + }, + { + "module_type": "and", + "step_id": "and_2", + "input_links": { + "a": "and_1_1.y", + "b": "and_1_2.y" + } + } + ], + "doc": "Returns 'true' only if all 4 inputs are 'true'." 
+} diff --git a/tests/test_workflow_creation.py b/tests/test_workflow_creation.py new file mode 100644 index 000000000..6fcf28706 --- /dev/null +++ b/tests/test_workflow_creation.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +import typing + +from kiara import PipelineModule +from kiara.config import PipelineModuleConfig +from kiara.pipeline.structure import PipelineStep + + +def test_workflow_desc_files(workflow_paths): + + for path in workflow_paths.values(): + c = PipelineModuleConfig.parse_file(path) + assert isinstance(c, PipelineModuleConfig) + assert len(c.steps) > 0 + assert c.steps[0].step_id + assert c.steps[0].module_type + + +def test_workflow_obj_attributes( + workflow_configs: typing.Mapping[str, PipelineModuleConfig] +): + + logic_1 = workflow_configs["logic_1"] + + assert len(logic_1.steps) == 1 + assert len(logic_1.input_aliases) == 0 + assert len(logic_1.output_aliases) == 0 + + logic_2 = workflow_configs["logic_2"] + + assert len(logic_2.steps) == 2 + assert len(logic_2.input_aliases) == 3 + assert len(logic_2.output_aliases) == 1 + + logic_3 = workflow_configs["logic_3"] + + assert len(logic_3.steps) == 3 + assert len(logic_3.input_aliases) == 0 + assert len(logic_3.output_aliases) == 0 + + +def test_workflow_obj_creation( + workflow_configs: typing.Mapping[str, PipelineModuleConfig] +): + + logic_1 = workflow_configs["logic_1"] + c = PipelineModule(id="logic_1", module_config=logic_1) + assert isinstance(c, PipelineModule) + + assert c.full_id == "logic_1" + assert c.structure.pipeline_id == "logic_1" + assert len(c.structure.steps) == 1 + assert "and_1" in c.structure.to_details().steps.keys() + assert isinstance(c.structure.to_details().steps["and_1"].step, PipelineStep) diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 000000000..5926ded37 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- +import os + +KIARA_TEST_RESOURCES = os.path.join(os.path.dirname(__file__), "resources") +PIPELINES_FOLDER = os.path.join(KIARA_TEST_RESOURCES, "pipelines") + + +def get_workflow_config_path(workflow_name: str): + return os.path.join(PIPELINES_FOLDER, workflow_name)
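---

Editorial note (not part of the patch): the pieces above compose as follows. A minimal usage sketch, assuming this commit is installed and the bundled logic pipelines are registered as module types via the 'kiara.modules' entry points; the aliases shown are taken from src/kiara/resources/pipelines/logic/nand.json, and the printed values are illustrative.

    from kiara.workflow import KiaraWorkflow

    # "nand" resolves to the bundled pipeline description in
    # src/kiara/resources/pipelines/logic/nand.json
    wf = KiaraWorkflow(config="nand", workflow_id="nand_example")

    print(wf.input_names)   # ['a', 'b'] - aliased from 'and__a'/'and__b'
    print(wf.output_names)  # ['y'] - aliased from 'not__y'

    # the structure exposes the computed execution order: 'not' depends on 'and'
    print(wf.structure.processing_stages)  # [['and'], ['not']]

    desc = wf.structure.to_details()
    print(desc.steps["not"].input_connections)  # e.g. {'a': ['and.y']}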