Skip to content

Commit

Permalink
refactor: support type conversions & metadata extraction via modules
Browse files Browse the repository at this point in the history
  • Loading branch information
makkus committed May 31, 2021
1 parent d603289 commit 348a86d
Show file tree
Hide file tree
Showing 28 changed files with 1,889 additions and 537 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ repos:
files: "^src/"
pass_filenames: true
args: ["--config-file", "setup.cfg"]
additional_dependencies: [pydantic>=1.8.0, rich>=10.0.0, ruamel.yaml, anyio>=3.0.0, pyzmq>=22.0.3]
additional_dependencies: [pydantic>=1.8.0, rich>=10.0.0, ruamel.yaml, anyio>=3.0.0, pyzmq>=22.0.3, bidict]


- repo: git://github.com/pre-commit/pre-commit-hooks
Expand Down
75 changes: 75 additions & 0 deletions dev/dev.ipynb

Large diffs are not rendered by default.

52 changes: 52 additions & 0 deletions docs/modules/module_development.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": true,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# Developing *kiara* modules\n",
"\n",
"This page will show you how to create your own *kiara* modules. It's early days still, so the way this is done\n",
"currently is not as pythonic, user-friendly and easy as I hope it will eventually be. I do hope it is easy enough\n",
"for everyone with a bit of Python experience to be able to create their own, simple modules, though.\n",
"\n",
"## (Optional) Create a project structure\n",
"\n",
"*kiara* modules live in Python packages. Although technically it would be possible to just use Python files,\n",
"this is not supported for now, possibly ever. The main reason for that is that it is very important to be\n",
"able to pinpoint the exact version of a module that was used to create/transform some data. Python packages\n",
"can be versioned (as well as their dependencies) relatively easily, in a generic way. Simple scripts can't,\n",
"at least not to the extent we need.\n",
"\n",
"Creating Python packages correctly is not trivial, which is why I created a [project template](https://github.com/DHARPA-Project/kiara_modules.project_template) that includes\n",
"all the necessary bits and integrations to make this as painless as possible."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
8 changes: 4 additions & 4 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ Display information about a modules, like description, configuration schema, sou

#### ...for a core module

{{ cli("kiara", "module", "describe-type", "and") }}
{{ cli("kiara", "module", "explain-type", "and") }}

#### ...for a pipeline module

{{ cli("kiara", "module", "describe-type", "nand") }}
{{ cli("kiara", "module", "explain-type", "nand") }}

### get properties of an instantiated module

Expand All @@ -38,11 +38,11 @@ This command also can take module configuration, in different forms. This will b

#### ...for a core module

{{ cli("kiara", "module", "describe-instance", "and") }}
{{ cli("kiara", "module", "explain-instance", "and") }}

#### ...for a pipeline module

{{ cli("kiara", "module", "describe-instance", "nand") }}
{{ cli("kiara", "module", "explain-instance", "nand") }}

## pipeline-specific sub-commands

Expand Down
20 changes: 20 additions & 0 deletions onboarding.folder_to_table.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"table": {
"id": [
0,
1
],
"rel_path": [
"csv_1.csv",
"csv_2.csv"
],
"file_name": [
"csv_1.csv",
"csv_2.csv"
],
"content": [
"a,b,c\nd,e,f\n",
"a,b,c\nd,e,f\n"
]
}
}
6 changes: 6 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@ packages = find_namespace:
install_requires =
anyio>=3.0.0,<4.0.0
appdirs>=1.4.4,<2.0.0
bidict>=0.21.0
deepdiff>=5.2.0,<6.0.0
filetype>=1.0.0,<2.0.0
networkx>=2.5,<3.0
pp-ez>=0.2.0
pyarrow>=4.0.0,<5.0.0
pydantic>=1.7.0,<2.0.0
python-dateutil>=2.8.0
python-slugify>=5.0.0
pyyaml>=5.4.0,<6.0.0
pyzmq>=22.0.0,<23.0.0
rich>=9.0.0,<11.0.0
Expand All @@ -52,6 +54,7 @@ console_scripts =
kiara = kiara.interfaces.cli:cli
kiara.modules =
pipeline = kiara.pipeline.module:PipelineModule
metadata.extract_python_class = kiara.modules.metadata:ExtractPythonClass

[options.extras_require]
cli =
Expand Down Expand Up @@ -210,6 +213,9 @@ ignore_missing_imports = true
[mypy-filetype]
ignore_missing_imports = true

[mypy-kiara_modules.*]
ignore_missing_imports = true

[mypy-IPython.*]
ignore_missing_imports = true

Expand Down
27 changes: 27 additions & 0 deletions src/kiara/data/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,33 @@ def get_value_data(self, item: typing.Union[str, KiaraValue]) -> typing.Any:

return value

def get_value_metadata(
    self,
    value: typing.Union["KiaraValue", str],
    *metadata_keys: str,
    also_return_schema: bool = False,
):
    """Return metadata for a value, keyed by metadata key.

    Metadata already attached to the value item (``value.metadata``) is
    reused; only the keys that are not present there are requested from
    ``self._kiara.get_value_metadata``.

    Args:
        value: the value, or a reference that ``self.get_value_item`` can
            resolve to a value item.
        *metadata_keys: the metadata keys to look up. If none are given, an
            empty dict is returned.
        also_return_schema: if True, return the full metadata entries; if
            False, return only the content of each entry's ``"metadata"`` key.

    Returns:
        A dict mapping each requested metadata key to either the full entry
        or its ``"metadata"`` part, depending on ``also_return_schema``.

    NOTE(review): this assumes every entry (cached or freshly computed) is a
    mapping with a ``"metadata"`` key, since that key is indexed on both;
    what else an entry holds (presumably the schema) is not visible here.
    """

    value = self.get_value_item(value)
    result = {}
    missing = set()
    for metadata_key in metadata_keys:
        # Check the individual key; testing the whole 'metadata_keys' tuple
        # never matches, which made every key look missing.
        if metadata_key in value.metadata:
            # Keep the full entry here and unwrap once at the end, so cached
            # and freshly computed entries are handled identically.
            result[metadata_key] = value.metadata[metadata_key]
        else:
            missing.add(metadata_key)

    if missing:
        _md = self._kiara.get_value_metadata(value, metadata_keys=missing)
        result.update(_md)

    if also_return_schema:
        return result
    return {k: v["metadata"] for k, v in result.items()}

def get_value_hash(
self, item: typing.Union[str, KiaraValue]
) -> typing.Union[int, ValueHashMarker]:
Expand Down
70 changes: 16 additions & 54 deletions src/kiara/data/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,17 @@
"""

import datetime
import networkx
import networkx as nx
import pyarrow
import typing
from dateutil import parser
from deepdiff import DeepHash
from enum import Enum
from networkx import DiGraph
from rich.console import Console, ConsoleOptions, RenderResult

from kiara.utils import camel_case_to_snake_case

if typing.TYPE_CHECKING:
from kiara.data.values import Value


class ValueHashMarker(Enum):

Expand Down Expand Up @@ -93,9 +92,19 @@ def get_type_transformation_configs(
The name of the transformation is the key of the result dictionary, the configuration is a module configuration
(dictionary with 'module_type' and optional 'module_config', 'input_name' and 'output_name' keys).
"""
return {
"to_string": {"module_type": "strings.pretty_print", "input_name": "item"}
}
return {"string": {"module_type": "strings.pretty_print", "input_name": "item"}}

@classmethod
def check_value_type(cls, value: "Value") -> typing.Optional["ValueType"]:
    """Run the data-level type check against the data held by ``value``.

    The data is obtained via ``value.get_value_data()`` and handed to
    ``cls.check_data_type``, whose result is returned unchanged.
    """
    data = value.get_value_data()
    return cls.check_data_type(data)

@classmethod
def check_data_type(cls, data: typing.Any) -> typing.Optional["ValueType"]:
    """Check whether ``data`` matches this value type.

    This implementation never reports a match and always returns None.
    NOTE(review): presumably subclasses override this to return a matching
    type object -- confirm against the concrete value types.
    """
    return None

@classmethod
def relevant_python_types(cls) -> typing.Optional[typing.Iterable[typing.Type]]:
    """Return the Python types relevant for this value type.

    This implementation declares none and always returns None.
    NOTE(review): presumably subclasses override this to list the Python
    classes they handle -- confirm against the concrete value types.
    """
    return None

def __init__(self, **type_config: typing.Any):

Expand Down Expand Up @@ -349,50 +358,3 @@ def parse_value(self, v: typing.Any) -> typing.Any:

def validate(cls, value: typing.Any):
assert isinstance(value, datetime.datetime)


class TableType(ValueType):
    """Value type for tabular data held as a ``pyarrow.Table``."""

    def validate(cls, value: typing.Any) -> None:
        """Assert that ``value`` is a ``pyarrow.Table``."""
        assert isinstance(value, pyarrow.Table)

    def extract_type_metadata(
        cls, value: typing.Any
    ) -> typing.Mapping[str, typing.Any]:
        """Collect column names, per-column schema, row count and byte size.

        The ``"schema"`` entry maps each column name to a dict holding the
        stringified Arrow type, the Arrow type id, and the field metadata
        (an empty dict when the field carries none).
        """

        table: pyarrow.Table = value

        columns = {}
        for column_name in table.schema.names:
            column = table.schema.field(column_name)
            arrow_type = column.type
            columns[column_name] = {
                "item_type": str(arrow_type),
                "arrow_type_id": arrow_type.id,
                # Missing or empty field metadata is normalized to an empty dict.
                "metadata": column.metadata or {},
            }

        return {
            "column_names": table.column_names,
            "schema": columns,
            "rows": table.num_rows,
            "size_in_bytes": table.nbytes,
        }


class NetworkGraphType(ValueType):
    """Value type for network graphs held as ``networkx`` graph objects."""

    def validate(cls, value: typing.Any) -> typing.Any:
        """Return ``value`` unchanged if it is a ``networkx.Graph``.

        Raises:
            ValueError: if ``value`` is not a ``networkx.Graph`` instance.
        """
        if isinstance(value, networkx.Graph):
            return value
        raise ValueError(f"Invalid type '{type(value)}' for graph: {value}")

    def extract_type_metadata(
        cls, value: typing.Any
    ) -> typing.Mapping[str, typing.Any]:
        """Summarize a graph: directedness, node/edge counts and density."""

        graph: nx.Graph = value

        is_directed = isinstance(value, DiGraph)
        node_count = len(graph.nodes)
        edge_count = len(graph.edges)
        graph_density = nx.density(graph)

        return {
            "directed": is_directed,
            "number_of_nodes": node_count,
            "number_of_edges": edge_count,
            "density": graph_density,
        }
27 changes: 27 additions & 0 deletions src/kiara/data/types/graphs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
import networkx
import networkx as nx
import typing
from networkx import DiGraph

from kiara.data.types import ValueType


class NetworkGraphType(ValueType):
    """Value type for network graphs held as ``networkx`` graph objects."""

    def validate(cls, value: typing.Any) -> typing.Any:
        """Return ``value`` unchanged if it is a ``networkx.Graph``.

        Raises:
            ValueError: if ``value`` is not a ``networkx.Graph`` instance.
        """
        if isinstance(value, networkx.Graph):
            return value
        raise ValueError(f"Invalid type '{type(value)}' for graph: {value}")

    def extract_type_metadata(
        cls, value: typing.Any
    ) -> typing.Mapping[str, typing.Any]:
        """Summarize a graph: directedness, node/edge counts and density."""

        graph: nx.Graph = value

        is_directed = isinstance(value, DiGraph)
        node_count = len(graph.nodes)
        edge_count = len(graph.edges)
        graph_density = nx.density(graph)

        return {
            "directed": is_directed,
            "number_of_nodes": node_count,
            "number_of_edges": edge_count,
            "density": graph_density,
        }
Loading

0 comments on commit 348a86d

Please sign in to comment.