Skip to content

Commit 0c47e51

Browse files
authored
feat(connector,operator,component): adopt jsonschema validation and dataclass type hint (#74)
Because

- provide better DX when configuring `connectors`, `operators` and `components`

This commit

- add `Makefile` target and bash script to fetch resources and tasks definitions jsonschema
- add `Makefile` target and bash script to generate dataclass classes for resources config type hint from jsonschema
- add airbyte connectors
- adopt dataclass config and jsonschema validations in connectors and operators
- add necessary packages in poetry project file

resolves ins-3294
resolves ins-3295
resolves ins-3296
resolves ins-3297
resolves ins-3298
1 parent 903bc29 commit 0c47e51

File tree

319 files changed

+26440
-49
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

319 files changed

+26440
-49
lines changed

.pylint.ini

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ extension-pkg-whitelist=
77

88
# Add files or directories to the blacklist. They should be base names, not
99
# paths.
10-
ignore=CVS,protogen,protobufs
10+
ignore=CVS,protogen,protobufs,schema
1111

1212
# Add files or directories matching the regex patterns to the blacklist. The
1313
# regex matches against base names, not paths.
@@ -68,6 +68,7 @@ disable=
6868
too-many-ancestors,
6969
too-many-instance-attributes,
7070
too-many-statements,
71+
too-many-lines,
7172
attribute-defined-outside-init,
7273
unsupported-assignment-operation,
7374
unsupported-delete-operation,

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ get-proto:
4949
update-proto:
5050
@ git submodule update --remote --merge
5151

52+
.PHONY: update-specs
53+
update-specs:
54+
@ bash bin/specs
55+
5256
# TEST ########################################################################
5357

5458
RANDOM_SEED ?= $(shell date +%s)

bin/specs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/usr/bin/env bash
#
# Fetch the connector and operator JSON schema specs from the instill-ai
# GitHub repositories and generate Python dataclass modules from them with
# datamodel-codegen.
#
# Usage:    bash bin/specs        (or: make update-specs)
# Run from the repository root: every path below is relative to it.
# Requires: curl, jq, datamodel-codegen
#
# The script is best-effort: a failed download or generation step is reported
# on stderr and the remaining specs are still processed.

set -u

readonly CONNECTOR_URL="https://raw.githubusercontent.com/instill-ai/connector/main/pkg"
readonly OPERATOR_URL="https://raw.githubusercontent.com/instill-ai/operator/main/pkg"
readonly SCHEMA_DIR="./instill/resources/schema"
readonly JSON_DIR="${SCHEMA_DIR}/jsons"

readonly -a CONNECTOR_SCHEMAS=(
  "airbyte" "bigquery" "googlecloudstorage" "stabilityai" "googlesearch"
  "huggingface" "instill" "numbers" "openai" "pinecone" "redis" "restapi"
  "website"
)
readonly -a OPERATOR_SCHEMAS=("base64" "end" "image" "json" "start" "text")

# Flags shared by every datamodel-codegen invocation.
readonly -a CODEGEN_FLAGS=(
  --strip-default-none
  --disable-timestamp
  --use-schema-description
  --use-title-as-name
  --input-file-type jsonschema
  --output-model-type dataclasses.dataclass
)

warn() { printf 'warning: %s\n' "$*" >&2; }

die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Download a URL to a file.
# Arguments: $1 - URL, $2 - destination path
# Returns:   0 on success, 1 if the download failed (-f makes HTTP errors
#            fail instead of saving the error page as if it were a spec)
#######################################
fetch() {
  local url=$1 dest=$2
  if ! curl -fsSL -o "$dest" "$url"; then
    warn "failed to download ${url}"
    return 1
  fi
}

#######################################
# Generate dataclass code from a JSON schema file.
# Arguments: $1 - input JSON schema, $2 - output file or directory
#######################################
codegen() {
  local input=$1 output=$2
  datamodel-codegen "${CODEGEN_FLAGS[@]}" --input "$input" --output "$output" ||
    warn "datamodel-codegen failed for ${input}"
}

#######################################
# Split "<name>_tasks.json" into one schema file per task and per
# input/output entry, then generate a dataclass module for each.
# The top-level "$defs" object, when present, is merged into every schema so
# that each generated file is self-contained.
# Arguments: $1 - connector or operator name
#######################################
generate_task_schemas() {
  local name=$1
  local tasks_json="${JSON_DIR}/${name}_tasks.json"
  local task io filename schema_json

  # fd 3 keeps the loop's input away from the commands run in its body.
  # NOTE: @tsv escapes tabs/backslashes in keys; task and io keys are assumed
  # to be plain identifiers.
  while IFS=$'\t' read -r -u 3 task io; do
    filename=$(printf '%s' "${name}_${task}_${io}" | tr "[:upper:]" "[:lower:]")
    schema_json="${JSON_DIR}/${filename}.json"
    if ! jq -c --arg task "$task" --arg io "$io" '
      ."$defs" as $defs
      | .[$task][$io]
      | if $defs then . + {"$defs": $defs} else . end
    ' "$tasks_json" >"$schema_json"; then
      warn "could not extract schema for ${filename}"
      continue
    fi
    echo "Generating $filename.py..."
    codegen "$schema_json" "${SCHEMA_DIR}/${filename}.py"
  done 3< <(
    jq -r '
      to_entries[]
      | select(.key != "$defs")
      | .key as $task
      | .value | objects | to_entries[]
      | [$task, .key] | @tsv
    ' "$tasks_json"
  )
}

#######################################
# Fetch one connector's specs and generate its resource and task modules.
# Arguments: $1 - connector name
#######################################
process_connector() {
  local connector=$1
  local base="${CONNECTOR_URL}/${connector}/config"
  local definitions="${JSON_DIR}/${connector}_definitions.json"
  local o_path="${SCHEMA_DIR}/${connector}.py"
  local spec

  echo "=====================@@@ Fetching and processing $connector @@@====================="
  if [[ "$connector" == 'stabilityai' || "$connector" == 'openai' ]]; then
    echo "Downloading ${connector}.json..."
    fetch "${base}/${connector}.json" "${JSON_DIR}/${connector}.json"
  fi

  echo "Downloading ${connector}_definitions.json..."
  if fetch "${base}/definitions.json" "$definitions"; then
    # Keep only the resource specification. It is captured in a variable
    # first because the result overwrites the file jq is reading.
    if spec=$(jq '.[0].spec.resource_specification' "$definitions"); then
      printf '%s\n' "$spec" >"$definitions"
      # The airbyte schema is generated into a package directory.
      if [[ "$connector" == "airbyte" ]]; then
        o_path="${SCHEMA_DIR}/${connector}/"
      fi
      echo "Generating ${o_path}..."
      codegen "$definitions" "$o_path"
    else
      warn "could not read resource specification from ${definitions}"
    fi
  fi

  # tasks
  echo "Downloading ${connector}_tasks.json..."
  if fetch "${base}/tasks.json" "${JSON_DIR}/${connector}_tasks.json"; then
    generate_task_schemas "$connector"
  fi
}

#######################################
# Fetch one operator's specs and generate its task modules.
# Arguments: $1 - operator name
#######################################
process_operator() {
  local operator=$1
  local base="${OPERATOR_URL}/${operator}/config"

  echo "=====================@@@ Fetching and processing $operator @@@====================="
  echo "Downloading ${operator}_definitions.json..."
  fetch "${base}/definitions.json" "${JSON_DIR}/${operator}_definitions.json"

  # tasks
  echo "Downloading ${operator}_tasks.json..."
  if fetch "${base}/tasks.json" "${JSON_DIR}/${operator}_tasks.json"; then
    generate_task_schemas "$operator"
  fi
}

main() {
  local tool name

  for tool in curl jq datamodel-codegen; do
    command -v "$tool" >/dev/null || die "required command not found: ${tool}"
  done
  mkdir -p "$JSON_DIR" || die "cannot create ${JSON_DIR}"

  for name in "${CONNECTOR_SCHEMAS[@]}"; do
    process_connector "$name"
  done
  for name in "${OPERATOR_SCHEMAS[@]}"; do
    process_operator "$name"
  done
}

main "$@"

instill/resources/connector.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,7 @@ def __call__(self, task_inputs: list, mode="execute"):
3636
self.resource.id, task_inputs
3737
)
3838
return resp.outputs
39-
return self.client.pipeline_service.test_connector(
40-
self.resource.id, task_inputs
41-
).state
39+
return self.test()
4240

4341
@property
4442
def client(self):
@@ -56,7 +54,9 @@ def resource(self):
5654
def resource(self, resource: connector_interface.Connector):
5755
self._resource = resource
5856

59-
def create_component(self, name: str, config: dict) -> pipeline_interface.Component:
57+
def _create_component(
58+
self, name: str, config: dict
59+
) -> pipeline_interface.Component:
6060
component = pipeline_interface.Component()
6161
component.id = name
6262
component.definition_name = self.get_definition().name

instill/resources/connector_ai.py

Lines changed: 151 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,179 @@
1-
# pylint: disable=no-member,wrong-import-position,no-name-in-module
1+
# pylint: disable=no-member,wrong-import-position,no-name-in-module,arguments-renamed
2+
import json
3+
from typing import Union
4+
5+
import jsonschema
6+
27
from instill.clients import InstillClient
8+
from instill.protogen.vdp.pipeline.v1beta.pipeline_pb2 import Component
9+
from instill.resources import const
310
from instill.resources.connector import Connector
11+
from instill.resources.schema import (
12+
instill_task_classification_input,
13+
instill_task_detection_input,
14+
instill_task_image_to_image_input,
15+
instill_task_instance_segmentation_input,
16+
instill_task_keypoint_input,
17+
instill_task_ocr_input,
18+
instill_task_semantic_segmentation_input,
19+
instill_task_text_generation_input,
20+
instill_task_text_to_image_input,
21+
instill_task_visual_question_answering_input,
22+
)
23+
from instill.resources.schema.huggingface import HuggingFaceConnectorSpec
24+
from instill.resources.schema.instill import (
25+
InstillModelConnector as InstillModelConnectorConfig,
26+
)
27+
from instill.resources.schema.openai import OpenAIConnectorResource
28+
from instill.resources.schema.stabilityai import StabilityAIConnectorResource
429

530

6-
class InstillModelConnector(Connector):
31+
class HuggingfaceConnector(Connector):
    """Huggingface Connector

    The configuration is validated against the JSON schema loaded from
    ``huggingface_definitions.json`` before the connector is created.
    """

    # JSON schema for the connector configuration, read once when the class
    # body is executed.
    with open(
        f"{const.SPEC_PATH}/huggingface_definitions.json", "r", encoding="utf8"
    ) as f:
        definitions_jsonschema = json.load(f)

    def __init__(
        self,
        client: InstillClient,
        name: str,
        config: HuggingFaceConnectorSpec,
    ) -> None:
        """Validate ``config`` and create the connector.

        Args:
            client: Instill client passed through to ``Connector``.
            name: connector name passed through to ``Connector``.
            config: Hugging Face connector configuration dataclass.

        Raises:
            jsonschema.ValidationError: if ``config`` does not satisfy the
                Hugging Face definitions schema.
        """
        definition = "connector-definitions/hugging-face"

        # Validate against this connector's own schema; the previous code
        # validated against the Stability AI schema by mistake.
        jsonschema.validate(vars(config), HuggingfaceConnector.definitions_jsonschema)
        super().__init__(client, name, definition, vars(config))
49+
50+
51+
class InstillModelConnector(Connector):
    """Instill Model Connector

    The configuration is validated against the JSON schema loaded from
    ``instill_definitions.json`` before the connector is created.
    """

    # JSON schema for the connector configuration, read once when the class
    # body is executed.
    with open(f"{const.SPEC_PATH}/instill_definitions.json", "r", encoding="utf8") as f:
        definitions_jsonschema = json.load(f)

    # (input schema module, task name) pairs used by ``create_component`` to
    # pick the task that matches the type of the given input.
    _TASK_INPUTS = (
        (instill_task_classification_input, "TASK_CLASSIFICATION"),
        (instill_task_detection_input, "TASK_DETECTION"),
        (instill_task_instance_segmentation_input, "TASK_INSTANCE_SEGMENTATION"),
        (instill_task_semantic_segmentation_input, "TASK_SEMANTIC_SEGMENTATION"),
        (instill_task_keypoint_input, "TASK_KEYPOINT"),
        (instill_task_ocr_input, "TASK_OCR"),
        (instill_task_image_to_image_input, "TASK_IMAGE_TO_IMAGE"),
        (instill_task_text_generation_input, "TASK_TEXT_GENERATION"),
        (instill_task_text_to_image_input, "TASK_TEXT_TO_IMAGE"),
        (
            instill_task_visual_question_answering_input,
            "TASK_VISUAL_QUESTION_ANSWERING",
        ),
    )

    def __init__(
        self,
        client: InstillClient,
        config: InstillModelConnectorConfig,
        name: str = "model-connector",
    ) -> None:
        """Fill in empty settings, validate ``config`` and create the connector.

        Note that ``config`` is updated in place when ``api_token`` or
        ``server_url`` is an empty string.

        Args:
            client: Instill client; its model service token is used when
                ``config.api_token`` is empty.
            config: Instill Model connector configuration dataclass.
            name: connector name, ``"model-connector"`` by default.

        Raises:
            jsonschema.ValidationError: if ``config`` does not satisfy the
                Instill Model definitions schema.
        """
        definition = "connector-definitions/instill-model"

        # An empty token falls back to the token of the client's current
        # model service instance.
        if config.api_token == "":  # type: ignore
            config.api_token = client.model_service.hosts[  # type: ignore
                client.model_service.instance
            ].token
        # An empty server URL falls back to the default gateway address.
        if config.server_url == "":  # type: ignore
            config.server_url = "http://api-gateway:8080"  # type: ignore

        jsonschema.validate(vars(config), InstillModelConnector.definitions_jsonschema)
        super().__init__(client, name, definition, vars(config))

    def create_component(
        self,
        name: str,
        inp: Union[
            instill_task_classification_input.Input,
            instill_task_detection_input.Input,
            instill_task_instance_segmentation_input.Input,
            instill_task_semantic_segmentation_input.Input,
            instill_task_keypoint_input.Input,
            instill_task_ocr_input.Input,
            instill_task_image_to_image_input.Input,
            instill_task_text_generation_input.Input,
            instill_task_text_to_image_input.Input,
            instill_task_visual_question_answering_input.Input,
        ],
    ) -> Component:
        """Create a pipeline component whose task is chosen by the type of ``inp``.

        Args:
            name: component id.
            inp: one of the generated ``Input`` dataclasses of the Instill
                model tasks.

        Returns:
            The component built by ``Connector._create_component``.

        Raises:
            TypeError: if ``inp`` is not one of the supported ``Input`` types
                (previously this surfaced as an ``UnboundLocalError``).
        """
        for schema_module, task in self._TASK_INPUTS:
            if isinstance(inp, schema_module.Input):
                config = {
                    "input": vars(inp),
                    "task": task,
                }
                return super()._create_component(name, config)
        raise TypeError(
            f"unsupported input type for InstillModelConnector: {type(inp).__name__}"
        )
21142

22143

23144
class StabilityAIConnector(Connector):
    """Stability AI Connector"""

    # JSON schema for the connector configuration, read once when the class
    # body is executed.
    with open(
        f"{const.SPEC_PATH}/stabilityai_definitions.json", "r", encoding="utf8"
    ) as f:
        definitions_jsonschema = json.load(f)

    def __init__(
        self,
        client: InstillClient,
        name: str,
        config: StabilityAIConnectorResource,
    ) -> None:
        """Validate ``config`` against the Stability AI schema and create the connector.

        Raises:
            jsonschema.ValidationError: if ``config`` does not satisfy the schema.
        """
        # vars() returns the dataclass instance's attribute dict; the same
        # mapping is validated and then handed to the base class.
        configuration = vars(config)
        jsonschema.validate(configuration, StabilityAIConnector.definitions_jsonschema)
        super().__init__(
            client,
            name,
            "connector-definitions/stability-ai",
            configuration,
        )
33162

34163

35164
class OpenAIConnector(Connector):
    """OpenAI Connector"""

    # JSON schema for the connector configuration, read once when the class
    # body is executed.
    with open(f"{const.SPEC_PATH}/openai_definitions.json", "r", encoding="utf8") as f:
        definitions_jsonschema = json.load(f)

    def __init__(
        self,
        client: InstillClient,
        name: str,
        config: OpenAIConnectorResource,
    ) -> None:
        """Validate ``config`` against the OpenAI schema and create the connector.

        Raises:
            jsonschema.ValidationError: if ``config`` does not satisfy the schema.
        """
        # vars() returns the dataclass instance's attribute dict; the same
        # mapping is validated and then handed to the base class.
        configuration = vars(config)
        jsonschema.validate(configuration, OpenAIConnector.definitions_jsonschema)
        super().__init__(
            client,
            name,
            "connector-definitions/openai",
            configuration,
        )

0 commit comments

Comments
 (0)