diff --git a/forte/data/data_pack.py b/forte/data/data_pack.py index d038c95ef..e9fc66a09 100644 --- a/forte/data/data_pack.py +++ b/forte/data/data_pack.py @@ -32,7 +32,6 @@ import numpy as np from sortedcontainers import SortedList - from forte.common.exception import ( ProcessExecutionException, UnknownOntologyClassException, @@ -47,13 +46,13 @@ from forte.data.ontology.core import EntryType from forte.data.ontology.top import ( Annotation, + Grids, Link, Group, SinglePackEntries, Generics, AudioAnnotation, ImageAnnotation, - Grids, Payload, ) @@ -171,8 +170,8 @@ def __init__(self, pack_name: Optional[str] = None): self._data_store: DataStore = DataStore() self._entry_converter: EntryConverter = EntryConverter() self.image_annotations: List[ImageAnnotation] = [] - self.grids: List[Grids] = [] + self.grids: List[Grids] = [] self.text_payloads: List[Payload] = [] self.audio_payloads: List[Payload] = [] self.image_payloads: List[Payload] = [] @@ -244,7 +243,7 @@ def text(self) -> str: @property def audio(self) -> Optional[np.ndarray]: r"""Return the audio of the data pack""" - return self.get_payload_data_at(Modality.Audio, 0) + return cast(np.ndarray, self.get_payload_data_at(Modality.Audio, 0)) @property def all_annotations(self) -> Iterator[Annotation]: @@ -448,15 +447,12 @@ def get_payload_at( supported_modality = [enum.name for enum in Modality] try: - # if modality.name == "text": if modality == Modality.Text: payloads_length = len(self.text_payloads) payload = self.text_payloads[payload_index] - # elif modality.name == "audio": elif modality == Modality.Audio: payloads_length = len(self.audio_payloads) payload = self.audio_payloads[payload_index] - # elif modality.name == "image": elif modality == Modality.Image: payloads_length = len(self.image_payloads) payload = self.image_payloads[payload_index] @@ -569,7 +565,7 @@ def set_text( # temporary solution for backward compatibility # past API use this method to add a single text in the datapack if 
len(self.text_payloads) == 0 and text_payload_index == 0: - from ft.onto.base_ontology import ( # pylint: disable=import-outside-toplevel + from ft.onto.payload_ontology import ( # pylint: disable=import-outside-toplevel TextPayload, ) @@ -601,7 +597,7 @@ def set_audio( # temporary solution for backward compatibility # past API use this method to add a single audio in the datapack if len(self.audio_payloads) == 0 and audio_payload_index == 0: - from ft.onto.base_ontology import ( # pylint: disable=import-outside-toplevel + from ft.onto.payload_ontology import ( # pylint: disable=import-outside-toplevel AudioPayload, ) diff --git a/forte/data/ontology/top.py b/forte/data/ontology/top.py index 39688b906..d962faed1 100644 --- a/forte/data/ontology/top.py +++ b/forte/data/ontology/top.py @@ -63,6 +63,7 @@ "Box", "BoundingBox", "Payload", + "Meta", ] QueryType = Union[Dict[str, Any], np.ndarray] @@ -1253,7 +1254,7 @@ def __init__( payload_idx: int = 0, uri: Optional[str] = None, ): - from ft.onto.base_ontology import ( # pylint: disable=import-outside-toplevel + from ft.onto.payload_ontology import ( # pylint: disable=import-outside-toplevel TextPayload, AudioPayload, ImagePayload, @@ -1282,6 +1283,12 @@ def __init__( self.replace_back_operations: Sequence[Tuple] = [] self.processed_original_spans: Sequence[Tuple] = [] self.orig_text_len: int = 0 + self.payloading = None + self.meta = None + + def set_meta(self, meta): + # there might be a better way to set meta + self.meta = meta def get_type(self) -> type: """ @@ -1324,11 +1331,22 @@ def payload_index(self) -> int: @property def uri(self) -> Optional[str]: + """ + Universal resource identifier of the data source. + + Returns: + Optional[str]: Universal resource identifier of the data source. + """ return self._uri + def load(self): + fn = self.payloading.route(self.meta) + self._cache = fn(self.uri) + def set_cache(self, data: Union[str, np.ndarray]): """ - Load cache data into the payload. 
+ Set cache data in the the payload. This method can be useful when users + want to set a new cache. Args: data: data to be set in the payload. It can be str for text data or @@ -1365,6 +1383,14 @@ def __setstate__(self, state): self._modality = getattr(Modality, state["_modality"]) +class Meta(Generics): + def __init__( + self, pack: PackType + ): # pylint: disable=useless-super-delegation + super().__init__(pack) + self.source_type = None # data source type, it can be 'local` or 'web' + + SinglePackEntries = ( Link, Group, @@ -1373,5 +1399,6 @@ def __setstate__(self, state): AudioAnnotation, ImageAnnotation, Payload, + Meta, ) MultiPackEntries = (MultiPackLink, MultiPackGroup, MultiPackGeneric) diff --git a/forte/ontology_specs/base_ontology.json b/forte/ontology_specs/base_ontology.json index f3d854301..8b78809f7 100644 --- a/forte/ontology_specs/base_ontology.json +++ b/forte/ontology_specs/base_ontology.json @@ -444,29 +444,6 @@ "type": "str" } ] - }, - { - "entry_name": "ft.onto.base_ontology.AudioPayload", - "parent_entry": "forte.data.ontology.top.Payload", - "description": "A payload that caches audio data", - "attributes":[ - { - "name": "sample_rate", - "type": "int" - } - ] - }, - { - "entry_name": "ft.onto.base_ontology.TextPayload", - "parent_entry": "forte.data.ontology.top.Payload", - "description": "A payload that caches text data", - "attributes": [] - }, - { - "entry_name": "ft.onto.base_ontology.ImagePayload", - "parent_entry": "forte.data.ontology.top.Payload", - "description": "A payload that caches image data", - "attributes":[] } ] } diff --git a/forte/ontology_specs/payload_ontology.json b/forte/ontology_specs/payload_ontology.json new file mode 100644 index 000000000..ef35841b6 --- /dev/null +++ b/forte/ontology_specs/payload_ontology.json @@ -0,0 +1,76 @@ +{ + "name": "payload_ontology", + "definitions": [ + { + "entry_name": "ft.onto.payload_ontology.AudioPayload", + "parent_entry": "forte.data.ontology.top.Payload", + "description": "A 
payload that caches audio data", + "attributes":[ + { + "name": "sample_rate", + "type": "int" + } + ] + }, + { + "entry_name": "ft.onto.payload_ontology.TextPayload", + "parent_entry": "forte.data.ontology.top.Payload", + "description": "A payload that caches text data", + "attributes": [] + }, + { + "entry_name": "ft.onto.payload_ontology.ImagePayload", + "parent_entry": "forte.data.ontology.top.Payload", + "description": "A payload that caches image data", + "attributes":[] + }, + { + "entry_name": "ft.onto.payload_ontology.JpegMeta", + "parent_entry": "forte.data.ontology.top.Meta", + "attributes":[ + { + "name": "extension", + "type": "str" + }, + {"name": "mime", + "type": "str"}, + {"name": "type_code", + "type": "str"}, + {"name": "version", + "type": "str"} + ] + }, + { + "entry_name": "ft.onto.payload_ontology.AudioMeta", + "parent_entry": "forte.data.ontology.top.Meta", + "attributes":[ + { + "name": "sample_rate", + "type": "int" + }, + {"name": "channels", + "type": "int"}, + {"name": "bits_per_sample", + "type": "int"}, + {"name": "duration", + "type": "float"}, + {"name": "bitrate", + "type": "int"}, + {"name": "encoding", + "type": "str"}, + {"name": "dtype", + "type": "str"} + ] + }, + { + "entry_name": "ft.onto.payload_ontology.JpegPayload", + "parent_entry": "ft.onto.payload_ontology.ImagePayload", + "attributes":[ + { + "name": "meta", + "type": "ft.onto.payload_ontology.JpegMeta" + } + ] + } + ] +} diff --git a/forte/utils/payload_factory.py b/forte/utils/payload_factory.py new file mode 100644 index 000000000..e5b419d9e --- /dev/null +++ b/forte/utils/payload_factory.py @@ -0,0 +1,159 @@ +from abc import abstractmethod +from multiprocessing.sharedctypes import Value +from typing import Callable +import numpy as np +from forte.data.ontology.top import Meta + + +class PayloadFactory: + """ + A class that handles the creation of payloads. It can register meta data and check its validity. 
+ """ + + def __init__(self): + self.valid_meta = ( + {} + ) # map from meta data name to function that handles this meta data + + def register(self, meta: Meta): + """ + A function that registers a meta data type into the factory. + + Args: + meta_name: a Generic object that is used to register a Payload meta data type. + """ + if meta.source_type not in ("web", "local"): + raise ValueError("Meta data source must be either 'web' or 'local'") + self.valid_meta[type(meta)] = True + + def check_meta(self, meta): + return type(meta) in self.valid_meta + + +class Payloading: + """ + An class that help mapping meta data to loading function. + """ + + def __init__(self): + self._factory = PayloadFactory() + + def load_factory(self, factory): + self._factory = factory + + @abstractmethod + def route(self, meta): + """ + Convert the meta into a loading function that takes uri and read data + from the uri. + + Args: + meta: Meta data + + Returns: + a function that takes uri and read data from the uri. + """ + pass + + +class ImagePayloading(Payloading): + """ + A class that helps mapping Image meta data to loading function. + """ + + def route(self, meta) -> Callable: + """ + A function that takes and analyzes a meta data and returns a + corresponding loading function. + + Args: + meta: a Meta object that is used to determine the loading function. + + Raises: + ValueError: if meta is not a valid meta data. + + Returns: + a function that takes uri and read data from the uri, and it returns the data. 
+ """ + if not self._factory.check_meta(meta): + raise ValueError(f"Meta data{meta} not supported") + if not meta.source_type in ("web", "local"): + raise ValueError("Meta data source must be either 'web' or 'local'") + + if meta.source_type == "local": + try: + import matplotlib.pyplot as plt # pylint: disable=import-outside-toplevel + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "ImagePayloading reading local file requires `matplotlib`" + "package to be installed." + ) from e + return plt.imread + else: + try: + from PIL import Image # pylint: disable=import-outside-toplevel + import requests # pylint: disable=import-outside-toplevel + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "ImagePayloading reading web file requires `PIL` and" + "`requests` packages to be installed." + ) from e + + def read_uri(uri): + # customize this function to read data from uri + uri_obj = requests.get(uri, stream=True) + pil_image = Image.open(uri_obj.raw) + return self._pil_to_nparray(pil_image) + + return read_uri + + def _pil_to_nparray(self, pil_image): + return np.asarray(pil_image) + + +class AudioPayloading(Payloading): + def route(self, meta) -> Callable: + """ + A function that takes and analyzes an audio meta data and returns a + corresponding loading function. + + Args: + meta: an Meta ontology object that represent audio meta data. + + Returns: + a callable function that takes uri and read data from the uri, and it returns the data. + """ + if meta.source_type == "local": + try: + import soundfile # pylint: disable=import-outside-toplevel + except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "AudioPayloading requires 'soundfile' package to be installed." + " You can refer to [extra modules to install]('pip install" + " forte['audio_ext']) or 'pip install forte" + ". Note that additional steps might apply to Linux" + " users (refer to " + "https://pysoundfile.readthedocs.io/en/latest/#installation)." 
+ ) from e + + def get_first( + seq, + ): # takes the first item as soundfile returns a tuple of (data, samplerate) + return seq[0] + + def read_uri(uri): + if meta.encoding is None: # data type is ".raw" + return get_first( + soundfile.read( + file=uri, + samplerate=meta.sample_rate, + channels=meta.channels, + dtype=meta.dtype, + ) + ) + else: # sound file auto detect the + return get_first(soundfile.read(file=uri)) + + return read_uri + else: + pass diff --git a/ft/onto/base_ontology.py b/ft/onto/base_ontology.py index 678b4c63e..c94e5837e 100644 --- a/ft/onto/base_ontology.py +++ b/ft/onto/base_ontology.py @@ -19,7 +19,6 @@ from forte.data.ontology.top import Group from forte.data.ontology.top import Link from forte.data.ontology.top import MultiPackLink -from forte.data.ontology.top import Payload from typing import Dict from typing import Iterable from typing import List @@ -54,9 +53,6 @@ "MRCQuestion", "Recording", "AudioUtterance", - "AudioPayload", - "TextPayload", - "ImagePayload", ] @@ -311,7 +307,12 @@ class PredicateLink(Link): ParentType = PredicateMention ChildType = PredicateArgument - def __init__(self, pack: DataPack, parent: Optional[Entry] = None, child: Optional[Entry] = None): + def __init__( + self, + pack: DataPack, + parent: Optional[Entry] = None, + child: Optional[Entry] = None, + ): super().__init__(pack, parent, child) self.arg_type: Optional[str] = None @@ -331,7 +332,12 @@ class Dependency(Link): ParentType = Token ChildType = Token - def __init__(self, pack: DataPack, parent: Optional[Entry] = None, child: Optional[Entry] = None): + def __init__( + self, + pack: DataPack, + parent: Optional[Entry] = None, + child: Optional[Entry] = None, + ): super().__init__(pack, parent, child) self.dep_label: Optional[str] = None self.rel_type: Optional[str] = None @@ -340,7 +346,7 @@ def __init__(self, pack: DataPack, parent: Optional[Entry] = None, child: Option @dataclass class EnhancedDependency(Link): """ - A `Link` type entry which represent 
a enhanced dependency: + A `Link` type entry which represent a enhanced dependency: https://universaldependencies.org/u/overview/enhanced-syntax.html Attributes: dep_label (Optional[str]): The enhanced dependency label in Universal Dependency. @@ -351,7 +357,12 @@ class EnhancedDependency(Link): ParentType = Token ChildType = Token - def __init__(self, pack: DataPack, parent: Optional[Entry] = None, child: Optional[Entry] = None): + def __init__( + self, + pack: DataPack, + parent: Optional[Entry] = None, + child: Optional[Entry] = None, + ): super().__init__(pack, parent, child) self.dep_label: Optional[str] = None @@ -369,7 +380,12 @@ class RelationLink(Link): ParentType = EntityMention ChildType = EntityMention - def __init__(self, pack: DataPack, parent: Optional[Entry] = None, child: Optional[Entry] = None): + def __init__( + self, + pack: DataPack, + parent: Optional[Entry] = None, + child: Optional[Entry] = None, + ): super().__init__(pack, parent, child) self.rel_type: Optional[str] = None @@ -387,7 +403,12 @@ class CrossDocEntityRelation(MultiPackLink): ParentType = EntityMention ChildType = EntityMention - def __init__(self, pack: MultiPack, parent: Optional[Entry] = None, child: Optional[Entry] = None): + def __init__( + self, + pack: MultiPack, + parent: Optional[Entry] = None, + child: Optional[Entry] = None, + ): super().__init__(pack, parent, child) self.rel_type: Optional[str] = None @@ -400,7 +421,9 @@ class CoreferenceGroup(Group): MemberType = EntityMention - def __init__(self, pack: DataPack, members: Optional[Iterable[Entry]] = None): + def __init__( + self, pack: DataPack, members: Optional[Iterable[Entry]] = None + ): super().__init__(pack, members) @@ -417,7 +440,12 @@ class EventRelation(Link): ParentType = EventMention ChildType = EventMention - def __init__(self, pack: DataPack, parent: Optional[Entry] = None, child: Optional[Entry] = None): + def __init__( + self, + pack: DataPack, + parent: Optional[Entry] = None, + child: 
Optional[Entry] = None, + ): super().__init__(pack, parent, child) self.rel_type: Optional[str] = None @@ -435,7 +463,12 @@ class CrossDocEventRelation(MultiPackLink): ParentType = EventMention ChildType = EventMention - def __init__(self, pack: MultiPack, parent: Optional[Entry] = None, child: Optional[Entry] = None): + def __init__( + self, + pack: MultiPack, + parent: Optional[Entry] = None, + child: Optional[Entry] = None, + ): super().__init__(pack, parent, child) self.rel_type: Optional[str] = None @@ -457,8 +490,8 @@ class ConstituentNode(Annotation): sentiment: Dict[str, float] is_root: Optional[bool] is_leaf: Optional[bool] - parent_node: Optional['ConstituentNode'] - children_nodes: FList['ConstituentNode'] + parent_node: Optional["ConstituentNode"] + children_nodes: FList["ConstituentNode"] def __init__(self, pack: DataPack, begin: int, end: int): super().__init__(pack, begin, end) @@ -466,8 +499,8 @@ def __init__(self, pack: DataPack, begin: int, end: int): self.sentiment: Dict[str, float] = dict() self.is_root: Optional[bool] = None self.is_leaf: Optional[bool] = None - self.parent_node: Optional['ConstituentNode'] = None - self.children_nodes: FList['ConstituentNode'] = FList(self) + self.parent_node: Optional["ConstituentNode"] = None + self.children_nodes: FList["ConstituentNode"] = FList(self) @dataclass @@ -492,7 +525,6 @@ def __init__(self, pack: DataPack, begin: int, end: int): @dataclass class MCOption(Annotation): - def __init__(self, pack: DataPack, begin: int, end: int): super().__init__(pack, begin, end) @@ -560,38 +592,3 @@ class AudioUtterance(AudioAnnotation): def __init__(self, pack: DataPack, begin: int, end: int): super().__init__(pack, begin, end) self.speaker: Optional[str] = None - - -@dataclass -class AudioPayload(Payload): - """ - A payload that caches audio data - Attributes: - sample_rate (Optional[int]): - """ - - sample_rate: Optional[int] - - def __init__(self, pack: DataPack, payload_idx: int = 0, uri: Optional[str] = 
None): - super().__init__(pack, payload_idx, uri) - self.sample_rate: Optional[int] = None - - -@dataclass -class TextPayload(Payload): - """ - A payload that caches text data - """ - - def __init__(self, pack: DataPack, payload_idx: int = 0, uri: Optional[str] = None): - super().__init__(pack, payload_idx, uri) - - -@dataclass -class ImagePayload(Payload): - """ - A payload that caches image data - """ - - def __init__(self, pack: DataPack, payload_idx: int = 0, uri: Optional[str] = None): - super().__init__(pack, payload_idx, uri) diff --git a/ft/onto/payload_ontology.py b/ft/onto/payload_ontology.py new file mode 100644 index 000000000..a5d03f0f1 --- /dev/null +++ b/ft/onto/payload_ontology.py @@ -0,0 +1,141 @@ +# ***automatically_generated*** +# ***source json:forte/ontology_specs/payload_ontology.json*** +# flake8: noqa +# mypy: ignore-errors +# pylint: skip-file +""" +Automatically generated ontology payload_ontology. Do not change manually. +""" + +from dataclasses import dataclass +from forte.data.data_pack import DataPack +from forte.data.ontology.top import Meta +from forte.data.ontology.top import Payload +from typing import Optional + +__all__ = [ + "AudioPayload", + "TextPayload", + "ImagePayload", + "JpegMeta", + "AudioMeta", + "JpegPayload", +] + + +@dataclass +class AudioPayload(Payload): + """ + A payload that caches audio data + Attributes: + sample_rate (Optional[int]): + """ + + sample_rate: Optional[int] + + def __init__( + self, pack: DataPack, payload_idx: int = 0, uri: Optional[str] = None + ): + super().__init__(pack, payload_idx, uri) + self.sample_rate: Optional[int] = None + + +@dataclass +class TextPayload(Payload): + """ + A payload that caches text data + """ + + def __init__( + self, pack: DataPack, payload_idx: int = 0, uri: Optional[str] = None + ): + super().__init__(pack, payload_idx, uri) + + +@dataclass +class ImagePayload(Payload): + """ + A payload that caches image data + """ + + def __init__( + self, pack: DataPack, 
payload_idx: int = 0, uri: Optional[str] = None + ): + super().__init__(pack, payload_idx, uri) + + +@dataclass +class JpegMeta(Meta): + """ + Attributes: + extension (Optional[str]): + mime (Optional[str]): + type_code (Optional[str]): + version (Optional[str]): + source_type (Optional[str]): + """ + + extension: Optional[str] + mime: Optional[str] + type_code: Optional[str] + version: Optional[str] + source_type: Optional[str] + + def __init__(self, pack: DataPack): + super().__init__(pack) + self.extension: Optional[str] = None + self.mime: Optional[str] = None + self.type_code: Optional[str] = None + self.version: Optional[str] = None + self.source_type: Optional[str] = None + + +@dataclass +class AudioMeta(Meta): + """ + Attributes: + sample_rate (Optional[int]): + channels (Optional[int]): + bits_per_sample (Optional[int]): + duration (Optional[float]): + bitrate (Optional[int]): + encoding (Optional[str]): + source_type (Optional[str]): + dtype (Optional[str]): + """ + + sample_rate: Optional[int] + channels: Optional[int] + bits_per_sample: Optional[int] + duration: Optional[float] + bitrate: Optional[int] + encoding: Optional[str] + source_type: Optional[str] + dtype: Optional[str] + + def __init__(self, pack: DataPack): + super().__init__(pack) + self.sample_rate: Optional[int] = None + self.channels: Optional[int] = None + self.bits_per_sample: Optional[int] = None + self.duration: Optional[float] = None + self.bitrate: Optional[int] = None + self.encoding: Optional[str] = None + self.source_type: Optional[str] = None + self.dtype: Optional[str] = None + + +@dataclass +class JpegPayload(ImagePayload): + """ + Attributes: + meta (Optional[JpegMeta]): + """ + + meta: Optional[JpegMeta] + + def __init__( + self, pack: DataPack, payload_idx: int = 0, uri: Optional[str] = None + ): + super().__init__(pack, payload_idx, uri) + self.meta: Optional[JpegMeta] = None diff --git a/ocr.ipynb b/ocr.ipynb new file mode 100644 index 000000000..b11ee65b6 --- /dev/null 
+++ b/ocr.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/murphy/anaconda3/envs/forte_org_env/lib/python3.7/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "/media/NAS/NLP/Petuum/forte/forte/data/ontology/ontology_code_generator.py:406: UserWarning: No `__init__` function found in the class Meta of the module .\n", + " f\"No `__init__` function found in the class\"\n" + ] + } + ], + "source": [ + "import os\n", + "\n", + "from forte.data.data_pack import DataPack\n", + "from forte.pipeline import Pipeline\n", + "from forte.data.data_pack import DataPack\n", + "from forte.pipeline import Pipeline\n", + "from typing import Any, Iterator\n", + "\n", + "from ft.onto.payload_ontology import ImagePayload\n", + "\n", + "from PIL import Image\n", + "from forte.data.base_reader import PackReader" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "class OcrReader(PackReader):\n", + " r\"\"\"\n", + " :class:`StringReader` is designed to read in a list of string variables.\n", + " \"\"\"\n", + "\n", + " # pylint: disable=unused-argument\n", + " def _cache_key_function(self, collection) -> str:\n", + " return str(hash(collection)) + \".html\"\n", + "\n", + " def _collect(self, image_path) -> Iterator[Any]: # type: ignore\n", + " r\"\"\"Should be called with param ``Image_directory`` which is a path to a\n", + " folder containing Image files.\n", + "\n", + " Args:\n", + " Image_directory: Image directory containing the files.\n", + "\n", + " Returns: Iterator over paths to Image files\n", + " \"\"\"\n", + " # construct ImageMeta and store it in DataPack\n", + " return image_path\n", + "\n", + " 
def _parse_pack(self, data_source: str) -> Iterator[DataPack]:\n", + " r\"\"\"Takes a raw string and converts into a :class:`~forte.data.data_pack.DataPack`.\n", + "\n", + " Args:\n", + " data_source: str that contains text of a document.\n", + "\n", + " Returns: :class:`~forte.data.data_pack.DataPack` containing Document.\n", + " \"\"\"\n", + " pack: DataPack = DataPack()\n", + " payload_idx = 0\n", + " # Read in Image data and store in DataPack\n", + " # add Image payload into DataPack.payloads\n", + " ip = ImagePayload(pack, payload_idx)\n", + " try:\n", + " import pytesseract\n", + " except ModuleNotFoundError as e:\n", + " raise ModuleNotFoundError(\n", + " \"OcrReader requires 'pytesseract' package to be installed.\"\n", + " ) from e\n", + " image_data = Image.open(data_source) # lazy loading\n", + " ip.set_cache(image_data)\n", + " ocr_text = pytesseract.image_to_string(image_data)\n", + " pack.set_text(ocr_text)\n", + " pack.pack_name = data_source\n", + " \n", + " yield pack\n", + "\n", + "\n", + "# TODO: split ocr part into a processor" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/media/NAS/NLP/Petuum/forte/forte/data/ontology/ontology_code_generator.py:406: UserWarning: No `__init__` function found in the class Meta of the module .\n", + " f\"No `__init__` function found in the class\"\n", + "WARNING:root:Re-declared a new class named [ConstituentNode], which is probably used in import.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> \u001b[0;32m/tmp/ipykernel_51872/2424896200.py\u001b[0m(47)\u001b[0;36m_parse_pack\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m 45 \u001b[0;31m \u001b[0mpack\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpack_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_source\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m 46 \u001b[0;31m 
\u001b[0;32mimport\u001b[0m \u001b[0mpdb\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mpdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m---> 47 \u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'dafdsad'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m 48 \u001b[0;31m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\u001b[0;32m 49 \u001b[0;31m \u001b[0;32myield\u001b[0m \u001b[0mpack\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0m\n", + "\n", + "\n" + ] + }, + { + "ename": "BdbQuit", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mBdbQuit\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_51872/1522021614.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;31m# pipeline.run([\"ocr.jpg\"])\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mpack\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprocess_one\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"ocr.jpg\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpack\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/NAS/NLP/Petuum/forte/forte/pipeline.py\u001b[0m in \u001b[0;36mprocess_one\u001b[0;34m(self, *args, 
**kwargs)\u001b[0m\n\u001b[1;32m 996\u001b[0m \u001b[0mfirst_pack\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 997\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 998\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 999\u001b[0m \u001b[0mfirst_pack\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1000\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/NAS/NLP/Petuum/forte/forte/data/base_reader.py\u001b[0m in \u001b[0;36miter\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 281\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0;31m# Read via parsing dataset\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 283\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mpack\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lazy_iter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 284\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_type_consistency\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpack\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_meta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"record\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/NAS/NLP/Petuum/forte/forte/data/base_reader.py\u001b[0m in \u001b[0;36m_lazy_iter\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[0mnot_first\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 216\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mpack\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse_pack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcollection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 217\u001b[0m \u001b[0;31m# write to the cache if _cache_directory specified\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cache_directory\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/media/NAS/NLP/Petuum/forte/forte/data/base_reader.py\u001b[0m in \u001b[0;36mparse_pack\u001b[0;34m(self, collection)\u001b[0m\n\u001b[1;32m 145\u001b[0m )\n\u001b[1;32m 146\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 147\u001b[0;31m \u001b[0;32mfor\u001b[0m 
\u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parse_pack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcollection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 148\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_all_remaining_entries\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32myield\u001b[0m \u001b[0mp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/tmp/ipykernel_51872/2424896200.py\u001b[0m in \u001b[0;36m_parse_pack\u001b[0;34m(self, data_source)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0mpack\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpack_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_source\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpdb\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mpdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 47\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'dafdsad'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 48\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0;32myield\u001b[0m \u001b[0mpack\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/tmp/ipykernel_51872/2424896200.py\u001b[0m in \u001b[0;36m_parse_pack\u001b[0;34m(self, data_source)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0mpack\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpack_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_source\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m 
\u001b[0;32mimport\u001b[0m \u001b[0mpdb\u001b[0m\u001b[0;34m;\u001b[0m \u001b[0mpdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 47\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'dafdsad'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 48\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0;32myield\u001b[0m \u001b[0mpack\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/forte_org_env/lib/python3.7/bdb.py\u001b[0m in \u001b[0;36mtrace_dispatch\u001b[0;34m(self, frame, event, arg)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;31m# None\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mevent\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'line'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 88\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_line\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 89\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mevent\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'call'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/forte_org_env/lib/python3.7/bdb.py\u001b[0m in \u001b[0;36mdispatch_line\u001b[0;34m(self, 
frame)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstop_here\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbreak_here\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muser_line\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 113\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquitting\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mBdbQuit\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 114\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrace_dispatch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mBdbQuit\u001b[0m: " + ] + } + ], + "source": [ + "pipeline = Pipeline[DataPack]()\n", + "pipeline.set_reader(\n", + " OcrReader()\n", + ")\n", + "pipeline.initialize()\n", + "\n", + "for pack in pipeline.process_one([\"ocr.jpg\"]):\n", + " print(pack.text)\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.11 ('forte_org_env')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "ee55265381053916df67dbe70c8ab208ce4688dabc900b19af91d526111b9fa4" + } + } + 
}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/ocr.jpg b/ocr.jpg new file mode 100644 index 000000000..da0e6f68c Binary files /dev/null and b/ocr.jpg differ diff --git a/tests/forte/data/readers/audio_reader_test.py b/tests/forte/data/readers/audio_reader_test.py index 419976370..cda3f774f 100644 --- a/tests/forte/data/readers/audio_reader_test.py +++ b/tests/forte/data/readers/audio_reader_test.py @@ -28,7 +28,6 @@ from forte.data.readers import AudioReader from forte.pipeline import Pipeline from forte.processors.base.pack_processor import PackProcessor -from ft.onto.base_ontology import TextPayload class TestASRProcessor(PackProcessor): diff --git a/tests/forte/grids_test.py b/tests/forte/grids_test.py index df54a28a8..408e6ff26 100644 --- a/tests/forte/grids_test.py +++ b/tests/forte/grids_test.py @@ -16,7 +16,7 @@ """ import unittest from forte.data.modality import Modality -from ft.onto.base_ontology import ImagePayload +from ft.onto.payload_ontology import ImagePayload import numpy as np from numpy import array_equal diff --git a/tests/forte/image_annotation_test.py b/tests/forte/image_annotation_test.py index 35f523c80..ef0ec79c2 100644 --- a/tests/forte/image_annotation_test.py +++ b/tests/forte/image_annotation_test.py @@ -21,7 +21,7 @@ from numpy import array_equal from forte.data.ontology.top import ImageAnnotation -from ft.onto.base_ontology import ImagePayload +from ft.onto.payload_ontology import ImagePayload from forte.data.data_pack import DataPack import unittest diff --git a/tests/forte/utils/payload_factory_test.py b/tests/forte/utils/payload_factory_test.py new file mode 100644 index 000000000..47b5163d4 --- /dev/null +++ b/tests/forte/utils/payload_factory_test.py @@ -0,0 +1,102 @@ +# Copyright 2021 The Forte Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Unit test for payload factory.
"""
import os
import unittest
from forte.utils.payload_factory import (
    AudioPayloading,
    ImagePayloading,
    PayloadFactory,
)
from ft.onto.payload_ontology import (
    AudioPayload,
    ImagePayload,
    JpegMeta,
    AudioMeta,
)
from forte.data.data_pack import DataPack


class PayloadFactoryTest(unittest.TestCase):
    """
    Test PayloadFactory.

    Each test follows the same three steps: register a meta entry with the
    factory, hand the factory to a payloading object, then load a payload
    through that payloading object and check that data was cached.
    """

    def setUp(self):
        # A fresh factory per test so registrations do not leak between tests.
        self.f = PayloadFactory()

    def test_image_payloading(self):
        """Load an image payload from a local URI through ImagePayloading."""
        datapack = DataPack("image")
        img_meta = JpegMeta(datapack)
        img_meta.source_type = "local"

        # 1. register the meta data with the factory
        self.f.register(img_meta)

        # 2. each payloading is initialized with a factory;
        # the payloading loads the factory with the registered meta data
        payloading = ImagePayloading()
        payloading.load_factory(self.f)

        # Routing a registered meta must yield the loading function that
        # the payload later calls with its URI.
        self.assertTrue(callable(payloading.route(img_meta)))

        # 3. datapack and payload
        # NOTE(review): the URI is relative to the working directory and
        # carries a ".png" suffix while the meta is a JpegMeta -- confirm
        # that this fixture exists and is the intended one.
        uri = "test.png"
        ip = ImagePayload(datapack, 0, uri=uri)
        ip.payloading = payloading
        ip.set_meta(img_meta)  # maybe only store a meta name in ip
        ip.load()

        # `is not None` is used on purpose: the cache may be an array whose
        # truth value is ambiguous.
        self.assertIsNotNone(ip.cache)

    def test_audio_payloading(self):
        """Load an audio payload from a local FLAC file through AudioPayloading."""
        datapack = DataPack("audio")
        audio_meta = AudioMeta(datapack)
        audio_meta.source_type = "local"
        audio_meta.sample_rate = 44100
        audio_meta.channels = 2
        audio_meta.dtype = "float64"
        audio_meta.encoding = "flac"

        # 1. register the meta data with the factory
        self.f.register(audio_meta)

        # 2. each payloading is initialized with a factory;
        # the payloading loads the factory with the registered meta data
        payloading = AudioPayloading()
        payloading.load_factory(self.f)

        # Routing a registered meta must yield the loading function that
        # the payload later calls with its URI.
        self.assertTrue(callable(payloading.route(audio_meta)))

        # 3. datapack and payload
        # Resolve the sample relative to this test file (three directories
        # up) so the test does not depend on the working directory.
        uri = os.path.abspath(
            os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                os.pardir,
                os.pardir,
                os.pardir,
                "data_samples",
                "audio_reader_test",
                "test_audio_0.flac",
            )
        )
        ap = AudioPayload(datapack, 0, uri=uri)
        ap.payloading = payloading
        ap.set_meta(audio_meta)  # maybe only store a meta name in ap
        ap.load()

        # `is not None` is used on purpose: the cache may be an array whose
        # truth value is ambiguous.
        self.assertIsNotNone(ap.cache)