From 385ad174cdc46ca7dc6342519a1986374d7c9310 Mon Sep 17 00:00:00 2001 From: JamesX Date: Tue, 14 Feb 2023 16:18:23 +0400 Subject: [PATCH 01/10] Inital commit for adding basic (lower) level interfaces for attributes at Data_Store level (3 methods added, and corresponding unit test codes --- forte/data/data_store.py | 220 ++++++++++++++++++++++++++++ tests/forte/data/data_store_test.py | 104 +++++++++++++ 2 files changed, 324 insertions(+) diff --git a/forte/data/data_store.py b/forte/data/data_store.py index 2f0c660d8..ca03f99db 100644 --- a/forte/data/data_store.py +++ b/forte/data/data_store.py @@ -1487,6 +1487,226 @@ def get_attribute(self, tid: int, attr_name: str) -> Any: return entry[attr_id] + def get_attributes_of_tid(self, tid: int, attr_names: [str]) -> dict: + r"""This function returns the value of attributes listed in + ``attr_names`` for the entry with ``tid``. It locates the entry data + with ``tid`` and finds attributes listed in ``attr_names`` and return + as a dict. + + Args: + tid: Unique id of the entry. + attr_names: List of names of the attribute. + + Returns: + A dict with keys listed in ``attr_names`` for attributes of the + entry with ``tid``. + + Raises: + KeyError: when ``tid`` or ``attr_name`` is not found. + """ + entry, entry_type = self.get_entry(tid) + attrs: dict = {} + for attr_name in attr_names: + try: + attr_id = self._get_type_attribute_dict(entry_type)[attr_name][ + constants.ATTR_INDEX_KEY + ] + except KeyError as e: + raise KeyError(f"{entry_type} has no {attr_name} attribute.") from e + attrs[attr_name] = entry[attr_id] + + return attrs + + def get_attributes_of_tids(self, tids: [int], attr_names: [str]): + r"""This function returns the value of attributes listed in + ``attr_names`` for entries in listed in the ``tids``. It locates + the entries data with ``tid`` and put attributes listed in + ``attr_name`` in a dict for each entry (tid). + + Args: + tids: List of unique ids (tids) of the entry. + attr_names: List of name of the attribute. + + Returns: + A list of dict with ``attr_name`` as key for atrributes + of the entries listed in``tids``. + + Raises: + KeyError: when ``tid`` or ``attr_name`` is not found. + """ + tids_attrs = [] + for tid in tids: + entry, entry_type = self.get_entry(tid) + attrs: dict = {} + for attr_name in attr_names: + try: + attr_id = self._get_type_attribute_dict(entry_type)[attr_name][ + constants.ATTR_INDEX_KEY + ] + except KeyError as e: + raise KeyError(f"{entry_type} has no {attr_name} attribute.") from e + attrs[attr_name] = (entry[attr_id]) + + tids_attrs.append(attrs) + + return tids_attrs + + def get_attributes_of_type( + self, + type_name: str, + attributes_names: [str], + include_sub_type: bool = True, + range_span: Optional[Tuple[int, int]] = None, + ) -> Iterator[List]: + r"""This function fetches required attributes of entries from the + data store of type ``type_name``. If `include_sub_type` is set to + True and ``type_name`` is in [Annotation], this function also + fetches entries of subtype of ``type_name``. Otherwise, it only + fetches entries of type ``type_name``. + + Args: + type_name: The fully qualified name of the entry. + attributes_names: list of attributes to be fetched for each entry + include_sub_type: A boolean to indicate whether get its subclass. + range_span: A tuple that contains the begin and end indices + of the searching range of entries. + + Returns: + An iterator of the attributes of the entry in dict matching the + provided arguments. + """ + + # def within_range(entry: List[Any], range_span: Tuple[int, int]) -> bool: + # """ + # A helper function for deciding whether an annotation entry is + # inside the `range_span`. + # """ + # begin = self.get_datastore_attr_idx( + # entry[constants.ENTRY_TYPE_INDEX], constants.BEGIN_ATTR_NAME + # ) + # end = self.get_datastore_attr_idx( + # entry[constants.ENTRY_TYPE_INDEX], constants.END_ATTR_NAME + # ) + # + # if not self._is_annotation(entry[constants.ENTRY_TYPE_INDEX]): + # return False + # return entry[begin] >= range_span[0] and entry[end] <= range_span[1] + + entry_class = get_class(type_name) + all_types = set() + if include_sub_type: + for type in self.__elements: + if issubclass(get_class(type), entry_class): + all_types.add(type) + else: + all_types.add(type_name) + all_types = list(all_types) + all_types.sort() + + if self._is_annotation(type_name): + if range_span is None: + #yield from self.co_iterator_annotation_like(all_types) + for entry in self.co_iterator_annotation_like(all_types): + attrs: dict = {} + attrs["tid"] = entry[0] + for attr_name in attributes_names: + try: + attr_id = self._get_type_attribute_dict(type_name)[attr_name][ + constants.ATTR_INDEX_KEY + ] + except KeyError as e: + raise KeyError(f"{type_name} has no {attr_name} attribute.") from e + attrs[attr_name] = entry[attr_id] + + yield attrs + else: + for entry in self.co_iterator_annotation_like( + all_types, range_span=range_span + ): + attrs: dict = {} + attrs["tid"] = entry[0] + for attr_name in attributes_names: + try: + attr_id = self._get_type_attribute_dict(type_name)[attr_name][ + constants.ATTR_INDEX_KEY + ] + except KeyError as e: + raise KeyError(f"{type_name} has no {attr_name} attribute.") from e + attrs[attr_name] = entry[attr_id] + + yield attrs #attrs instead of entry + elif issubclass(entry_class, Link): + raise NotImplementedError(f"{type_name} of Link is not currently supported.") + # for type in all_types: + # if range_span is None: + # yield from self.iter(type) + # else: + # for entry in self.__elements[type]: + # parent_idx = self.get_datastore_attr_idx( + # entry[constants.ENTRY_TYPE_INDEX], + # constants.PARENT_TID_ATTR_NAME, + # ) + # child_idx = self.get_datastore_attr_idx( + # entry[constants.ENTRY_TYPE_INDEX], + # constants.CHILD_TID_ATTR_NAME, + # ) + # + # if (entry[parent_idx] in self.__tid_ref_dict) and ( + # entry[child_idx] in self.__tid_ref_dict + # ): + # parent = self.__tid_ref_dict[entry[parent_idx]] + # child = self.__tid_ref_dict[entry[child_idx]] + # if within_range( + # parent, range_span + # ) and within_range(child, range_span): + # yield entry + elif issubclass(entry_class, Group): + raise NotImplementedError(f"{type_name} of Group is not currently supported.") + # for type in all_types: + # if range_span is None: + # yield from self.iter(type) + # else: + # for entry in self.__elements[type]: + # member_type_idx = self.get_datastore_attr_idx( + # entry[constants.ENTRY_TYPE_INDEX], + # constants.MEMBER_TYPE_ATTR_NAME, + # ) + # members_idx = self.get_datastore_attr_idx( + # entry[constants.ENTRY_TYPE_INDEX], + # constants.MEMBER_TID_ATTR_NAME, + # ) + # + # member_type = entry[member_type_idx] + # if self._is_annotation(member_type): + # members = entry[members_idx] + # within = True + # for m in members: + # e = self.__tid_ref_dict[m] + # if not within_range(e, range_span): + # within = False + # break + # if within: + # yield entry + else: + # Only fetches entries of type ``type_name`` when it's not in + # [Annotation, Group, List]. + if type_name not in self.__elements: + raise ValueError(f"type {type_name} does not exist") + #yield from self.iter(type_name) + for entry in self.iter(type_name): + attrs: dict = {} + attrs["tid"] = entry[0] + for attr_name in attributes_names: + try: + attr_id = self._get_type_attribute_dict(type_name)[attr_name][ + constants.ATTR_INDEX_KEY + ] + except KeyError as e: + raise KeyError(f"{type_name} has no {attr_name} attribute.") from e + attrs[attr_name] = entry[attr_id] + + yield attrs + def _get_attr(self, tid: int, attr_id: int) -> Any: r"""This function locates the entry data with ``tid`` and gets the value of ``attr_id`` of this entry. Called by `get_attribute()`. diff --git a/tests/forte/data/data_store_test.py b/tests/forte/data/data_store_test.py index 33fc1fc93..3bd54bd00 100644 --- a/tests/forte/data/data_store_test.py +++ b/tests/forte/data/data_store_test.py @@ -1039,6 +1039,110 @@ def test_get_attribute(self): ): self.data_store.get_attribute(9999, "class") + def test_get_attributes_of_tid(self): + result_dict = self.data_store.get_attributes_of_tid(9999, ["begin", "end", "speaker"]) + result_dict2 = self.data_store.get_attributes_of_tid(3456, ["payload_idx", "classifications"]) + + self.assertEqual(result_dict["begin"], 6) + self.assertEqual(result_dict["end"], 9) + self.assertEqual(result_dict["speaker"], "teacher") + self.assertEqual(result_dict2["payload_idx"], 1) + self.assertEqual(result_dict2["classifications"], {}) + + # Entry with such tid does not exist + with self.assertRaisesRegex(KeyError, "Entry with tid 1111 not found."): + self.data_store.get_attributes_of_tid(1111, ["speaker"]) + + # Get attribute field that does not exist + with self.assertRaisesRegex( + KeyError, "ft.onto.base_ontology.Sentence has no class attribute." + ): + self.data_store.get_attributes_of_tid(9999, ["class"]) + + def test_get_attributes_of_tids(self): + tids_attrs: list[dict] + #tids_attrs2: list[dict] + tids_attrs = self.data_store.get_attributes_of_tids([9999, 3456], ["begin", "end", "payload_idx"]) + tids_attrs2 = self.data_store.get_attributes_of_tids([9999], ["begin", "speaker"]) + + self.assertEqual(tids_attrs2[0]["begin"], 6) + self.assertEqual(tids_attrs[0]["end"], 9) + self.assertEqual(tids_attrs[1]["payload_idx"], 1) + self.assertEqual(tids_attrs2[0]["speaker"], "teacher") + + # Entry with such tid does not exist + with self.assertRaisesRegex(KeyError, "Entry with tid 1111 not found."): + self.data_store.get_attributes_of_tids([1111], ["speaker"]) + + # Get attribute field that does not exist + with self.assertRaisesRegex( + KeyError, "ft.onto.base_ontology.Sentence has no class attribute." + ): + self.data_store.get_attributes_of_tids([9999], ["class"]) + + def test_get_attributes_of_type(self): + # get document entries + instances = list(self.data_store.get_attributes_of_type( + "ft.onto.base_ontology.Document", ["begin", "end", "payload_idx"])) + # print(instances) + self.assertEqual(len(instances), 2) + # check tid + self.assertEqual(instances[0]["tid"], 1234) + self.assertEqual(instances[0]["end"], 5) + self.assertEqual(instances[1]["tid"], 3456) + self.assertEqual(instances[1]["begin"], 10) + + # For types other than annotation, group or link, not support include_subtype + instances = list(self.data_store.get_attributes_of_type( + "forte.data.ontology.core.Entry", ["begin", "end"])) + self.assertEqual(len(instances), 0) + + self.assertEqual( + self.data_store.get_length("forte.data.ontology.core.Entry"), 0 + ) + + # get annotations with subclasses and range annotation + instances = list( + self.data_store.get_attributes_of_type( + "forte.data.ontology.top.Annotation", ["begin", "end"], + range_span=(1, 20) + ) + ) + self.assertEqual(len(instances), 2) + + # get groups with subclasses + # instances = list(self.data_store.get_attributes_of_type( + # "forte.data.ontology.top.Group", ["begin", "end"])) + # self.assertEqual(len(instances), 3) + + # # get groups with subclasses and range annotation + # instances = list( + # self.data_store.get( + # "forte.data.ontology.top.Group", range_span=(1, 20) + # ) + # ) + # self.assertEqual(len(instances), 0) + # + # # get links with subclasses + # instances = list(self.data_store.get("forte.data.ontology.top.Link")) + # self.assertEqual(len(instances), 1) + # + # # get links with subclasses and range annotation + # instances = list( + # self.data_store.get( + # "forte.data.ontology.top.Link", range_span=(0, 9) + # ) + # ) + # self.assertEqual(len(instances), 1) + # + # # get links with subclasses and range annotation + # instances = list( + # self.data_store.get( + # "forte.data.ontology.top.Link", range_span=(4, 11) + # ) + # ) + # self.assertEqual(len(instances), 0) + def test_set_attribute(self): # change attribute self.data_store.set_attribute(9999, "speaker", "student") From f3a7854b36f0e879d03facff2782e97e17f2facf Mon Sep 17 00:00:00 2001 From: JamesX Date: Tue, 14 Feb 2023 17:02:07 +0400 Subject: [PATCH 02/10] Fixed black format issue --- forte/data/data_store.py | 67 +++++++++++++++++------------ tests/forte/data/data_store_test.py | 41 ++++++++++++------ 2 files changed, 67 insertions(+), 41 deletions(-) diff --git a/forte/data/data_store.py b/forte/data/data_store.py index ca03f99db..4b52aadc0 100644 --- a/forte/data/data_store.py +++ b/forte/data/data_store.py @@ -806,7 +806,6 @@ def fetch_entry_type_data( else: attr_fields: Dict = self._get_entry_attributes_by_class(type_name) for attr_name, attr_info in attr_fields.items(): - attr_class = get_origin(attr_info.type) # Since we store the class specified by get_origin, # if the output it None, we store the class for it, @@ -1047,7 +1046,6 @@ def _add_entry_raw( self._is_subclass(type_name, cls) for cls in (list(SinglePackEntries) + list(MultiPackEntries)) ): - try: self.__elements[type_name].append(entry) except KeyError: @@ -1246,7 +1244,6 @@ def add_entry_raw( allow_duplicate: bool = True, attribute_data: Optional[List] = None, ) -> int: - r""" This function provides a general implementation to add all types of entries to the data store. It can add namely @@ -1512,7 +1509,9 @@ def get_attributes_of_tid(self, tid: int, attr_names: [str]) -> dict: constants.ATTR_INDEX_KEY ] except KeyError as e: - raise KeyError(f"{entry_type} has no {attr_name} attribute.") from e + raise KeyError( + f"{entry_type} has no {attr_name} attribute." + ) from e attrs[attr_name] = entry[attr_id] return attrs @@ -1540,12 +1539,14 @@ def get_attributes_of_tids(self, tids: [int], attr_names: [str]): attrs: dict = {} for attr_name in attr_names: try: - attr_id = self._get_type_attribute_dict(entry_type)[attr_name][ - constants.ATTR_INDEX_KEY - ] + attr_id = self._get_type_attribute_dict(entry_type)[ + attr_name + ][constants.ATTR_INDEX_KEY] except KeyError as e: - raise KeyError(f"{entry_type} has no {attr_name} attribute.") from e - attrs[attr_name] = (entry[attr_id]) + raise KeyError( + f"{entry_type} has no {attr_name} attribute." + ) from e + attrs[attr_name] = entry[attr_id] tids_attrs.append(attrs) @@ -1605,17 +1606,19 @@ def get_attributes_of_type( if self._is_annotation(type_name): if range_span is None: - #yield from self.co_iterator_annotation_like(all_types) + # yield from self.co_iterator_annotation_like(all_types) for entry in self.co_iterator_annotation_like(all_types): attrs: dict = {} attrs["tid"] = entry[0] for attr_name in attributes_names: try: - attr_id = self._get_type_attribute_dict(type_name)[attr_name][ - constants.ATTR_INDEX_KEY - ] + attr_id = self._get_type_attribute_dict(type_name)[ + attr_name + ][constants.ATTR_INDEX_KEY] except KeyError as e: - raise KeyError(f"{type_name} has no {attr_name} attribute.") from e + raise KeyError( + f"{type_name} has no {attr_name} attribute." + ) from e attrs[attr_name] = entry[attr_id] yield attrs @@ -1627,16 +1630,20 @@ def get_attributes_of_type( attrs["tid"] = entry[0] for attr_name in attributes_names: try: - attr_id = self._get_type_attribute_dict(type_name)[attr_name][ - constants.ATTR_INDEX_KEY - ] + attr_id = self._get_type_attribute_dict(type_name)[ + attr_name + ][constants.ATTR_INDEX_KEY] except KeyError as e: - raise KeyError(f"{type_name} has no {attr_name} attribute.") from e + raise KeyError( + f"{type_name} has no {attr_name} attribute." + ) from e attrs[attr_name] = entry[attr_id] - yield attrs #attrs instead of entry + yield attrs # attrs instead of entry elif issubclass(entry_class, Link): - raise NotImplementedError(f"{type_name} of Link is not currently supported.") + raise NotImplementedError( + f"{type_name} of Link is not currently supported." + ) # for type in all_types: # if range_span is None: # yield from self.iter(type) @@ -1661,7 +1668,9 @@ def get_attributes_of_type( # ) and within_range(child, range_span): # yield entry elif issubclass(entry_class, Group): - raise NotImplementedError(f"{type_name} of Group is not currently supported.") + raise NotImplementedError( + f"{type_name} of Group is not currently supported." + ) # for type in all_types: # if range_span is None: # yield from self.iter(type) @@ -1692,17 +1701,19 @@ def get_attributes_of_type( # [Annotation, Group, List]. if type_name not in self.__elements: raise ValueError(f"type {type_name} does not exist") - #yield from self.iter(type_name) + # yield from self.iter(type_name) for entry in self.iter(type_name): attrs: dict = {} attrs["tid"] = entry[0] for attr_name in attributes_names: try: - attr_id = self._get_type_attribute_dict(type_name)[attr_name][ - constants.ATTR_INDEX_KEY - ] + attr_id = self._get_type_attribute_dict(type_name)[ + attr_name + ][constants.ATTR_INDEX_KEY] except KeyError as e: - raise KeyError(f"{type_name} has no {attr_name} attribute.") from e + raise KeyError( + f"{type_name} has no {attr_name} attribute." + ) from e attrs[attr_name] = entry[attr_id] yield attrs @@ -2090,7 +2101,9 @@ def co_iterator_annotation_like( self.get_datastore_attr_idx(tn, constants.BEGIN_ATTR_NAME), self.get_datastore_attr_idx(tn, constants.END_ATTR_NAME), ) - except IndexError as e: # all_entries_range[tn][0] will be caught here. + except ( + IndexError + ) as e: # all_entries_range[tn][0] will be caught here. raise ValueError( f"Entry list of type name, {tn} which is" " one list item of input argument `type_names`," diff --git a/tests/forte/data/data_store_test.py b/tests/forte/data/data_store_test.py index 3bd54bd00..b9e7833c9 100644 --- a/tests/forte/data/data_store_test.py +++ b/tests/forte/data/data_store_test.py @@ -699,7 +699,6 @@ def value_err_fn(): self.assertRaises(ValueError, value_err_fn) def test_add_annotation_raw(self): - # test add Document entry tid_doc: int = self.data_store.add_entry_raw( type_name="ft.onto.base_ontology.Document", @@ -1040,8 +1039,12 @@ def test_get_attribute(self): self.data_store.get_attribute(9999, "class") def test_get_attributes_of_tid(self): - result_dict = self.data_store.get_attributes_of_tid(9999, ["begin", "end", "speaker"]) - result_dict2 = self.data_store.get_attributes_of_tid(3456, ["payload_idx", "classifications"]) + result_dict = self.data_store.get_attributes_of_tid( + 9999, ["begin", "end", "speaker"] + ) + result_dict2 = self.data_store.get_attributes_of_tid( + 3456, ["payload_idx", "classifications"] + ) self.assertEqual(result_dict["begin"], 6) self.assertEqual(result_dict["end"], 9) @@ -1061,9 +1064,13 @@ def test_get_attributes_of_tid(self): def test_get_attributes_of_tids(self): tids_attrs: list[dict] - #tids_attrs2: list[dict] - tids_attrs = self.data_store.get_attributes_of_tids([9999, 3456], ["begin", "end", "payload_idx"]) - tids_attrs2 = self.data_store.get_attributes_of_tids([9999], ["begin", "speaker"]) + # tids_attrs2: list[dict] + tids_attrs = self.data_store.get_attributes_of_tids( + [9999, 3456], ["begin", "end", "payload_idx"] + ) + tids_attrs2 = self.data_store.get_attributes_of_tids( + [9999], ["begin", "speaker"] + ) self.assertEqual(tids_attrs2[0]["begin"], 6) self.assertEqual(tids_attrs[0]["end"], 9) @@ -1082,8 +1089,12 @@ def test_get_attributes_of_tids(self): def test_get_attributes_of_type(self): # get document entries - instances = list(self.data_store.get_attributes_of_type( - "ft.onto.base_ontology.Document", ["begin", "end", "payload_idx"])) + instances = list( + self.data_store.get_attributes_of_type( + "ft.onto.base_ontology.Document", + ["begin", "end", "payload_idx"], + ) + ) # print(instances) self.assertEqual(len(instances), 2) # check tid @@ -1093,8 +1104,11 @@ def test_get_attributes_of_type(self): self.assertEqual(instances[1]["begin"], 10) # For types other than annotation, group or link, not support include_subtype - instances = list(self.data_store.get_attributes_of_type( - "forte.data.ontology.core.Entry", ["begin", "end"])) + instances = list( + self.data_store.get_attributes_of_type( + "forte.data.ontology.core.Entry", ["begin", "end"] + ) + ) self.assertEqual(len(instances), 0) self.assertEqual( @@ -1104,8 +1118,9 @@ def test_get_attributes_of_type(self): # get annotations with subclasses and range annotation instances = list( self.data_store.get_attributes_of_type( - "forte.data.ontology.top.Annotation", ["begin", "end"], - range_span=(1, 20) + "forte.data.ontology.top.Annotation", + ["begin", "end"], + range_span=(1, 20), ) ) self.assertEqual(len(instances), 2) @@ -1432,7 +1447,6 @@ def test_get_entry_attribute_by_class(self): ) def test_is_subclass(self): - import forte self.assertEqual( @@ -1500,7 +1514,6 @@ def test_is_subclass(self): ) def test_check_onto_file(self): - expected_type_attributes = { "ft.onto.test.Description": { "attributes": { From c0a0dd9c4d58fbf3c1fbbf86376b1e96a87fc1fe Mon Sep 17 00:00:00 2001 From: JamesX Date: Tue, 14 Feb 2023 17:28:47 +0400 Subject: [PATCH 03/10] Fixed pylint issues --- forte/data/data_store.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/forte/data/data_store.py b/forte/data/data_store.py index 4b52aadc0..6a94f4a89 100644 --- a/forte/data/data_store.py +++ b/forte/data/data_store.py @@ -1484,7 +1484,7 @@ def get_attribute(self, tid: int, attr_name: str) -> Any: return entry[attr_id] - def get_attributes_of_tid(self, tid: int, attr_names: [str]) -> dict: + def get_attributes_of_tid(self, tid: int, attr_names: list[str]) -> dict: r"""This function returns the value of attributes listed in ``attr_names`` for the entry with ``tid``. It locates the entry data with ``tid`` and finds attributes listed in ``attr_names`` and return @@ -1516,7 +1516,7 @@ def get_attributes_of_tid(self, tid: int, attr_names: [str]) -> dict: return attrs - def get_attributes_of_tids(self, tids: [int], attr_names: [str]): + def get_attributes_of_tids(self, tids: list[int], attr_names: list[str]): r"""This function returns the value of attributes listed in ``attr_names`` for entries in listed in the ``tids``. It locates the entries data with ``tid`` and put attributes listed in @@ -1555,10 +1555,10 @@ def get_attributes_of_tids(self, tids: [int], attr_names: [str]): def get_attributes_of_type( self, type_name: str, - attributes_names: [str], + attributes_names: list[str], include_sub_type: bool = True, range_span: Optional[Tuple[int, int]] = None, - ) -> Iterator[List]: + ): r"""This function fetches required attributes of entries from the data store of type ``type_name``. If `include_sub_type` is set to True and ``type_name`` is in [Annotation], this function also @@ -1608,8 +1608,7 @@ def get_attributes_of_type( if range_span is None: # yield from self.co_iterator_annotation_like(all_types) for entry in self.co_iterator_annotation_like(all_types): - attrs: dict = {} - attrs["tid"] = entry[0] + attrs: dict = {"tid": entry[0]} for attr_name in attributes_names: try: attr_id = self._get_type_attribute_dict(type_name)[ @@ -1626,8 +1625,7 @@ def get_attributes_of_type( for entry in self.co_iterator_annotation_like( all_types, range_span=range_span ): - attrs: dict = {} - attrs["tid"] = entry[0] + attrs = {"tid": entry[0]} for attr_name in attributes_names: try: attr_id = self._get_type_attribute_dict(type_name)[ @@ -1703,8 +1701,7 @@ def get_attributes_of_type( raise ValueError(f"type {type_name} does not exist") # yield from self.iter(type_name) for entry in self.iter(type_name): - attrs: dict = {} - attrs["tid"] = entry[0] + attrs: dict = {"tid": entry[0]} for attr_name in attributes_names: try: attr_id = self._get_type_attribute_dict(type_name)[ From 726a14f321f2b56d978481b4cd84c962b2e94b87 Mon Sep 17 00:00:00 2001 From: JamesX Date: Tue, 14 Feb 2023 17:37:38 +0400 Subject: [PATCH 04/10] Fixed pylint issues --- forte/data/data_store.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/forte/data/data_store.py b/forte/data/data_store.py index 6a94f4a89..e93313d47 100644 --- a/forte/data/data_store.py +++ b/forte/data/data_store.py @@ -1484,7 +1484,7 @@ def get_attribute(self, tid: int, attr_name: str) -> Any: return entry[attr_id] - def get_attributes_of_tid(self, tid: int, attr_names: list[str]) -> dict: + def get_attributes_of_tid(self, tid: int, attr_names: List[str]) -> dict: r"""This function returns the value of attributes listed in ``attr_names`` for the entry with ``tid``. It locates the entry data with ``tid`` and finds attributes listed in ``attr_names`` and return @@ -1516,7 +1516,7 @@ def get_attributes_of_tid(self, tid: int, attr_names: list[str]) -> dict: return attrs - def get_attributes_of_tids(self, tids: list[int], attr_names: list[str]): + def get_attributes_of_tids(self, tids: List[int], attr_names: List[str]): r"""This function returns the value of attributes listed in ``attr_names`` for entries in listed in the ``tids``. It locates the entries data with ``tid`` and put attributes listed in @@ -1555,7 +1555,7 @@ def get_attributes_of_tids(self, tids: list[int], attr_names: list[str]): def get_attributes_of_type( self, type_name: str, - attributes_names: list[str], + attributes_names: List[str], include_sub_type: bool = True, range_span: Optional[Tuple[int, int]] = None, ): From 4f1eeed0ee21010d9260d523d4b3039d5e12feaf Mon Sep 17 00:00:00 2001 From: JamesX Date: Thu, 16 Feb 2023 16:14:43 +0400 Subject: [PATCH 05/10] Added return type hint as suggested --- forte/data/data_store.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/forte/data/data_store.py b/forte/data/data_store.py index e93313d47..17a99d6b6 100644 --- a/forte/data/data_store.py +++ b/forte/data/data_store.py @@ -1516,7 +1516,9 @@ def get_attributes_of_tid(self, tid: int, attr_names: List[str]) -> dict: return attrs - def get_attributes_of_tids(self, tids: List[int], attr_names: List[str]): + def get_attributes_of_tids( + self, tids: List[int], attr_names: List[str] + ) -> List[Any]: r"""This function returns the value of attributes listed in ``attr_names`` for entries in listed in the ``tids``. It locates the entries data with ``tid`` and put attributes listed in @@ -1558,7 +1560,7 @@ def get_attributes_of_type( attributes_names: List[str], include_sub_type: bool = True, range_span: Optional[Tuple[int, int]] = None, - ): + ) -> List[Any]: r"""This function fetches required attributes of entries from the data store of type ``type_name``. If `include_sub_type` is set to True and ``type_name`` is in [Annotation], this function also From cf5284616c9a88ecb5934133124bd993fc75de81 Mon Sep 17 00:00:00 2001 From: JamesX Date: Thu, 16 Feb 2023 16:37:35 +0400 Subject: [PATCH 06/10] fix lint issue (2 places) --- forte/data/data_store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/forte/data/data_store.py b/forte/data/data_store.py index 17a99d6b6..60daac6f6 100644 --- a/forte/data/data_store.py +++ b/forte/data/data_store.py @@ -1560,7 +1560,7 @@ def get_attributes_of_type( attributes_names: List[str], include_sub_type: bool = True, range_span: Optional[Tuple[int, int]] = None, - ) -> List[Any]: + ) -> Iterator[dict]: r"""This function fetches required attributes of entries from the data store of type ``type_name``. If `include_sub_type` is set to True and ``type_name`` is in [Annotation], this function also @@ -1703,7 +1703,7 @@ def get_attributes_of_type( raise ValueError(f"type {type_name} does not exist") # yield from self.iter(type_name) for entry in self.iter(type_name): - attrs: dict = {"tid": entry[0]} + attrs = {"tid": entry[0]} for attr_name in attributes_names: try: attr_id = self._get_type_attribute_dict(type_name)[ From 122ea7974ead4797788e8fdef37c5466cda9f8fa Mon Sep 17 00:00:00 2001 From: JamesX Date: Wed, 22 Mar 2023 16:53:45 +0400 Subject: [PATCH 07/10] removed commented out codes (not used in current design/implementation) --- forte/data/data_store.py | 66 ---------------------------------------- 1 file changed, 66 deletions(-) diff --git a/forte/data/data_store.py b/forte/data/data_store.py index 60daac6f6..0c9655b91 100644 --- a/forte/data/data_store.py +++ b/forte/data/data_store.py @@ -1579,22 +1579,6 @@ def get_attributes_of_type( provided arguments. """ - # def within_range(entry: List[Any], range_span: Tuple[int, int]) -> bool: - # """ - # A helper function for deciding whether an annotation entry is - # inside the `range_span`. - # """ - # begin = self.get_datastore_attr_idx( - # entry[constants.ENTRY_TYPE_INDEX], constants.BEGIN_ATTR_NAME - # ) - # end = self.get_datastore_attr_idx( - # entry[constants.ENTRY_TYPE_INDEX], constants.END_ATTR_NAME - # ) - # - # if not self._is_annotation(entry[constants.ENTRY_TYPE_INDEX]): - # return False - # return entry[begin] >= range_span[0] and entry[end] <= range_span[1] - entry_class = get_class(type_name) all_types = set() if include_sub_type: @@ -1644,61 +1628,11 @@ def get_attributes_of_type( raise NotImplementedError( f"{type_name} of Link is not currently supported." ) - # for type in all_types: - # if range_span is None: - # yield from self.iter(type) - # else: - # for entry in self.__elements[type]: - # parent_idx = self.get_datastore_attr_idx( - # entry[constants.ENTRY_TYPE_INDEX], - # constants.PARENT_TID_ATTR_NAME, - # ) - # child_idx = self.get_datastore_attr_idx( - # entry[constants.ENTRY_TYPE_INDEX], - # constants.CHILD_TID_ATTR_NAME, - # ) - # - # if (entry[parent_idx] in self.__tid_ref_dict) and ( - # entry[child_idx] in self.__tid_ref_dict - # ): - # parent = self.__tid_ref_dict[entry[parent_idx]] - # child = self.__tid_ref_dict[entry[child_idx]] - # if within_range( - # parent, range_span - # ) and within_range(child, range_span): - # yield entry elif issubclass(entry_class, Group): raise NotImplementedError( f"{type_name} of Group is not currently supported." ) - # for type in all_types: - # if range_span is None: - # yield from self.iter(type) - # else: - # for entry in self.__elements[type]: - # member_type_idx = self.get_datastore_attr_idx( - # entry[constants.ENTRY_TYPE_INDEX], - # constants.MEMBER_TYPE_ATTR_NAME, - # ) - # members_idx = self.get_datastore_attr_idx( - # entry[constants.ENTRY_TYPE_INDEX], - # constants.MEMBER_TID_ATTR_NAME, - # ) - # - # member_type = entry[member_type_idx] - # if self._is_annotation(member_type): - # members = entry[members_idx] - # within = True - # for m in members: - # e = self.__tid_ref_dict[m] - # if not within_range(e, range_span): - # within = False - # break - # if within: - # yield entry else: - # Only fetches entries of type ``type_name`` when it's not in - # [Annotation, Group, List]. if type_name not in self.__elements: raise ValueError(f"type {type_name} does not exist") # yield from self.iter(type_name) From 3fe14a924458c3ffbe5b3c88db9b8149f006fa5a Mon Sep 17 00:00:00 2001 From: JamesX Date: Wed, 22 Mar 2023 17:10:25 +0400 Subject: [PATCH 08/10] Fixed CI issue (in ontology_code_generator) for python 3.9 check --- forte/data/ontology/ontology_code_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/forte/data/ontology/ontology_code_generator.py b/forte/data/ontology/ontology_code_generator.py index c822f103f..28ecd6e0f 100644 --- a/forte/data/ontology/ontology_code_generator.py +++ b/forte/data/ontology/ontology_code_generator.py @@ -41,7 +41,7 @@ except ImportError: # Try backported to PY<39 `importlib_resources`. import importlib_resources as resources # type: ignore - from importlib_resources.abc import Traversable # type: ignore + from importlib_resources.abc import Traversable from forte.data.ontology import top, utils from forte.data.ontology.code_generation_exceptions import ( From e8750493a3c00af21fe12dc485a725d909dc5377 Mon Sep 17 00:00:00 2001 From: JamesX Date: Fri, 31 Mar 2023 12:30:13 +0400 Subject: [PATCH 09/10] Fixed docstring complaint spelling about "tids" --- forte/data/data_store.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/forte/data/data_store.py b/forte/data/data_store.py index 0c9655b91..162baec1b 100644 --- a/forte/data/data_store.py +++ b/forte/data/data_store.py @@ -1517,26 +1517,26 @@ def get_attributes_of_tid(self, tid: int, attr_names: List[str]) -> dict: return attrs def get_attributes_of_tids( - self, tids: List[int], attr_names: List[str] + self, list_of_tid: List[int], attr_names: List[str] ) -> List[Any]: r"""This function returns the value of attributes listed in - ``attr_names`` for entries in listed in the ``tids``. It locates - the entries data with ``tid`` and put attributes listed in - ``attr_name`` in a dict for each entry (tid). + ``attr_names`` for entries in listed in the ``list_of_tid``. + It locates the entries data with ``tid`` and put attributes + listed in ``attr_name`` in a dict for each entry (tid). Args: - tids: List of unique ids (tids) of the entry. + list_of_tid: List of unique ids of the entry. attr_names: List of name of the attribute. Returns: - A list of dict with ``attr_name`` as key for atrributes - of the entries listed in``tids``. + A list of dict with ``attr_name`` as key for attributes + of the entries listed in``list_of_tid``. Raises: KeyError: when ``tid`` or ``attr_name`` is not found. """ tids_attrs = [] - for tid in tids: + for tid in list_of_tid: entry, entry_type = self.get_entry(tid) attrs: dict = {} for attr_name in attr_names: From 8b3d524855b3a05ba74d6d068b633a280df0a1f4 Mon Sep 17 00:00:00 2001 From: JamesX Date: Fri, 31 Mar 2023 13:20:24 +0400 Subject: [PATCH 10/10] Fixed docstring complaint spelling about "tid" --- forte/data/data_store.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/forte/data/data_store.py b/forte/data/data_store.py index 162baec1b..26678555b 100644 --- a/forte/data/data_store.py +++ b/forte/data/data_store.py @@ -1522,7 +1522,7 @@ def get_attributes_of_tids( r"""This function returns the value of attributes listed in ``attr_names`` for entries in listed in the ``list_of_tid``. It locates the entries data with ``tid`` and put attributes - listed in ``attr_name`` in a dict for each entry (tid). + listed in ``attr_name`` in a dict for each entry. Args: list_of_tid: List of unique ids of the entry. @@ -1530,7 +1530,7 @@ def get_attributes_of_tids( Returns: A list of dict with ``attr_name`` as key for attributes - of the entries listed in``list_of_tid``. + of the entries requested. Raises: KeyError: when ``tid`` or ``attr_name`` is not found.