diff --git a/energyml-utils/example/main.py b/energyml-utils/example/main.py index 2572f3e..ce7f6a4 100644 --- a/energyml-utils/example/main.py +++ b/energyml-utils/example/main.py @@ -28,7 +28,7 @@ correct_dor, ) from src.energyml.utils.xml import * -from src.energyml.utils.data.datasets_io import HDF5FileReader +from src.energyml.utils.data.datasets_io import HDF5FileReader, get_path_in_external_with_path fi_cit = Citation( title="An interpretation", @@ -378,6 +378,7 @@ def test_local_depth_crs(): def test_crs(): from energyml.eml.v2_3.commonv2 import LocalEngineeringCompoundCrs + crs = random_value_from_class(LocalEngineeringCompoundCrs) print(is_z_reversed(crs)) diff --git a/energyml-utils/example/tools.py b/energyml-utils/example/tools.py index 4d6d252..ffef0d1 100644 --- a/energyml-utils/example/tools.py +++ b/energyml-utils/example/tools.py @@ -10,10 +10,19 @@ from src.energyml.utils.data.datasets_io import CSVFileReader, HDF5FileWriter, ParquetFileWriter, DATFileReader from src.energyml.utils.data.mesh import MeshFileFormat, export_multiple_data from src.energyml.utils.epc import Epc, gen_energyml_object_path -from src.energyml.utils.introspection import get_class_from_simple_name, random_value_from_class, \ - set_attribute_from_path, get_object_attribute, get_qualified_type_from_class, get_content_type_from_class, \ - get_object_attribute_rgx, get_direct_dor_list, get_obj_uuid, get_class_from_qualified_type, \ - get_object_attribute_or_create +from src.energyml.utils.introspection import ( + get_class_from_simple_name, + random_value_from_class, + set_attribute_from_path, + get_object_attribute, + get_qualified_type_from_class, + get_content_type_from_class, + get_object_attribute_rgx, + get_direct_dor_list, + get_obj_uuid, + get_class_from_qualified_type, + get_object_attribute_or_create, +) from src.energyml.utils.serialization import ( serialize_json, JSON_VERSION, @@ -370,6 +379,7 @@ def xml_to_json(): json_content = serialize_json(objs, JSON_VERSION.OSDU_OFFICIAL) elif args.file.lower().endswith(".epc"): epc = Epc.read_file(args.file) + # print(epc.energyml_objects) json_content = ( "[\n" + ",".join(list(map(lambda o: serialize_json(o, JSON_VERSION.OSDU_OFFICIAL), epc.energyml_objects))) @@ -377,7 +387,9 @@ def xml_to_json(): ) with open(output_path, "w") as fout: - fout.write(json_content) + # print(json_content) + if json_content is not None: + fout.write(json_content) def json_to_xml(): @@ -403,6 +415,29 @@ def json_to_xml(): fout.write(xml_content) +def json_to_epc(): + parser = argparse.ArgumentParser() + parser.add_argument("--file", "-f", type=str, help="Input File") + parser.add_argument("--out", "-o", type=str, default=None, help=f"Output EPC file") + + args = parser.parse_args() + + epc = Epc(epc_file_path=args.out) + with open(args.file, "rb") as f: + f_content = f.read() + objs = [] + try: + objs = read_energyml_json_bytes(f_content, JSON_VERSION.OSDU_OFFICIAL) + except: + objs = read_energyml_json_bytes(f_content, JSON_VERSION.XSDATA) + + dir = pathlib.Path(args.out or args.file).parent.resolve() + for obj in objs: + epc.energyml_objects.append(obj) + + epc.export_file(args.out) + + def describe_as_csv(): parser = argparse.ArgumentParser() parser.add_argument("--folder", "-f", type=str, help="Input File") diff --git a/energyml-utils/pyproject.toml b/energyml-utils/pyproject.toml index 288ff72..88cae1d 100644 --- a/energyml-utils/pyproject.toml +++ b/energyml-utils/pyproject.toml @@ -128,4 +128,5 @@ csv_to_dataset = "example.tools:csv_to_dataset" generate_data = "example.tools:generate_data" xml_to_json = "example.tools:xml_to_json" json_to_xml = "example.tools:json_to_xml" +json_to_epc = "example.tools:json_to_epc" describe_as_csv = "example.tools:describe_as_csv" \ No newline at end of file diff --git a/energyml-utils/src/energyml/utils/constants.py b/energyml-utils/src/energyml/utils/constants.py index e134381..5a3928b 100644 --- a/energyml-utils/src/energyml/utils/constants.py +++ b/energyml-utils/src/energyml/utils/constants.py @@ -305,6 +305,23 @@ def parse_content_or_qualified_type(cqt: str) -> Optional[re.Match[str]]: return parsed +def content_type_to_qualified_type(ct: str): + parsed = parse_content_or_qualified_type(ct) + return parsed.group("domain") + parsed.group("domainVersion").replace(".", "") + "." + parsed.group("type") + + +def qualified_type_to_content_type(qt: str): + parsed = parse_content_or_qualified_type(qt) + return ( + "application/x-" + + parsed.group("domain") + + "+xml;version=" + + re.sub(r"(\d)(\d)", r"\1.\2", parsed.group("domainVersion")) + + ";type=" + + parsed.group("type") + ) + + def get_domain_version_from_content_or_qualified_type(cqt: str) -> Optional[str]: """ return a version number like "2.2" or "2.0" diff --git a/energyml-utils/src/energyml/utils/epc.py b/energyml-utils/src/energyml/utils/epc.py index f0899ec..ea95775 100644 --- a/energyml-utils/src/energyml/utils/epc.py +++ b/energyml-utils/src/energyml/utils/epc.py @@ -29,6 +29,7 @@ Keywords1, TargetMode, ) +from .uri import parse_uri from xsdata.formats.dataclass.models.generics import DerivedElement from .constants import ( @@ -39,6 +40,8 @@ RawFile, EPCRelsRelationshipType, MimeType, + content_type_to_qualified_type, + qualified_type_to_content_type, split_identifier, get_property_kind_dict_path_as_dict, ) @@ -49,6 +52,7 @@ from .introspection import ( get_class_from_content_type, get_obj_type, + is_dor, search_attribute_matching_type, get_obj_version, get_obj_uuid, @@ -65,7 +69,8 @@ set_attribute_from_path, set_attribute_value, get_object_attribute, - get_qualified_type_from_class, get_class_fields, + get_qualified_type_from_class, + get_class_fields, ) from .manager import get_class_pkg, get_class_pkg_version from .serialization import ( @@ -631,32 +636,74 @@ def as_dor(obj_or_identifier: Any, dor_qualified_type: str = "eml23.DataObjectRe """ dor = None if obj_or_identifier is not None: - if isinstance(obj_or_identifier, str): # is an identifier - cls = get_class_from_qualified_type(dor_qualified_type) - dor = cls() - if len(__CACHE_PROP_KIND_DICT__) == 0: - # update the cache to check if it is a - update_prop_kind_dict_cache() - try: - uuid, version = split_identifier(obj_or_identifier) - if uuid in __CACHE_PROP_KIND_DICT__: - return as_dor(__CACHE_PROP_KIND_DICT__[uuid]) - else: - set_attribute_from_path(dor, "uuid", uuid) - set_attribute_from_path(dor, "ObjectVersion", version) - except AttributeError: - logging.error(f"Failed to parse identifier {obj_or_identifier}. DOR will be empty") + cls = get_class_from_qualified_type(dor_qualified_type) + dor = cls() + if isinstance(obj_or_identifier, str): # is an identifier or uri + parsed_uri = parse_uri(obj_or_identifier) + if parsed_uri is not None: + if hasattr(dor, "qualified_type"): + set_attribute_from_path(dor, "qualified_type", parsed_uri.get_qualified_type()) + if hasattr(dor, "content_type"): + set_attribute_from_path( + dor, "content_type", qualified_type_to_content_type(parsed_uri.get_qualified_type()) + ) + set_attribute_from_path(dor, "uuid", parsed_uri.uuid) + if hasattr(dor, "object_version"): + set_attribute_from_path(dor, "version_string", parsed_uri.version) + if hasattr(dor, "version_string"): + set_attribute_from_path(dor, "version_string", parsed_uri.version) + + else: # identifier + if len(__CACHE_PROP_KIND_DICT__) == 0: + # update the cache to check if it is a + try: + update_prop_kind_dict_cache() + except FileNotFoundError as e: + logging.error(f"Failed to parse propertykind dict {e}") + try: + uuid, version = split_identifier(obj_or_identifier) + if uuid in __CACHE_PROP_KIND_DICT__: + return as_dor(__CACHE_PROP_KIND_DICT__[uuid]) + else: + set_attribute_from_path(dor, "uuid", uuid) + set_attribute_from_path(dor, "ObjectVersion", version) + except AttributeError: + logging.error(f"Failed to parse identifier {obj_or_identifier}. DOR will be empty") else: - cls = get_class_from_qualified_type(dor_qualified_type) - dor = cls() - if hasattr(dor, "qualified_type"): - set_attribute_from_path(dor, "qualified_type", get_qualified_type_from_class(obj_or_identifier)) - if hasattr(dor, "content_type"): - set_attribute_from_path(dor, "content_type", get_content_type_from_class(obj_or_identifier)) - - set_attribute_from_path(dor, "uuid", get_object_attribute(obj_or_identifier, "uuid")) - set_attribute_from_path(dor, "object_version", get_object_attribute(obj_or_identifier, "ObjectVersion")) - set_attribute_from_path(dor, "title", get_object_attribute(obj_or_identifier, "Citation.Title")) + if is_dor(obj_or_identifier): + # If it is a dor, we create a dor conversionif hasattr(dor, "qualified_type"): + if hasattr(dor, "qualified_type"): + if hasattr(obj_or_identifier, "qualified_type"): + dor.qualified_type = get_object_attribute(obj_or_identifier, "qualified_type") + elif hasattr(obj_or_identifier, "content_type"): + dor.qualified_type = content_type_to_qualified_type( + get_object_attribute(obj_or_identifier, "content_type") + ) + + if hasattr(dor, "content_type"): + if hasattr(obj_or_identifier, "qualified_type"): + dor.content_type = qualified_type_to_content_type( + get_object_attribute(obj_or_identifier, "qualified_type") + ) + elif hasattr(obj_or_identifier, "content_type"): + dor.content_type = get_object_attribute(obj_or_identifier, "content_type") + + set_attribute_from_path(dor, "title", get_object_attribute(obj_or_identifier, "Title")) + + else: + if hasattr(dor, "qualified_type"): + set_attribute_from_path(dor, "qualified_type", get_qualified_type_from_class(obj_or_identifier)) + if hasattr(dor, "content_type"): + set_attribute_from_path(dor, "content_type", get_content_type_from_class(obj_or_identifier)) + + set_attribute_from_path(dor, "title", get_object_attribute(obj_or_identifier, "Citation.Title")) + + set_attribute_from_path(dor, "uuid", get_obj_uuid(obj_or_identifier)) + + if hasattr(dor, "object_version"): + set_attribute_from_path(dor, "object_version", get_obj_version(obj_or_identifier)) + if hasattr(dor, "version_string"): + set_attribute_from_path(dor, "version_string", get_obj_version(obj_or_identifier)) return dor diff --git a/energyml-utils/src/energyml/utils/introspection.py b/energyml-utils/src/energyml/utils/introspection.py index 68cd505..0449145 100644 --- a/energyml-utils/src/energyml/utils/introspection.py +++ b/energyml-utils/src/energyml/utils/introspection.py @@ -7,7 +7,7 @@ import re import sys import typing -from dataclasses import Field +from dataclasses import Field, field from enum import Enum from importlib import import_module from types import ModuleType @@ -311,8 +311,18 @@ def get_class_fields(cls: Union[type, Any]) -> Dict[str, Field]: try: return cls.__dataclass_fields__ except AttributeError: - # print(list_function_parameters_with_types(cls.__new__, True)) - return list_function_parameters_with_types(cls.__new__, True) + try: + # print(list_function_parameters_with_types(cls.__new__, True)) + return list_function_parameters_with_types(cls.__new__, True) + except AttributeError as e: + # For not working types like proxy type for C++ binding + res = {} + for a_name, a_type in inspect.getmembers(cls): + # print(f"{a_name} => {inspect.getmembers(a_type)}") + if not a_name.startswith("_") and not callable(getattr(cls, a_name, None)): + res[a_name] = field() + + return res def get_class_attributes(cls: Union[type, Any]) -> List[str]: @@ -529,7 +539,7 @@ def get_object_attribute_advanced(obj: Any, attr_dot_path: str) -> Any: return value -def get_object_attribute_no_verif(obj: Any, attr_name: str) -> Any: +def get_object_attribute_no_verif(obj: Any, attr_name: str, default: Optional[Any] = None) -> Any: """ Return the value of the attribute named after param :param:`attr_name` without verification (may raise an exception if it doesn't exists). @@ -540,11 +550,19 @@ def get_object_attribute_no_verif(obj: Any, attr_name: str) -> Any: :return: """ if isinstance(obj, list): - return obj[int(attr_name)] + if int(attr_name) < len(obj): + return obj[int(attr_name)] or default + else: + raise AttributeError(obj, name=attr_name) elif isinstance(obj, dict): - return obj[attr_name] + if attr_name in obj: + return obj.get(attr_name, default) + else: + raise AttributeError(obj, name=attr_name) else: - return getattr(obj, attr_name, None) + return ( + getattr(obj, attr_name) or default + ) # we did not used the "default" of getattr to keep raising AttributeError def get_object_attribute_rgx(obj: Any, attr_dot_path_rgx: str) -> Any: @@ -599,6 +617,14 @@ def class_match_rgx( return False +def is_dor(obj: any) -> bool: + return ( + "dataobjectreference" in get_obj_type(obj).lower() + or get_object_attribute(obj, "ContentType") is not None + or get_object_attribute(obj, "QualifiedType") is not None + ) + + def search_attribute_matching_type_with_path( obj: Any, type_rgx: str, @@ -1016,6 +1042,7 @@ def get_obj_version(obj: Any) -> str: return get_object_attribute_no_verif(obj, "version_string") except Exception: logging.error(f"Error with {type(obj)}") + return None # raise e diff --git a/energyml-utils/src/energyml/utils/serialization.py b/energyml-utils/src/energyml/utils/serialization.py index c90323c..32e830e 100644 --- a/energyml-utils/src/energyml/utils/serialization.py +++ b/energyml-utils/src/energyml/utils/serialization.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import json import logging +import numpy as np import traceback from enum import Enum from io import BytesIO @@ -448,6 +449,9 @@ def _to_json_dict_fn( """ if obj is None: return None + elif isinstance(obj, float) and np.isnan(obj): + print("NaN found") + return None elif is_enum(obj): return obj.value # return { diff --git a/energyml-utils/tests/test_constants.py b/energyml-utils/tests/test_constants.py new file mode 100644 index 0000000..e0a795c --- /dev/null +++ b/energyml-utils/tests/test_constants.py @@ -0,0 +1,15 @@ +from src.energyml.utils.constants import content_type_to_qualified_type, qualified_type_to_content_type + + +def test_content_type_to_qualified_type(): + assert ( + content_type_to_qualified_type("application/x-resqml+xml;version=2.0;type=obj_FaultInterpretation") + == "resqml20.obj_FaultInterpretation" + ) + + +def test_qualified_type_to_content_type(): + assert ( + qualified_type_to_content_type("resqml20.obj_FaultInterpretation") + == "application/x-resqml+xml;version=2.0;type=obj_FaultInterpretation" + ) diff --git a/energyml-utils/tests/test_epc.py b/energyml-utils/tests/test_epc.py index b21cf0c..51dd635 100644 --- a/energyml-utils/tests/test_epc.py +++ b/energyml-utils/tests/test_epc.py @@ -10,6 +10,7 @@ from energyml.resqml.v2_2.resqmlv2 import TriangulatedSetRepresentation from src.energyml.utils.epc import ( + as_dor, get_obj_identifier, gen_energyml_object_path, EpcExportVersion, @@ -18,8 +19,10 @@ epoch_to_date, epoch, gen_uuid, + get_content_type_from_class, get_obj_pkg_pkgv_type_uuid_version, get_obj_uri, + get_qualified_type_from_class, ) fi_cit = Citation20( @@ -134,3 +137,46 @@ def test_gen_energyml_object_path(): gen_energyml_object_path(tr, EpcExportVersion.EXPANDED) == f"namespace_resqml22/{tr.uuid}/TriangulatedSetRepresentation_{tr.uuid}.xml" ) + + +def test_as_dor_object(): + dor_fi = as_dor(fi) + + assert dor_fi.title == fi.citation.title + assert dor_fi.uuid == fi.uuid + assert dor_fi.qualified_type == get_qualified_type_from_class(fi) + + +def test_as_dor_another_dor(): + dor_dor20 = as_dor(dor_correct20, "eml20.DataObjectReference") + assert dor_dor20.title == dor_correct20.title + assert dor_dor20.uuid == fi.uuid + assert dor_dor20.content_type == get_content_type_from_class(fi) + + dor_dor20_bis = as_dor(dor_correct23, "eml20.DataObjectReference") + assert dor_dor20_bis.title == dor_correct23.title + assert dor_dor20_bis.uuid == fi.uuid + assert dor_dor20_bis.content_type == get_content_type_from_class(fi) + + dor_dor23 = as_dor(dor_correct20, "eml23.DataObjectReference") + assert dor_dor23.title == dor_correct20.title + assert dor_dor23.uuid == fi.uuid + assert dor_dor23.qualified_type == get_qualified_type_from_class(fi) + + +def test_as_dor_uri(): + dor_dor20 = as_dor( + "eml:///dataspace('test')/resqml22.TriangulatedSetRepresentation(0a2ba9e1-1018-4bfd-8fec-1c8cef13fa52)", + "eml20.DataObjectReference", + ) + assert dor_dor20.title is None + assert dor_dor20.uuid == "0a2ba9e1-1018-4bfd-8fec-1c8cef13fa52" + assert dor_dor20.content_type == "application/x-resqml+xml;version=2.2;type=TriangulatedSetRepresentation" + + dor_dor23 = as_dor( + "eml:///dataspace('test')/resqml22.TriangulatedSetRepresentation(0a2ba9e1-1018-4bfd-8fec-1c8cef13fa52)", + "eml23.DataObjectReference", + ) + assert dor_dor23.title is None + assert dor_dor23.uuid == "0a2ba9e1-1018-4bfd-8fec-1c8cef13fa52" + assert dor_dor23.qualified_type == "resqml22.TriangulatedSetRepresentation"