Skip to content

better "as_dor" function #12

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion energyml-utils/example/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
correct_dor,
)
from src.energyml.utils.xml import *
from src.energyml.utils.data.datasets_io import HDF5FileReader
from src.energyml.utils.data.datasets_io import HDF5FileReader, get_path_in_external_with_path

fi_cit = Citation(
title="An interpretation",
Expand Down Expand Up @@ -378,6 +378,7 @@ def test_local_depth_crs():

def test_crs():
from energyml.eml.v2_3.commonv2 import LocalEngineeringCompoundCrs

crs = random_value_from_class(LocalEngineeringCompoundCrs)
print(is_z_reversed(crs))

Expand Down
45 changes: 40 additions & 5 deletions energyml-utils/example/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,19 @@
from src.energyml.utils.data.datasets_io import CSVFileReader, HDF5FileWriter, ParquetFileWriter, DATFileReader
from src.energyml.utils.data.mesh import MeshFileFormat, export_multiple_data
from src.energyml.utils.epc import Epc, gen_energyml_object_path
from src.energyml.utils.introspection import get_class_from_simple_name, random_value_from_class, \
set_attribute_from_path, get_object_attribute, get_qualified_type_from_class, get_content_type_from_class, \
get_object_attribute_rgx, get_direct_dor_list, get_obj_uuid, get_class_from_qualified_type, \
get_object_attribute_or_create
from src.energyml.utils.introspection import (
get_class_from_simple_name,
random_value_from_class,
set_attribute_from_path,
get_object_attribute,
get_qualified_type_from_class,
get_content_type_from_class,
get_object_attribute_rgx,
get_direct_dor_list,
get_obj_uuid,
get_class_from_qualified_type,
get_object_attribute_or_create,
)
from src.energyml.utils.serialization import (
serialize_json,
JSON_VERSION,
Expand Down Expand Up @@ -370,14 +379,17 @@ def xml_to_json():
json_content = serialize_json(objs, JSON_VERSION.OSDU_OFFICIAL)
elif args.file.lower().endswith(".epc"):
epc = Epc.read_file(args.file)
# print(epc.energyml_objects)
json_content = (
"[\n"
+ ",".join(list(map(lambda o: serialize_json(o, JSON_VERSION.OSDU_OFFICIAL), epc.energyml_objects)))
+ "]"
)

with open(output_path, "w") as fout:
fout.write(json_content)
# print(json_content)
if json_content is not None:
fout.write(json_content)


def json_to_xml():
Expand All @@ -403,6 +415,29 @@ def json_to_xml():
fout.write(xml_content)


def json_to_epc():
    """
    Command-line entry point: convert an energyml JSON file into an EPC file.

    Arguments (read from ``sys.argv`` through argparse):
        --file / -f : path of the JSON file to read.
        --out  / -o : path of the EPC file to write (defaults to None).

    The JSON content is first parsed with ``JSON_VERSION.OSDU_OFFICIAL``; if that
    parsing raises, it is parsed again with ``JSON_VERSION.XSDATA``. Every parsed
    object is added to a new ``Epc`` which is then exported to ``--out``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", "-f", type=str, help="Input File")
    parser.add_argument("--out", "-o", type=str, default=None, help="Output EPC file")

    args = parser.parse_args()

    epc = Epc(epc_file_path=args.out)
    with open(args.file, "rb") as f:
        f_content = f.read()

    try:
        objs = read_energyml_json_bytes(f_content, JSON_VERSION.OSDU_OFFICIAL)
    except Exception:
        # Deliberately not a bare "except:": KeyboardInterrupt and SystemExit must
        # still propagate instead of silently triggering the fallback parser.
        objs = read_energyml_json_bytes(f_content, JSON_VERSION.XSDATA)

    epc.energyml_objects.extend(objs)

    epc.export_file(args.out)


def describe_as_csv():
parser = argparse.ArgumentParser()
parser.add_argument("--folder", "-f", type=str, help="Input File")
Expand Down
1 change: 1 addition & 0 deletions energyml-utils/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -128,4 +128,5 @@ csv_to_dataset = "example.tools:csv_to_dataset"
generate_data = "example.tools:generate_data"
xml_to_json = "example.tools:xml_to_json"
json_to_xml = "example.tools:json_to_xml"
json_to_epc = "example.tools:json_to_epc"
describe_as_csv = "example.tools:describe_as_csv"
17 changes: 17 additions & 0 deletions energyml-utils/src/energyml/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,23 @@ def parse_content_or_qualified_type(cqt: str) -> Optional[re.Match[str]]:
return parsed


def content_type_to_qualified_type(ct: str):
    """
    Build a qualified type from a content type.

    The input is parsed with :func:`parse_content_or_qualified_type`; the result is
    the parsed domain, followed by the domain version with its dots removed, a "."
    and the parsed type name.

    :param ct: the content type to convert
    :return: the qualified type built from the parsed groups
    """
    match = parse_content_or_qualified_type(ct)
    domain = match.group("domain")
    compact_version = match.group("domainVersion").replace(".", "")
    return "".join((domain, compact_version, ".", match.group("type")))


def qualified_type_to_content_type(qt: str):
    """
    Build a content type from a qualified type.

    The input is parsed with :func:`parse_content_or_qualified_type`; the result has
    the form ``application/x-<domain>+xml;version=<version>;type=<type>`` where
    ``<version>`` is the parsed domain version with a "." inserted between each
    matched pair of consecutive digits.

    :param qt: the qualified type to convert
    :return: the content type built from the parsed groups
    """
    match = parse_content_or_qualified_type(qt)
    domain = match.group("domain")
    dotted_version = re.sub(r"(\d)(\d)", r"\1.\2", match.group("domainVersion"))
    pieces = (
        "application/x-",
        domain,
        "+xml;version=",
        dotted_version,
        ";type=",
        match.group("type"),
    )
    return "".join(pieces)


def get_domain_version_from_content_or_qualified_type(cqt: str) -> Optional[str]:
"""
return a version number like "2.2" or "2.0"
Expand Down
99 changes: 73 additions & 26 deletions energyml-utils/src/energyml/utils/epc.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
Keywords1,
TargetMode,
)
from .uri import parse_uri
from xsdata.formats.dataclass.models.generics import DerivedElement

from .constants import (
Expand All @@ -39,6 +40,8 @@
RawFile,
EPCRelsRelationshipType,
MimeType,
content_type_to_qualified_type,
qualified_type_to_content_type,
split_identifier,
get_property_kind_dict_path_as_dict,
)
Expand All @@ -49,6 +52,7 @@
from .introspection import (
get_class_from_content_type,
get_obj_type,
is_dor,
search_attribute_matching_type,
get_obj_version,
get_obj_uuid,
Expand All @@ -65,7 +69,8 @@
set_attribute_from_path,
set_attribute_value,
get_object_attribute,
get_qualified_type_from_class, get_class_fields,
get_qualified_type_from_class,
get_class_fields,
)
from .manager import get_class_pkg, get_class_pkg_version
from .serialization import (
Expand Down Expand Up @@ -631,32 +636,74 @@ def as_dor(obj_or_identifier: Any, dor_qualified_type: str = "eml23.DataObjectRe
"""
dor = None
if obj_or_identifier is not None:
if isinstance(obj_or_identifier, str): # is an identifier
cls = get_class_from_qualified_type(dor_qualified_type)
dor = cls()
if len(__CACHE_PROP_KIND_DICT__) == 0:
# update the cache to check if it is a
update_prop_kind_dict_cache()
try:
uuid, version = split_identifier(obj_or_identifier)
if uuid in __CACHE_PROP_KIND_DICT__:
return as_dor(__CACHE_PROP_KIND_DICT__[uuid])
else:
set_attribute_from_path(dor, "uuid", uuid)
set_attribute_from_path(dor, "ObjectVersion", version)
except AttributeError:
logging.error(f"Failed to parse identifier {obj_or_identifier}. DOR will be empty")
cls = get_class_from_qualified_type(dor_qualified_type)
dor = cls()
if isinstance(obj_or_identifier, str): # is an identifier or uri
parsed_uri = parse_uri(obj_or_identifier)
if parsed_uri is not None:
if hasattr(dor, "qualified_type"):
set_attribute_from_path(dor, "qualified_type", parsed_uri.get_qualified_type())
if hasattr(dor, "content_type"):
set_attribute_from_path(
dor, "content_type", qualified_type_to_content_type(parsed_uri.get_qualified_type())
)
set_attribute_from_path(dor, "uuid", parsed_uri.uuid)
if hasattr(dor, "object_version"):
set_attribute_from_path(dor, "version_string", parsed_uri.version)
if hasattr(dor, "version_string"):
set_attribute_from_path(dor, "version_string", parsed_uri.version)

else: # identifier
if len(__CACHE_PROP_KIND_DICT__) == 0:
# update the cache to check if it is a
try:
update_prop_kind_dict_cache()
except FileNotFoundError as e:
logging.error(f"Failed to parse propertykind dict {e}")
try:
uuid, version = split_identifier(obj_or_identifier)
if uuid in __CACHE_PROP_KIND_DICT__:
return as_dor(__CACHE_PROP_KIND_DICT__[uuid])
else:
set_attribute_from_path(dor, "uuid", uuid)
set_attribute_from_path(dor, "ObjectVersion", version)
except AttributeError:
logging.error(f"Failed to parse identifier {obj_or_identifier}. DOR will be empty")
else:
cls = get_class_from_qualified_type(dor_qualified_type)
dor = cls()
if hasattr(dor, "qualified_type"):
set_attribute_from_path(dor, "qualified_type", get_qualified_type_from_class(obj_or_identifier))
if hasattr(dor, "content_type"):
set_attribute_from_path(dor, "content_type", get_content_type_from_class(obj_or_identifier))

set_attribute_from_path(dor, "uuid", get_object_attribute(obj_or_identifier, "uuid"))
set_attribute_from_path(dor, "object_version", get_object_attribute(obj_or_identifier, "ObjectVersion"))
set_attribute_from_path(dor, "title", get_object_attribute(obj_or_identifier, "Citation.Title"))
if is_dor(obj_or_identifier):
# If it is a dor, we create a dor conversion
if hasattr(dor, "qualified_type"):
if hasattr(obj_or_identifier, "qualified_type"):
dor.qualified_type = get_object_attribute(obj_or_identifier, "qualified_type")
elif hasattr(obj_or_identifier, "content_type"):
dor.qualified_type = content_type_to_qualified_type(
get_object_attribute(obj_or_identifier, "content_type")
)

if hasattr(dor, "content_type"):
if hasattr(obj_or_identifier, "qualified_type"):
dor.content_type = qualified_type_to_content_type(
get_object_attribute(obj_or_identifier, "qualified_type")
)
elif hasattr(obj_or_identifier, "content_type"):
dor.content_type = get_object_attribute(obj_or_identifier, "content_type")

set_attribute_from_path(dor, "title", get_object_attribute(obj_or_identifier, "Title"))

else:
if hasattr(dor, "qualified_type"):
set_attribute_from_path(dor, "qualified_type", get_qualified_type_from_class(obj_or_identifier))
if hasattr(dor, "content_type"):
set_attribute_from_path(dor, "content_type", get_content_type_from_class(obj_or_identifier))

set_attribute_from_path(dor, "title", get_object_attribute(obj_or_identifier, "Citation.Title"))

set_attribute_from_path(dor, "uuid", get_obj_uuid(obj_or_identifier))

if hasattr(dor, "object_version"):
set_attribute_from_path(dor, "object_version", get_obj_version(obj_or_identifier))
if hasattr(dor, "version_string"):
set_attribute_from_path(dor, "version_string", get_obj_version(obj_or_identifier))

return dor

Expand Down
41 changes: 34 additions & 7 deletions energyml-utils/src/energyml/utils/introspection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import re
import sys
import typing
from dataclasses import Field
from dataclasses import Field, field
from enum import Enum
from importlib import import_module
from types import ModuleType
Expand Down Expand Up @@ -311,8 +311,18 @@ def get_class_fields(cls: Union[type, Any]) -> Dict[str, Field]:
try:
return cls.__dataclass_fields__
except AttributeError:
# print(list_function_parameters_with_types(cls.__new__, True))
return list_function_parameters_with_types(cls.__new__, True)
try:
# print(list_function_parameters_with_types(cls.__new__, True))
return list_function_parameters_with_types(cls.__new__, True)
except AttributeError as e:
# For not working types like proxy type for C++ binding
res = {}
for a_name, a_type in inspect.getmembers(cls):
# print(f"{a_name} => {inspect.getmembers(a_type)}")
if not a_name.startswith("_") and not callable(getattr(cls, a_name, None)):
res[a_name] = field()

return res


def get_class_attributes(cls: Union[type, Any]) -> List[str]:
Expand Down Expand Up @@ -529,7 +539,7 @@ def get_object_attribute_advanced(obj: Any, attr_dot_path: str) -> Any:
return value


def get_object_attribute_no_verif(obj: Any, attr_name: str) -> Any:
def get_object_attribute_no_verif(obj: Any, attr_name: str, default: Optional[Any] = None) -> Any:
"""
Return the value of the attribute named after param :param:`attr_name` without verification (may raise an exception
if it doesn't exist).
Expand All @@ -540,11 +550,19 @@ def get_object_attribute_no_verif(obj: Any, attr_name: str) -> Any:
:return:
"""
if isinstance(obj, list):
return obj[int(attr_name)]
if int(attr_name) < len(obj):
return obj[int(attr_name)] or default
else:
raise AttributeError(obj, name=attr_name)
elif isinstance(obj, dict):
return obj[attr_name]
if attr_name in obj:
return obj.get(attr_name, default)
else:
raise AttributeError(obj, name=attr_name)
else:
return getattr(obj, attr_name, None)
return (
getattr(obj, attr_name) or default
) # we did not used the "default" of getattr to keep raising AttributeError


def get_object_attribute_rgx(obj: Any, attr_dot_path_rgx: str) -> Any:
Expand Down Expand Up @@ -599,6 +617,14 @@ def class_match_rgx(
return False


def is_dor(obj: Any) -> bool:
    """
    Tell whether :param:`obj` looks like a DataObjectReference (DOR).

    The object is considered a DOR when at least one of these holds:
      - its type name, as returned by :func:`get_obj_type`, contains
        "dataobjectreference" (case-insensitive);
      - :func:`get_object_attribute` finds a non-None "ContentType" on it;
      - :func:`get_object_attribute` finds a non-None "QualifiedType" on it.

    :param obj: the object to test
    :return: True if one of the criteria above matches, else False
    """
    return (
        "dataobjectreference" in get_obj_type(obj).lower()
        or get_object_attribute(obj, "ContentType") is not None
        or get_object_attribute(obj, "QualifiedType") is not None
    )


def search_attribute_matching_type_with_path(
obj: Any,
type_rgx: str,
Expand Down Expand Up @@ -1016,6 +1042,7 @@ def get_obj_version(obj: Any) -> str:
return get_object_attribute_no_verif(obj, "version_string")
except Exception:
logging.error(f"Error with {type(obj)}")
return None
# raise e


Expand Down
4 changes: 4 additions & 0 deletions energyml-utils/src/energyml/utils/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
import json
import logging
import numpy as np
import traceback
from enum import Enum
from io import BytesIO
Expand Down Expand Up @@ -448,6 +449,9 @@ def _to_json_dict_fn(
"""
if obj is None:
return None
elif isinstance(obj, float) and np.isnan(obj):
print("NaN found")
return None
elif is_enum(obj):
return obj.value
# return {
Expand Down
15 changes: 15 additions & 0 deletions energyml-utils/tests/test_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from src.energyml.utils.constants import content_type_to_qualified_type, qualified_type_to_content_type


def test_content_type_to_qualified_type():
    """A RESQML 2.0 content type converts to the matching qualified type."""
    content_type = "application/x-resqml+xml;version=2.0;type=obj_FaultInterpretation"
    expected = "resqml20.obj_FaultInterpretation"
    assert content_type_to_qualified_type(content_type) == expected


def test_qualified_type_to_content_type():
    """A RESQML 2.0 qualified type converts to the matching content type."""
    qualified_type = "resqml20.obj_FaultInterpretation"
    expected = "application/x-resqml+xml;version=2.0;type=obj_FaultInterpretation"
    assert qualified_type_to_content_type(qualified_type) == expected
Loading