Skip to content

Commit 0e45d74

Browse files
possibility to add object to an existing epc instance (#10)
* possibility to add object to an existing epc instance * bugfix for vertical Axis * allow python 3.9
1 parent 5e6be2f commit 0e45d74

File tree

9 files changed

+151
-29
lines changed

9 files changed

+151
-29
lines changed

energyml-utils/example/main.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from dataclasses import fields
66

77
from energyml.eml.v2_3.commonv2 import *
8+
from energyml.eml.v2_3.commonv2 import AbstractObject
89
from energyml.resqml.v2_0_1.resqmlv2 import DoubleHdf5Array
910
from energyml.resqml.v2_2.resqmlv2 import (
1011
TriangulatedSetRepresentation,
@@ -14,8 +15,8 @@
1415
AbstractColorMap,
1516
)
1617

17-
from src.energyml.utils.data.hdf import *
18-
from src.energyml.utils.data.helper import get_projected_uom
18+
# from src.energyml.utils.data.hdf import *
19+
from src.energyml.utils.data.helper import get_projected_uom, is_z_reversed
1920
from src.energyml.utils.epc import *
2021
from src.energyml.utils.introspection import *
2122
from src.energyml.utils.manager import *
@@ -27,7 +28,7 @@
2728
correct_dor,
2829
)
2930
from src.energyml.utils.xml import *
30-
from utils.data.datasets_io import HDF5FileReader
31+
from src.energyml.utils.data.datasets_io import HDF5FileReader
3132

3233
fi_cit = Citation(
3334
title="An interpretation",
@@ -375,6 +376,12 @@ def test_local_depth_crs():
375376
print(e)
376377

377378

379+
def test_crs():
    """
    Print whether a randomly generated LocalEngineeringCompoundCrs is detected as z-reversed.

    :return: None, the result of ``is_z_reversed`` is only printed
    """
    from energyml.eml.v2_3.commonv2 import LocalEngineeringCompoundCrs

    z_reversed = is_z_reversed(random_value_from_class(LocalEngineeringCompoundCrs))
    print(z_reversed)
383+
384+
378385
def test_get_projected_uom():
379386
# Fails because the xsi:type="VerticalCrsEpsgCode" doesn't
380387
# contain the namespace : xsi:type="eml:VerticalCrsEpsgCode"
@@ -468,4 +475,5 @@ def class_field():
468475
# test_obj_attribs()
469476
# test_copy_values()
470477
# class_field()
471-
test_get_projected_uom()
478+
# test_get_projected_uom()
479+
test_crs()

energyml-utils/example/tools.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ def csv_to_dataset():
212212
parser = argparse.ArgumentParser()
213213
parser.add_argument("--csv", "-f", type=str, help="Csv file path")
214214
parser.add_argument("--output", "-o", type=str, help="Output file path")
215-
parser.add_argument("--prefix", "-p", type=str, default="", help="Output file path")
215+
parser.add_argument("--prefix", "-p", type=str, default="", help="Dataset path prefix")
216216
parser.add_argument("--csv-delimiter", "-d", type=str, default=",", help="CSV delimiter")
217217
parser.add_argument(
218218
"--mapping",
@@ -227,10 +227,15 @@ def csv_to_dataset():
227227

228228
args = parser.parse_args()
229229

230+
print(args.csv_delimiter)
231+
print(args.mapping_line)
232+
230233
mapping = args.mapping_line or args.mapping
231234
if mapping is not None:
232235
mapping = json.loads(mapping)
233236

237+
print(mapping)
238+
234239
output_file_path = args.output
235240
if output_file_path.lower().endswith(".parquet") or output_file_path.lower().endswith(".pqt"):
236241
csv_to_parquet(

energyml-utils/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ build-backend = "poetry_dynamic_versioning.backend"
66

77
[tool.poetry]
88
name = "energyml-utils"
9-
version = "1.0.1.dev4" # Set at build time
9+
version = "0.0.0" # Set at build time
1010
description = "Energyml helper"
1111
authors = [
1212
"Valentin Gauthier <[email protected]>"
@@ -50,7 +50,7 @@ parquet = ["pyarrow", "numpy", "pandas"]
5050
hdf5 = ["h5py"]
5151

5252
[tool.poetry.dependencies]
53-
python = "^3.10"
53+
python = "^3.9"
5454
xsdata = {version = "^24.0", extras = ["cli", "lxml"]}
5555
energyml-opc = "^1.12.0"
5656
h5py = { version = "^3.7.0", optional = false }

energyml-utils/src/energyml/utils/constants.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@
106106

107107
RGX_XML_HEADER = r"^\s*<\?xml(\s+(encoding\s*=\s*\"(?P<encoding>[^\"]+)\"|version\s*=\s*\"(?P<version>[^\"]+)\"|standalone\s*=\s*\"(?P<standalone>[^\"]+)\"))+" # pylint: disable=C0301
108108

109-
RGX_IDENTIFIER = f"{RGX_UUID}(.(?P<version>\w+)?)?"
109+
RGX_IDENTIFIER = rf"{RGX_UUID}(.(?P<version>\w+)?)?"
110110

111111

112112
# __ ______ ____
@@ -225,23 +225,21 @@ class EPCRelsRelationshipType(Enum):
225225
EXTENDED_CORE_PROPERTIES = "extended-core-properties"
226226

227227
def get_type(self) -> str:
    """
    Return the relationship type URI of this member.

    The URI is a schema prefix followed by the member value:

    - EXTENDED_CORE_PROPERTIES uses the f2i-consulting prefix,
    - CORE_PROPERTIES uses the openxmlformats metadata prefix,
    - every other member uses the energistics prefix.

    :return: the relationship type URI
    """
    # Members are looked up on the enum class of ``self`` so both special cases are real
    # comparisons; a bare member used as a condition is always truthy.
    members = type(self)
    if self is members.EXTENDED_CORE_PROPERTIES:
        return "http://schemas.f2i-consulting.com/package/2014/relationships/" + str(self.value)
    if self is members.CORE_PROPERTIES:
        return "http://schemas.openxmlformats.org/package/2006/relationships/metadata/" + str(self.value)
    return "http://schemas.energistics.org/package/2012/relationships/" + str(self.value)
245243

246244

247245
@dataclass
@@ -306,6 +304,8 @@ def parse_content_or_qualified_type(cqt: str) -> Optional[re.Match[str]]:
306304
try:
307305
parsed = parse_content_type(cqt)
308306
except:
307+
pass
308+
if parsed is None:
309309
try:
310310
parsed = parse_qualified_type(cqt)
311311
except:
@@ -335,7 +335,7 @@ def get_domain_version_from_content_or_qualified_type(cqt: str) -> Optional[str]
335335

336336
def split_identifier(identifier: str) -> Tuple[str, Optional[str]]:
    """
    Split an identifier into its uuid and version parts using RGX_IDENTIFIER.

    :param identifier: the identifier to split
    :return: a tuple (uuid, version); version is None when the version group did not match
    :raises ValueError: if the identifier does not match RGX_IDENTIFIER
    """
    parsed = re.match(RGX_IDENTIFIER, identifier)
    if parsed is None:
        # Fail with an explicit message instead of an AttributeError on ``None.group``.
        raise ValueError(f"Identifier '{identifier}' does not match the expected identifier pattern")
    return parsed.group(URI_RGX_GRP_UUID), parsed.group(URI_RGX_GRP_VERSION)
339339

340340

341341
def now(time_zone=datetime.timezone.utc) -> float:

energyml-utils/src/energyml/utils/data/helper.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,11 @@ def is_z_reversed(crs: Optional[Any]) -> bool:
9191
# resqml >= 22
9292
vert_axis = search_attribute_matching_name(crs, "VerticalAxis.Direction")
9393
if len(vert_axis) > 0:
94-
reverse_z_values = vert_axis[0].lower() == "down"
94+
vert_axis_str = str(vert_axis[0])
95+
if "." in vert_axis_str:
96+
vert_axis_str = vert_axis_str.split(".")[-1]
97+
98+
reverse_z_values = vert_axis_str.lower() == "down"
9599

96100
return reverse_z_values
97101

energyml-utils/src/energyml/utils/epc.py

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import datetime
88
import json
99
import logging
10+
import os
11+
import random
1012
import re
1113
import traceback
1214
import zipfile
@@ -43,6 +45,7 @@
4345
from .data.datasets_io import (
4446
read_external_dataset_array,
4547
)
48+
from .exception import UnparsableFile
4649
from .introspection import (
4750
get_class_from_content_type,
4851
get_obj_type,
@@ -69,7 +72,7 @@
6972
serialize_xml,
7073
read_energyml_xml_str,
7174
read_energyml_xml_bytes,
72-
read_energyml_json_str,
75+
read_energyml_json_str, read_energyml_json_bytes, JSON_VERSION,
7376
)
7477
from .workspace import EnergymlWorkspace
7578
from .xml import is_energyml_content_type
@@ -130,6 +133,71 @@ def __str__(self):
130133
# + f"\n{[serialize_json(ar) for ar in self.additional_rels]}"
131134
)
132135

136+
def add_file(self, obj: Union[List, bytes, BytesIO, str, RawFile]):
    """
    Add one or multiple files to the epc file.
    For non energyml file, it is better to use the RawFile class.
    The input can be a single file content, file path, or a list of them

    Handling by input type:

    - list: every element is added recursively,
    - bytes / BytesIO: parsed as energyml xml, then as energyml json,
    - RawFile: appended to ``raw_files``,
    - str: treated as a file path if it exists on disk, otherwise as a text content,
    - object whose class lives in an ``energyml.`` module: appended to ``energyml_objects``.

    Any other type is only reported with ``logging.error``.

    :param obj: the file(s) to add
    :return: None
    :raises UnparsableFile: if a bytes / BytesIO content is neither a readable xml nor a readable json
    """
    if isinstance(obj, list):
        for o in obj:
            self.add_file(o)
    elif isinstance(obj, (bytes, BytesIO)):
        try:
            xml_obj = read_energyml_xml_bytes(obj)
            self.energyml_objects.append(xml_obj)
        except Exception:
            try:
                if isinstance(obj, BytesIO):
                    # the xml attempt may have consumed the stream
                    obj.seek(0)
                json_obj = read_energyml_json_bytes(obj, json_version=JSON_VERSION.OSDU_OFFICIAL)
                self.add_file(json_obj)
            except Exception as e:
                raise UnparsableFile() from e
    elif isinstance(obj, RawFile):
        self.raw_files.append(obj)
    elif isinstance(obj, str):
        # Can be a path or a content
        if os.path.exists(obj):
            with open(obj, "rb") as f:
                file_content = f.read()
            f_name = os.path.basename(obj)
            f_ext = os.path.splitext(f_name)[1].lower()
            if f_ext in (".xml", ".json"):
                try:
                    self.add_file(file_content)
                except UnparsableFile:
                    # not an energyml file: keep it as a raw file
                    self.add_file(RawFile(f_name, BytesIO(file_content)))
            elif f_ext != ".rels":
                self.add_file(RawFile(f_name, BytesIO(file_content)))
            else:
                logging.error(f"Not supported file extension {f_name}")
        else:
            try:
                xml_obj = read_energyml_xml_str(obj)
                self.energyml_objects.append(xml_obj)
            except Exception:
                try:
                    json_obj = read_energyml_json_str(obj, json_version=JSON_VERSION.OSDU_OFFICIAL)
                    self.add_file(json_obj)
                except Exception:
                    # Unknown text content: keep it as a raw file. The text is wrapped in a BytesIO
                    # like every other RawFile created in this method.
                    self.add_file(
                        RawFile(
                            path=f"pleaseRenameThisFile_{str(random.random())}.txt",
                            content=BytesIO(obj.encode("utf-8")),
                        )
                    )
    elif str(type(obj).__module__).startswith("energyml."):
        # We should test "energyml.(resqml|witsml|prodml|eml|common)" but I didn't to avoid issues if
        # another specific package comes in the future
        self.energyml_objects.append(obj)
    else:
        logging.error(f"unsupported type {str(type(obj))}")
200+
133201
# EXPORT functions
134202

135203
def gen_opc_content_type(self) -> Types:

energyml-utils/src/energyml/utils/exception.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,8 @@ def __init__(self, qt: Optional[str] = None):
3434
class NotParsableType(Exception):
3535
def __init__(self, t: Optional[str] = None):
3636
super().__init__(f"type: {t}")
37+
38+
39+
class UnparsableFile(Exception):
    """Raised when a content cannot be parsed as a file for an EPC."""

    def __init__(self, t: Optional[str] = None):
        """
        :param t: optional detail appended to the message between parentheses
            (presumably the file name or path; confirm against callers)
        """
        message = "File is not parsable for an EPC file. Please use RawFile class for non energyml files."
        if t is not None:
            # The argument used to be silently dropped; surface it to help locate the faulty file.
            message = f"{message} ({t})"
        super().__init__(message)

energyml-utils/src/energyml/utils/introspection.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1105,13 +1105,42 @@ def get_qualified_type_from_class(cls: Union[type, Any], print_dev_version=True)
11051105
)
11061106

11071107

1108-
def get_object_uri(obj: Any, dataspace: Optional[str] = None) -> Optional[Uri]:
    """
    Returns an ETP URI for an object.

    The URI text is built as ``eml:///dataspace('<dataspace>')/<qualified type>(<uuid>)`` and
    then given to ``parse_uri``, whose result is returned unchanged.

    :param obj: the object to reference; its qualified type comes from
        ``get_qualified_type_from_class`` and its uuid from ``get_obj_uuid``
    :param dataspace: the dataspace name; None is written as an empty name
    :return: the result of ``parse_uri`` on the built text
    """
    return parse_uri(
        f"eml:///dataspace('{dataspace or ''}')/{get_qualified_type_from_class(obj)}({get_obj_uuid(obj)})"
    )
11131113

11141114

1115+
def dor_to_uris(dor: Any, dataspace: Optional[str] = None) -> Optional[Uri]:
    """
    Transform a DOR into an etp uri

    The domain, domain version and object type are read from the "qualified_type" attribute of
    the DOR, or from its "content_type" attribute when the first one is missing or not parsable.

    :param dor: the DOR; its ``uuid`` and ``object_version`` attributes are copied into the uri
    :param dataspace: the dataspace to set in the uri
    :return: the uri, or None if neither attribute can be parsed
    """
    import logging

    parsed = None
    for attribute_name, parser in (
        ("qualified_type", parse_qualified_type),
        ("content_type", parse_content_type),
    ):
        try:
            parsed = parser(get_object_attribute_no_verif(dor, attribute_name))
        except Exception as e:
            logging.debug("dor_to_uris: unable to use '%s': %s", attribute_name, e)
            continue
        # A parser may also report a failure by returning None: try the next attribute then.
        if parsed is not None:
            break

    if parsed is None:
        return None

    return Uri(
        dataspace=dataspace,
        domain=parsed.group("domain"),
        domain_version=parsed.group("domainVersion"),
        object_type=parsed.group("type"),
        uuid=dor.uuid,
        version=dor.object_version,
    )
1142+
1143+
11151144
def get_content_type_from_class(cls: Union[type, Any], print_dev_version=True, nb_max_version_digits=2):
11161145
if not isinstance(cls, type):
11171146
cls = type(cls)

energyml-utils/src/energyml/utils/uri.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ def is_object_uri(self):
5757
and self.uuid is not None
5858
)
5959

60+
def get_qualified_type(self):
    """
    Build the qualified type text of this uri.

    :return: the domain immediately followed by the domain version, then a dot and the object type
    """
    return "{}{}.{}".format(self.domain, self.domain_version, self.object_type)
62+
6063
def __str__(self):
6164
res = "eml:///"
6265
if self.dataspace is not None and len(self.dataspace) > 0:

0 commit comments

Comments
 (0)