Skip to content

Commit 6113561

Browse files
Dev 03 25 (#11)
* Bugfix in introspection.py.
1 parent 0e45d74 commit 6113561

File tree

11 files changed

+187
-113
lines changed

11 files changed

+187
-113
lines changed

energyml-utils/.flake8

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[flake8]
2+
# Ignore specific error codes (comma-separated list)
3+
# Not ignored for now: W503, F403 (flake8 does not support inline comments)
ignore = E501, E722
4+
5+
# Max line length (default is 79, can be changed)
6+
max-line-length = 120
7+
8+
# Set the allowed complexity for functions (the check is disabled by default)
9+
max-complexity = 15
10+
11+
# Exclude certain files and directories
12+
exclude =
13+
.git,
14+
__pycache__,
15+
old_version_files,
16+
migrations

energyml-utils/.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,8 @@ manip*
5353
*.zip
5454

5555
*.xml
56-
*.json
56+
*.json
57+
58+
59+
# WIP
60+
src/energyml/utils/wip*

energyml-utils/example/main.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ def tests_0():
104104
print("==>", get_object_attribute(adict, "a"))
105105
print("==>", get_object_attribute(tr, "citation.title"))
106106

107-
print(re.split(r"(?<!\\)\.+", "[Cc]itation.[Tt]it\.*"))
108-
print("==>", get_object_attribute_rgx(fi, "[Cc]itation.[Tt]it\.*"))
107+
print(re.split(r"(?<!\\)\.+", r"[Cc]itation.[Tt]it\.*"))
108+
print("==>", get_object_attribute_rgx(fi, r"[Cc]itation.[Tt]it\.*"))
109109

110110
# print("==>", type(cit), type(Citation))
111111
# print("==>", type(cit) == type, type(Citation) == type)
@@ -458,6 +458,12 @@ def class_field():
458458
# print(create_external_part_reference("2.0", "myfile.h5"))
459459

460460

461+
def test_dor_conversion():
462+
463+
print(serialize_json(dor_correct))
464+
print(serialize_json(as_dor(dor_correct, "eml20.DataObjectReference")))
465+
466+
461467
if __name__ == "__main__":
462468
# tests_0()
463469
# tests_content_type()
@@ -476,4 +482,5 @@ def class_field():
476482
# test_copy_values()
477483
# class_field()
478484
# test_get_projected_uom()
479-
test_crs()
485+
# test_crs()
486+
test_dor_conversion()

energyml-utils/example/tools.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ def generate_data():
266266
default="energyml.resqml.v2_2.resqmlv2.TriangulatedSetRepresentation",
267267
help="Object type (e.g. energyml.resqml.v2_2.resqmlv2.TriangulatedSetRepresentation)",
268268
)
269+
269270
parser.add_argument(
270271
"--file-format",
271272
"-ff",

energyml-utils/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ h5py = { version = "^3.7.0", optional = false }
5757
pyarrow = { version = "^14.0.1", optional = false }
5858
numpy = { version = "^1.16.6", optional = false }
5959

60-
[tool.poetry.dev-dependencies]
60+
[tool.poetry.group.dev.dependencies]
6161
pandas = { version = "^1.1.0", optional = false }
6262
coverage = {extras = ["toml"], version = "^6.2"}
6363
pytest = "^8.1.1"

energyml-utils/src/energyml/utils/constants.py

Lines changed: 69 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -72,37 +72,29 @@
7272
RGX_CT_TOKEN_TYPE = r"type=(?P<type>[\w\_]+)"
7373

7474
RGX_CONTENT_TYPE = (
75-
RGX_MIME_TYPE_MEDIA
76-
+ "/"
77-
+ "(?P<rawDomain>("
78-
+ RGX_CT_ENERGYML_DOMAIN
79-
+ ")|("
80-
+ RGX_CT_XML_DOMAIN
81-
+ r")|([\w-]+\.?)+)"
82-
+ "(;(("
83-
+ RGX_CT_TOKEN_VERSION
84-
+ ")|("
85-
+ RGX_CT_TOKEN_TYPE
86-
+ ")))*"
87-
)
88-
RGX_QUALIFIED_TYPE = (
89-
r"(?P<domain>[a-zA-Z]+)" + RGX_DOMAIN_VERSION_FLAT + r"\.(?P<type>[\w_]+)"
75+
RGX_MIME_TYPE_MEDIA
76+
+ "/"
77+
+ "(?P<rawDomain>("
78+
+ RGX_CT_ENERGYML_DOMAIN
79+
+ ")|("
80+
+ RGX_CT_XML_DOMAIN
81+
+ r")|([\w-]+\.?)+)"
82+
+ "(;(("
83+
+ RGX_CT_TOKEN_VERSION
84+
+ ")|("
85+
+ RGX_CT_TOKEN_TYPE
86+
+ ")))*"
9087
)
88+
RGX_QUALIFIED_TYPE = r"(?P<domain>[a-zA-Z]+)" + RGX_DOMAIN_VERSION_FLAT + r"\.(?P<type>[\w_]+)"
9189
# =========
9290

9391
RGX_SCHEMA_VERSION = (
94-
r"(?P<name>[eE]ml|[cC]ommon|[rR]esqml|[wW]itsml|[pP]rodml|[oO]pc)?\s*v?"
95-
+ RGX_DOMAIN_VERSION
96-
+ r"\s*$"
92+
r"(?P<name>[eE]ml|[cC]ommon|[rR]esqml|[wW]itsml|[pP]rodml|[oO]pc)?\s*v?" + RGX_DOMAIN_VERSION + r"\s*$"
9793
)
9894

9995
RGX_ENERGYML_FILE_NAME_OLD = r"(?P<type>[\w]+)_" + RGX_UUID_NO_GRP + r"\.xml$"
100-
RGX_ENERGYML_FILE_NAME_NEW = (
101-
RGX_UUID_NO_GRP + r"\.(?P<objectVersion>\d+(\.\d+)*)\.xml$"
102-
)
103-
RGX_ENERGYML_FILE_NAME = (
104-
rf"^(.*/)?({RGX_ENERGYML_FILE_NAME_OLD})|({RGX_ENERGYML_FILE_NAME_NEW})"
105-
)
96+
RGX_ENERGYML_FILE_NAME_NEW = RGX_UUID_NO_GRP + r"\.(?P<objectVersion>\d+(\.\d+)*)\.xml$"
97+
RGX_ENERGYML_FILE_NAME = rf"^(.*/)?({RGX_ENERGYML_FILE_NAME_OLD})|({RGX_ENERGYML_FILE_NAME_NEW})"
10698

10799
RGX_XML_HEADER = r"^\s*<\?xml(\s+(encoding\s*=\s*\"(?P<encoding>[^\"]+)\"|version\s*=\s*\"(?P<version>[^\"]+)\"|standalone\s*=\s*\"(?P<standalone>[^\"]+)\"))+" # pylint: disable=C0301
108100

@@ -128,47 +120,43 @@
128120
URI_RGX_GRP_QUERY = "query"
129121

130122
# Patterns
131-
_URI_RGX_PKG_NAME = "|".join(
132-
ENERGYML_NAMESPACES.keys()
133-
) # "[a-zA-Z]+\w+" //witsml|resqml|prodml|eml
123+
_URI_RGX_PKG_NAME = "|".join(ENERGYML_NAMESPACES.keys()) # "[a-zA-Z]+\w+" //witsml|resqml|prodml|eml
134124
URI_RGX = (
135-
r"^eml:\/\/\/(?:dataspace\('(?P<"
136-
+ URI_RGX_GRP_DATASPACE
137-
+ r">[^']*?(?:''[^']*?)*)'\)\/?)?((?P<"
138-
+ URI_RGX_GRP_DOMAIN
139-
+ r">"
140-
+ _URI_RGX_PKG_NAME
141-
+ r")(?P<"
142-
+ URI_RGX_GRP_DOMAIN_VERSION
143-
+ r">[1-9]\d)\.(?P<"
144-
+ URI_RGX_GRP_OBJECT_TYPE
145-
+ r">\w+)(\((?:(?P<"
146-
+ URI_RGX_GRP_UUID
147-
+ r">(uuid=)?"
148-
+ RGX_UUID_NO_GRP
149-
+ r")|uuid=(?P<"
150-
+ URI_RGX_GRP_UUID2
151-
+ r">"
152-
+ RGX_UUID_NO_GRP
153-
+ r"),\s*version='(?P<"
154-
+ URI_RGX_GRP_VERSION
155-
+ r">[^']*?(?:''[^']*?)*)')\))?)?(\/(?P<"
156-
+ URI_RGX_GRP_COLLECTION_DOMAIN
157-
+ r">"
158-
+ _URI_RGX_PKG_NAME
159-
+ r")(?P<"
160-
+ URI_RGX_GRP_COLLECTION_DOMAIN_VERSION
161-
+ r">[1-9]\d)\.(?P<"
162-
+ URI_RGX_GRP_COLLECTION_TYPE
163-
+ r">\w+))?(?:\?(?P<"
164-
+ URI_RGX_GRP_QUERY
165-
+ r">[^#]+))?$"
125+
r"^eml:\/\/\/(?:dataspace\('(?P<"
126+
+ URI_RGX_GRP_DATASPACE
127+
+ r">[^']*?(?:''[^']*?)*)'\)\/?)?((?P<"
128+
+ URI_RGX_GRP_DOMAIN
129+
+ r">"
130+
+ _URI_RGX_PKG_NAME
131+
+ r")(?P<"
132+
+ URI_RGX_GRP_DOMAIN_VERSION
133+
+ r">[1-9]\d)\.(?P<"
134+
+ URI_RGX_GRP_OBJECT_TYPE
135+
+ r">\w+)(\((?:(?P<"
136+
+ URI_RGX_GRP_UUID
137+
+ r">(uuid=)?"
138+
+ RGX_UUID_NO_GRP
139+
+ r")|uuid=(?P<"
140+
+ URI_RGX_GRP_UUID2
141+
+ r">"
142+
+ RGX_UUID_NO_GRP
143+
+ r"),\s*version='(?P<"
144+
+ URI_RGX_GRP_VERSION
145+
+ r">[^']*?(?:''[^']*?)*)')\))?)?(\/(?P<"
146+
+ URI_RGX_GRP_COLLECTION_DOMAIN
147+
+ r">"
148+
+ _URI_RGX_PKG_NAME
149+
+ r")(?P<"
150+
+ URI_RGX_GRP_COLLECTION_DOMAIN_VERSION
151+
+ r">[1-9]\d)\.(?P<"
152+
+ URI_RGX_GRP_COLLECTION_TYPE
153+
+ r">\w+))?(?:\?(?P<"
154+
+ URI_RGX_GRP_QUERY
155+
+ r">[^#]+))?$"
166156
)
167157

168158
# ================================
169-
RELS_CONTENT_TYPE = (
170-
"application/vnd.openxmlformats-package.core-properties+xml"
171-
)
159+
RELS_CONTENT_TYPE = "application/vnd.openxmlformats-package.core-properties+xml"
172160
RELS_FOLDER_NAME = "_rels"
173161

174162
primitives = (bool, str, int, float, type(None))
@@ -179,6 +167,7 @@
179167

180168
class MimeType(Enum):
181169
"""Some mime types"""
170+
182171
CSV = "text/csv"
183172
HDF5 = "application/x-hdf5"
184173
PARQUET = "application/x-parquet"
@@ -199,7 +188,7 @@ class EpcExportVersion(Enum):
199188

200189

201190
class EPCRelsRelationshipType(Enum):
202-
""" Rels relationship types """
191+
"""Rels relationship types"""
203192

204193
#: The object in Target is the destination of the relationship.
205194
DESTINATION_OBJECT = "destinationObject"
@@ -244,10 +233,12 @@ def get_type(self) -> str:
244233

245234
@dataclass
246235
class RawFile:
247-
""" A class for a non energyml file to be stored in an EPC file """
236+
"""A class for a non energyml file to be stored in an EPC file"""
237+
248238
path: str = field(default="_")
249239
content: BytesIO = field(default=None)
250240

241+
251242
# ______ __ _
252243
# / ____/_ ______ _____/ /_(_)___ ____ _____
253244
# / /_ / / / / __ \/ ___/ __/ / __ \/ __ \/ ___/
@@ -335,7 +326,10 @@ def get_domain_version_from_content_or_qualified_type(cqt: str) -> Optional[str]
335326

336327
def split_identifier(identifier: str) -> Tuple[str, Optional[str]]:
337328
match = re.match(RGX_IDENTIFIER, identifier)
338-
return (match.group(URI_RGX_GRP_UUID), match.group(URI_RGX_GRP_VERSION), )
329+
return (
330+
match.group(URI_RGX_GRP_UUID),
331+
match.group(URI_RGX_GRP_VERSION),
332+
)
339333

340334

341335
def now(time_zone=datetime.timezone.utc) -> float:
@@ -356,12 +350,10 @@ def date_to_epoch(date: str) -> int:
356350

357351

358352
def epoch_to_date(
359-
epoch_value: int,
353+
epoch_value: int,
360354
) -> str:
361355
date = datetime.datetime.fromtimestamp(epoch_value, datetime.timezone.utc)
362-
return date.astimezone(datetime.timezone.utc).strftime(
363-
"%Y-%m-%dT%H:%M:%SZ"
364-
)
356+
return date.astimezone(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
365357
# date = datetime.datetime.fromtimestamp(epoch_value, datetime.timezone.utc)
366358
# return date.astimezone(datetime.timezone(datetime.timedelta(hours=0), "UTC")).strftime('%Y-%m-%dT%H:%M:%SZ')
367359
# return date.strftime("%Y-%m-%dT%H:%M:%SZ%z")
@@ -379,9 +371,9 @@ def mime_type_to_file_extension(mime_type: str) -> Optional[str]:
379371
if mime_type is not None:
380372
mime_type_lw = mime_type.lower()
381373
if (
382-
mime_type_lw == "application/x-parquet"
383-
or mime_type_lw == "application/parquet"
384-
or mime_type_lw == "application/vnd.apache.parquet"
374+
mime_type_lw == "application/x-parquet"
375+
or mime_type_lw == "application/parquet"
376+
or mime_type_lw == "application/vnd.apache.parquet"
385377
):
386378
return "parquet"
387379
elif mime_type_lw == "application/x-hdf5":
@@ -419,7 +411,10 @@ def _get_property_kind_dict_path_as_str(file_type: str = "xml") -> str:
419411
try:
420412
import energyml.utils.rc as RC
421413
except:
422-
import src.energyml.utils.rc as RC
414+
try:
415+
import src.energyml.utils.rc as RC
416+
except:
417+
import utils.rc as RC
423418
return files(RC).joinpath(f"PropertyKindDictionary_v2.3.{file_type.lower()}").read_text(encoding="utf-8")
424419

425420

@@ -452,3 +447,5 @@ def get_property_kind_dict_path_as_xml() -> str:
452447
print(path_iter(".Citation.Title.Coucou"))
453448
print(path_iter(".Citation.Ti\\.*.Coucou"))
454449

450+
print(URI_RGX)
451+
print(RGX_UUID_NO_GRP)

energyml-utils/src/energyml/utils/data/datasets_io.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def write_array(
9393
):
9494
if isinstance(array, list):
9595
array = np.asarray(array)
96+
print("writing array", target)
9697
with h5py.File(target, "a") as f:
9798
# print(array.dtype, h5py.string_dtype(), array.dtype == 'O')
9899
# print("\t", dtype or (h5py.string_dtype() if array.dtype == '0' else array.dtype))
@@ -175,9 +176,7 @@ def write_array(
175176

176177
@dataclass
177178
class ParquetFileReader:
178-
def read_array(
179-
self, source: Union[BytesIO, str], path_in_external_file: Optional[str] = None
180-
) -> None:
179+
def read_array(self, source: Union[BytesIO, str], path_in_external_file: Optional[str] = None) -> None:
181180
raise MissingExtraInstallation(extra_name="parquet")
182181

183182
def get_array_dimension(self, source: Union[BytesIO, str], path_in_external_file: str) -> Optional[List[Any]]:
@@ -254,7 +253,7 @@ def read_array(
254253
logging.debug("cst", _cst)
255254

256255
max_line_number = 0
257-
for (_, n, _) in items:
256+
for _, n, _ in items:
258257
if n > max_line_number:
259258
max_line_number = n
260259

0 commit comments

Comments
 (0)