Skip to content

Commit 2c79f35

Browse files
committed
add support for OGC TrainingDML-AI
1 parent 91c70cf commit 2c79f35

File tree

6 files changed

+606
-6
lines changed

6 files changed

+606
-6
lines changed

pygeometa/schemas/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,14 @@
5353
THISDIR = os.path.dirname(os.path.realpath(__file__))
5454

5555
SCHEMAS = {
56+
'dcat': 'pygeometa.schemas.dcat.DCATOutputSchema',
5657
'iso19139': 'pygeometa.schemas.iso19139.ISO19139OutputSchema',
5758
'iso19139-2': 'pygeometa.schemas.iso19139_2.ISO19139_2OutputSchema',
5859
'iso19139-hnap': 'pygeometa.schemas.iso19139_hnap.ISO19139HNAPOutputSchema', # noqa
5960
'oarec-record': 'pygeometa.schemas.ogcapi_records.OGCAPIRecordOutputSchema', # noqa
6061
'stac-item': 'pygeometa.schemas.stac.STACItemOutputSchema',
61-
'dcat': 'pygeometa.schemas.dcat.DCATOutputSchema',
62+
'tdml-ai': 'pygeometa.schemas.tdml_ai.TDML_AIOutputSchema',
63+
'tdml-ai-oarec': 'pygeometa.schemas.tdml_ai_oarec.TDML_AIOARecOutputSchema', # noqa
6264
'wmo-cmp': 'pygeometa.schemas.wmo_cmp.WMOCMPOutputSchema',
6365
'wmo-wcmp2': 'pygeometa.schemas.wmo_wcmp2.WMOWCMP2OutputSchema',
6466
'wmo-wigos': 'pygeometa.schemas.wmo_wigos.WMOWIGOSOutputSchema'

pygeometa/schemas/ogcapi_records/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,7 @@ def write(self, mcf: dict, stringify: str = True) -> Union[dict, str]:
125125
}
126126

127127
LOGGER.debug('Checking for temporal')
128-
if all(['temporal' in mcf['identification']['extents'],
129-
mcf['identification']['extents']['temporal'] != [{}]]):
130-
128+
try:
131129
begin = mcf['identification']['extents']['temporal'][0]['begin']
132130
end = mcf['identification']['extents']['temporal'][0].get('end')
133131

@@ -151,6 +149,9 @@ def write(self, mcf: dict, stringify: str = True) -> Union[dict, str]:
151149
if 'resolution' in mcf['identification']['extents']['temporal'][0]: # noqa
152150
record['time']['resolution'] = mcf['identification']['extents']['temporal'][0]['resolution'] # noqa
153151

152+
except (IndexError, KeyError):
153+
record['time'] = None
154+
154155
LOGGER.debug('Checking for dates')
155156
if 'dates' in mcf['identification']:
156157
if 'creation' in mcf['identification']['dates']:

pygeometa/schemas/tdml_ai/__init__.py

Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
# =================================================================
2+
#
3+
# Terms and Conditions of Use
4+
#
5+
# Unless otherwise noted, computer program source code of this
6+
# distribution is covered under Crown Copyright, Government of
7+
# Canada, and is distributed under the MIT License.
8+
#
9+
# The Canada wordmark and related graphics associated with this
10+
# distribution are protected under trademark law and copyright law.
11+
# No permission is granted to use them outside the parameters of
12+
# the Government of Canada's corporate identity program. For
13+
# more information, see
14+
# http://www.tbs-sct.gc.ca/fip-pcim/index-eng.asp
15+
#
16+
# Copyright title to all 3rd party software distributed with this
17+
# software is held by the respective copyright holders as noted in
18+
# those files. Users are asked to read the 3rd Party Licenses
19+
# referenced with those assets.
20+
#
21+
# Copyright (c) 2024 Tom Kralidis
22+
#
23+
# Permission is hereby granted, free of charge, to any person
24+
# obtaining a copy of this software and associated documentation
25+
# files (the "Software"), to deal in the Software without
26+
# restriction, including without limitation the rights to use,
27+
# copy, modify, merge, publish, distribute, sublicense, and/or sell
28+
# copies of the Software, and to permit persons to whom the
29+
# Software is furnished to do so, subject to the following
30+
# conditions:
31+
#
32+
# The above copyright notice and this permission notice shall be
33+
# included in all copies or substantial portions of the Software.
34+
#
35+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
37+
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
38+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
39+
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
40+
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
41+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
42+
# OTHER DEALINGS IN THE SOFTWARE.
43+
#
44+
# =================================================================
45+
46+
import json
47+
import logging
48+
import os
49+
from typing import Union
50+
51+
from pygeometa.core import get_charstring
52+
from pygeometa.helpers import json_serial
53+
from pygeometa.schemas.base import BaseOutputSchema
54+
55+
THISDIR = os.path.dirname(os.path.realpath(__file__))
56+
57+
LOGGER = logging.getLogger(__name__)
58+
59+
60+
class TDML_AIOutputSchema(BaseOutputSchema):
61+
"""OGC Training Data Markup Language for Artificial Intelligence"""
62+
63+
def __init__(self):
    """
    Set up the TrainingDML-AI output schema

    Registers the schema with the base class under the name `tdml-ai`,
    with its human readable description, the `json` output type and
    the directory of this module.

    :returns: pygeometa.schemas.base.BaseOutputSchema
    """

    super().__init__(
        'tdml-ai',
        'OGC Training Data Markup Language for Artificial Intelligence',
        'json',
        THISDIR)
73+
74+
def write(self, mcf: dict, stringify: str = True) -> Union[dict, str]:
    """
    Write outputschema to JSON string buffer

    :param mcf: dict of MCF content model
    :param stringify: whether to return a string representation (default)
                      else native (dict)

    :returns: `dict` or `str` of MCF as a TrainingDML-AI dataset
              representation
    """

    self.lang1 = mcf['metadata'].get('language')
    self.lang2 = mcf['metadata'].get('language_alternate')

    identification = mcf['identification']

    minx, miny, maxx, maxy = identification['extents']['spatial'][0]['bbox']

    title = get_charstring(identification.get('title'),
                           self.lang1, self.lang2)

    description = get_charstring(identification.get('abstract'),
                                 self.lang1, self.lang2)

    dataset = {
        'version': '1.0',
        'id': mcf['metadata']['identifier'],
        'type': 'AI_EOTrainingDataset',
        'name': title[0],
        'description': description[0],
        'extent': {
            'geographicElement': {
                'geographicBoundingBox': {
                    'westBoundLongitude': minx,
                    'eastBoundLongitude': maxx,
                    'southBoundLatitude': miny,
                    'northBoundLatitude': maxy
                }
            }
        }
    }

    LOGGER.debug('Checking for temporal')
    try:
        temporal = identification['extents']['temporal'][0]
        begin = temporal['begin']
    except (IndexError, KeyError):
        # no usable temporal extent in the MCF: omit temporalElement
        pass
    else:
        end = temporal.get('end')

        # 'now' / 'None' / None all denote an open (unset) bound
        open_bounds = ['now', 'None', None]
        if begin in open_bounds:
            begin = None
        if end in open_bounds:
            end = None

        # only emit the bounds that are actually set; the value of
        # `end` is used here (not the literal string 'end')
        time_period = {}
        if begin is not None:
            time_period['beginPosition'] = begin
        if end is not None:
            time_period['endPosition'] = end

        if time_period:
            dataset['extent']['temporalElement'] = {
                'TimePeriod': time_period
            }

    dataset['license'] = identification['license']['name']

    LOGGER.debug('Checking for dates')
    if 'dates' in identification:
        dates = identification['dates']
        if 'creation' in dates:
            dataset['createdTime'] = str(dates['creation'])
        if 'revision' in dates:
            dataset['updatedTime'] = str(dates['revision'])

    LOGGER.debug('Checking for contacts')
    dataset['providers'] = self.generate_providers(mcf['contact'])

    LOGGER.debug('Checking for tasks')
    dataset['tasks'] = self.generate_tasks(mcf['tasks'])

    LOGGER.debug('Checking for classes')
    dataset['classes'] = self.generate_classes(mcf['classes'])
    dataset['numberOfClasses'] = len(dataset['classes'])

    LOGGER.debug('Checking for bands')
    dataset['variables'] = self.generate_variables(mcf['attributes'])

    LOGGER.debug('Checking for doi')
    if 'doi' in identification:
        dataset['doi'] = identification['doi']

    all_keywords = []

    LOGGER.debug('Checking for keywords')
    for value in identification['keywords'].values():
        keywords = get_charstring(value.get('keywords'), self.lang1,
                                  self.lang2)

        # first element holds the keywords in the primary language
        all_keywords.extend(keywords[0])

    if all_keywords:
        dataset['keywords'] = all_keywords

    LOGGER.debug('Checking for data')
    dataset['data'] = self.generate_data(mcf['training-data'])

    if stringify:
        return json.dumps(dataset, default=json_serial, indent=4)

    return dataset
184+
185+
def generate_variables(self, attributes: list) -> list:
    """
    Generates 1..n variables

    :param attributes: `list` of attributes

    :returns: `list` of variable objects
    """

    # attribute key -> variable key, copied only when present
    optional_keys = (('units', 'unit'), ('abstract', 'description'))

    def to_variable(attribute):
        variable = {'name': attribute['name']}
        for source_key, target_key in optional_keys:
            if source_key in attribute:
                variable[target_key] = attribute[source_key]
        return variable

    return [to_variable(attribute) for attribute in attributes]
208+
209+
def generate_classes(self, classes: list) -> list:
    """
    Generates 1..n classes

    Each class is paired with its zero-based position in the input.

    :param classes: `list` of classes

    :returns: `list` of class objects
    """

    return [
        {'key': class_, 'value': position}
        for position, class_ in enumerate(classes)
    ]
227+
228+
def generate_tasks(self, tasks: dict) -> list:
    """
    Generates 1..n tasks

    :param tasks: `dict` of tasks, keyed by task identifier; each value
                  provides `description` and `type`

    :returns: `list` of tasks
    """

    return [
        {
            'id': identifier,
            # key is 'type' (a stray trailing colon used to be part of
            # the key, which hid the task type from consumers)
            'type': 'AI_EOTask',
            'description': task['description'],
            'taskType': task['type']
        }
        for identifier, task in tasks.items()
    ]
248+
249+
def generate_providers(self, contact: dict) -> list:
    """
    Generates 1..n providers

    A provider is the `organization` value of each contact.

    :param contact: `dict` of contacts

    :returns: `list` of providers
    """

    return [details['organization'] for details in contact.values()]
264+
265+
def generate_data(self, training_data: dict) -> list:
    """
    Generates training data objects from MCF training-data object

    :param training_data: `dict` of MCF training-data, keyed by
                          identifier; each value provides `url` and a
                          list of `labels` (each with `type`, `url`
                          and `field`)

    :returns: `list` of training data objects
    """

    datas = []

    for identifier, item in training_data.items():
        labels = [
            {
                # label type drives both the object type and the
                # names of its URL / field members
                'type': f"AI_{label['type']}Label",
                f"{label['type']}LabelURL": label['url'],
                f"{label['type']}LabelField": label['field'],
            }
            for label in item['labels']
        ]

        datas.append({
            # spelled like the other TDML-AI type names emitted by
            # this schema (AI_EOTrainingDataset, AI_EOTask)
            'type': 'AI_EOTrainingData',
            'id': identifier,
            'dataURL': [item['url']],
            'labels': labels
        })

    return datas

0 commit comments

Comments
 (0)