Skip to content

Commit d3b13dc

Browse files
author
hugo.roussaffa
committed
feat(metadata): get last
fix flakes
1 parent 955ccc7 commit d3b13dc

File tree

10 files changed

+153
-351
lines changed

10 files changed

+153
-351
lines changed

examples/create_indicator.ipynb

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,6 @@
119119
"source": [
120120
"client = settings.getDaskClient()\n",
121121
"configFile = settings.initializeBilboProject('.env')\n",
122-
"\n",
123-
"client.run(settings.initializeWorkers, configFile)\n",
124122
"\n"
125123
]
126124
},

oeilnc_config/metadata.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import yaml
21
import pandas as pd
32
from datetime import datetime
43
from oeilnc_utils.connection import getEngine
@@ -7,7 +6,8 @@
76
import json
87
import importlib.metadata
98

10-
DB_META_SCHEMA = 'processing'
9+
DB_META_SCHEMA = "processing"
10+
TABLE_NAME = "processing_metadata"
1111

1212

1313
class ProcessingMetadata:
@@ -237,17 +237,44 @@ def from_config(self, config):
237237
pass
238238

239239
def get_metadata_by_id(self, id):
240-
table = "bilbo.processing_metadata"
241-
requete_sql = f"SELECT * FROM {table} WHERE id='{id}'"
240+
table = f"{DB_META_SCHEMA}.{TABLE_NAME}"
241+
requete_sql = f"SELECT * FROM {table} WHERE id ='{id}'"
242242
print(requete_sql)
243243
df = pd.read_sql_query(requete_sql, self.engine)
244244
return df
245245

246246
def get_metadata_by_run_id(self, run_id):
247-
table = "bilbo.processing_metadata"
248-
requete_sql = f"SELECT * FROM {table} WHERE run_id='{run_id}'"
247+
table = f"{DB_META_SCHEMA}.{TABLE_NAME}"
248+
requete_sql = f"SELECT * FROM {table} WHERE run_id ='{run_id}'"
249249
print(requete_sql)
250250
df = pd.read_sql_query(requete_sql, self.engine)
251+
return df
252+
253+
def get_all(self):
254+
table = f"{DB_META_SCHEMA}.{TABLE_NAME}"
255+
requete_sql = f"SELECT * FROM {table}"
256+
print(requete_sql)
257+
df = pd.read_sql_query(requete_sql, self.engine)
258+
return df
259+
260+
def get_all_by_conf_property(self, conf, prop, value):
261+
_df = self.get_all()
262+
df_non_null = _df[_df[conf].apply(lambda x: x is not None)]
263+
filtered_df = df_non_null[df_non_null[conf].apply(lambda x: json.loads(x).get(prop) == value)]
264+
265+
return filtered_df
266+
267+
def get_last_from_zoi_config(self):
268+
conf = "zoi_config"
269+
prop = "dataName"
270+
value= self.zoi_config.get(prop)
271+
272+
df = self.get_all_by_conf_property(conf, prop, value)
273+
274+
275+
276+
277+
251278
return df
252279

253280
def insert_metadata(self):

oeilnc_utils/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# geoindicatorproject/db/__init__.py
2-
from . import connection, catalog, dataframe, geometry, raster
2+
from . import connection, catalog, dataframe, file_management, geometry, raster

oeilnc_utils/catalog.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
import os
21
import yaml
3-
import intake
42
import logging
53

4+
logging.info("Utils - Catalog Imported")
5+
66
def create_yaml_intake_catalog_from_dict(dict: dict, file_name="tmp"):
77
logging.info(f"{dict}")
88
path = f"{file_name}.yaml"
@@ -11,5 +11,3 @@ def create_yaml_intake_catalog_from_dict(dict: dict, file_name="tmp"):
1111
yaml.dump(dict, f)
1212

1313
return path
14-
15-

oeilnc_utils/connection.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,9 @@
11
import os
22
import logging
3-
from oeilnc_config import settings
4-
53
from sqlalchemy import create_engine, Engine
64
from geopandas import GeoDataFrame, GeoSeries
75
from pandas import DataFrame, Series
86
from shapely.geometry import Polygon,MultiPolygon
9-
10-
import dask
11-
127
from intake import entry
138
from intake import open_catalog
149

oeilnc_utils/file_management.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import os
2+
import logging
3+
4+
logging.info("Utils - File management Imported")
5+
6+
7+
8+
def exist_file(chemin_fichier):
9+
"""
10+
Vérifie si un fichier existe à l'emplacement spécifié.
11+
12+
Args:
13+
chemin_fichier (str): Le chemin complet du fichier à vérifier.
14+
15+
Returns:
16+
bool: True si le fichier existe, False sinon.
17+
"""
18+
return os.path.exists(chemin_fichier)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "bilbo-packages"
3-
version = "0.1.5"
3+
version = "0.1.6"
44
description = "Package permettant de générer des indicateurs à partir de différentes sources de données (Google Earth Engine, ...)."
55
authors = ["Clément Niot <[email protected]>", "Hugo Roussaffa <[email protected]>"]
66
readme = "README.md"

run.py

Lines changed: 73 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222

2323
from oeilnc_config import settings
24-
from oeilnc_utils import connection
24+
from oeilnc_utils import connection, file_management
2525
from oeilnc_geoindicator.calculation import create_indicator
2626
from intake import open_catalog
2727
from oeilnc_config.metadata import ProcessingMetadata
@@ -32,8 +32,8 @@
3232

3333
list_data_to_calculate = [ # ZOI / individu
3434
## p1
35-
# "H3_6_NC", #
36-
"H3_8_NC",
35+
"H3_6_NC", #
36+
#"H3_8_NC",
3737
"Foncier",
3838
"Reserves_indicateurSpec",
3939
"UNESCO_Zones_terrestres",
@@ -70,17 +70,15 @@
7070

7171
steplist= [1,2,3] # 1 : generate indicators by spatial intersection (interpolation/raster/vector)/ 2: spliting byDims & calculate ratio... / 3: persist
7272
list_indicateur_to_calculate = [ # thematique
73-
#"KBA",
74-
#"observation_nidification"
7573
#"GFC_gain_2012",
7674
#"GFC_treecover2000",
7775

78-
"TMF_transitionMap_v12022",
76+
#"TMF_transitionMap_v12022",
7977
"TMF_degradationyear_v12022",
8078
"TMF_DeforestationYear_v12022",
8179
"GFC_gain_2020",
8280
"GFC_lossyear",
83-
"GFC_treecover2021", # donnée à récupérer"
81+
#"GFC_treecover2021", # donnée à récupérer"
8482
#"TMF_annualChangeCollection_v12022_Dec_1991",
8583
#"TMF_annualChangeCollection_v12022_Dec_1992",
8684
#"TMF_annualChangeCollection_v12022_Dec_1993",
@@ -201,6 +199,7 @@
201199
faits = list()
202200
theme=configFile.get('project_db_schema')
203201

202+
204203
cat_dimensions = open_catalog(f"{configFile.get('dimension_catalog_dir')}DWH_Dimensions.yaml")
205204

206205
dim_spatial = cat_dimensions.dim_spatial
@@ -235,43 +234,76 @@ def run(list_data_to_calculate, configFile,list_indicateur_to_calculate):
235234
if len(list_indicateur_to_calculate) > 0:
236235
for indicateurFileName in list_indicateur_to_calculate:
237236
logging.info(f"--- {indicateurFileName} ---")
237+
path_file= f"{configFile.get('data_config_file')}{indicateurFileName}.yaml"
238+
if file_management.exist_file(path_file):
238239

239-
with open(f"{configFile.get('data_config_file')}{indicateurFileName}.yaml", 'r') as file:
240-
indicateurSpec = yaml.load(file, Loader=yaml.Loader)
241-
indicateurSpec["confDb"]["schema"] = theme
242-
243-
logging.info(f"individu: {dataFileName} | indicateur: {indicateurFileName}")
244-
245-
246-
if not fromIndexList:
247-
indexList= None
248-
249-
if settings.checkTableName(indicateurSpec,individuStatSpec) :
250-
logging.info(f"nbchuncks: {individuStatSpec.get('nbchuncks','aucun')}")
251-
252-
if {individuStatSpec.get('catalogUri',None)}:
253-
catalog = f"{configFile.get('data_catalog_dir')}{individuStatSpec.get('catalogUri',None)}"
254-
dataName = individuStatSpec.get('dataName',None)
255-
entryCatalog = getattr(open_catalog(catalog),dataName)
256-
selectString = individuStatSpec.get('selectString',entryCatalog.describe().get('args').get('sql_expr'))
257-
indexRef = individuStatSpec.get('indexRef',None)
258-
nbLignes = connection.getNbLignes(entryCatalog)
259-
260-
if {indicateurSpec.get('catalogUri', None)}:
261-
themeCatalog = f"{configFile.get('data_catalog_dir')}{indicateurSpec.get('catalogUri',None)}"
262-
else:
263-
themeCatalog = ''
240+
with open(path_file, 'r') as file:
241+
indicateurSpec = yaml.load(file, Loader=yaml.Loader)
242+
indicateurSpec["confDb"]["schema"] = theme
264243

244+
logging.info(f"individu: {dataFileName} | indicateur: {indicateurFileName}")
265245

266-
print(f"GO ------------->>>>>> individu: {dataFileName} | indicateur: {indicateurFileName}")
267246

268-
#client.run(settings.initializeWorkers)
247+
if not fromIndexList:
248+
indexList= None
269249

250+
if settings.checkTableName(indicateurSpec,individuStatSpec) :
251+
logging.info(f"nbchuncks: {individuStatSpec.get('nbchuncks','aucun')}")
270252

271-
if offset >= 0 or limit > 0:
272-
273-
while offset < nbLignes:
274-
253+
if {individuStatSpec.get('catalogUri',None)}:
254+
catalog = f"{configFile.get('data_catalog_dir')}{individuStatSpec.get('catalogUri',None)}"
255+
dataName = individuStatSpec.get('dataName',None)
256+
entryCatalog = getattr(open_catalog(catalog),dataName)
257+
indexRef = individuStatSpec.get('indexRef',None)
258+
nbLignes = connection.getNbLignes(entryCatalog)
259+
260+
if {indicateurSpec.get('catalogUri', None)}:
261+
themeCatalog = f"{configFile.get('data_catalog_dir')}{indicateurSpec.get('catalogUri',None)}"
262+
else:
263+
themeCatalog = ''
264+
265+
266+
print(f"GO ------------->>>>>> individu: {dataFileName} | indicateur: {indicateurFileName}")
267+
268+
269+
if offset >= 0 or limit > 0:
270+
271+
while offset < nbLignes:
272+
273+
274+
metadata = ProcessingMetadata(run_id=run_id)
275+
metadata.environment_variables = configFile
276+
metadata.output_schema = configFile.get('project_db_schema')
277+
metadata.operator_name = configFile.get('user')
278+
metadata.log_file_name = log_filename
279+
metadata.zoi_config = individuStatSpec
280+
metadata.dimensions_spatiales = individuStatSpec["confDims"]["isin_id_spatial"]
281+
metadata.theme_config = indicateurSpec
282+
metadata.theme_catalog = themeCatalog
283+
metadata.zoi_catalog = entryCatalog
284+
285+
sql_pagination = f"order by {indexRef} limit {limit} offset {offset}"
286+
logging.info(f"sql_pagination : {sql_pagination}")
287+
288+
faitsname = create_indicator(
289+
bbox=bb,
290+
individuStatSpec=individuStatSpec,
291+
indicateurSpec=indicateurSpec,
292+
dims=(dim_spatial,dim_mesure),
293+
stepList=steplist,
294+
indexListIndicator=indexList,
295+
sql_pagination=sql_pagination,
296+
indicateur_sql_flow=indicateur_sql_flow,
297+
daskComputation=daskComputation,
298+
metadata=metadata)
299+
300+
metadata.output_table_name = faitsname
301+
metadata.offset_value = offset
302+
metadata.limit_value = limit
303+
metadata.insert_metadata()
304+
305+
offset += limit
306+
else:
275307

276308
metadata = ProcessingMetadata(run_id=run_id)
277309
metadata.environment_variables = configFile
@@ -280,8 +312,7 @@ def run(list_data_to_calculate, configFile,list_indicateur_to_calculate):
280312
metadata.log_file_name = log_filename
281313
metadata.zoi_config = individuStatSpec
282314
metadata.dimensions_spatiales = individuStatSpec["confDims"]["isin_id_spatial"]
283-
metadata.theme_config = indicateurSpec
284-
metadata.theme_catalog = themeCatalog
315+
metadata.theme_config = themeCatalog
285316
metadata.zoi_catalog = entryCatalog
286317

287318
sql_pagination = f"order by {indexRef} limit {limit} offset {offset}"
@@ -294,7 +325,6 @@ def run(list_data_to_calculate, configFile,list_indicateur_to_calculate):
294325
dims=(dim_spatial,dim_mesure),
295326
stepList=steplist,
296327
indexListIndicator=indexList,
297-
sql_pagination=sql_pagination,
298328
indicateur_sql_flow=indicateur_sql_flow,
299329
daskComputation=daskComputation,
300330
metadata=metadata)
@@ -303,40 +333,8 @@ def run(list_data_to_calculate, configFile,list_indicateur_to_calculate):
303333
metadata.offset_value = offset
304334
metadata.limit_value = limit
305335
metadata.insert_metadata()
306-
307-
offset += limit
308-
else:
309-
310-
metadata = ProcessingMetadata(run_id=run_id)
311-
metadata.environment_variables = configFile
312-
metadata.output_schema = configFile.get('project_db_schema')
313-
metadata.operator_name = configFile.get('user')
314-
metadata.log_file_name = log_filename
315-
metadata.zoi_config = individuStatSpec
316-
metadata.dimensions_spatiales = individuStatSpec["confDims"]["isin_id_spatial"]
317-
metadata.theme_config = themeCatalog
318-
metadata.zoi_catalog = entryCatalog
319-
320-
sql_pagination = f"order by {indexRef} limit {limit} offset {offset}"
321-
logging.info(f"sql_pagination : {sql_pagination}")
322-
323-
faitsname = create_indicator(
324-
bbox=bb,
325-
individuStatSpec=individuStatSpec,
326-
indicateurSpec=indicateurSpec,
327-
dims=(dim_spatial,dim_mesure),
328-
stepList=steplist,
329-
indexListIndicator=indexList,
330-
indicateur_sql_flow=indicateur_sql_flow,
331-
daskComputation=daskComputation,
332-
metadata=metadata)
333-
334-
metadata.output_table_name = faitsname
335-
metadata.offset_value = offset
336-
metadata.limit_value = limit
337-
metadata.insert_metadata()
338-
else :
339-
pass
336+
else :
337+
pass
340338

341339

342340
#settings.checkConfigFiles(list_data_to_calculate, configFile,list_indicateur_to_calculate)

0 commit comments

Comments (0)