Skip to content

Commit 5f3160c

Browse files
author
hugo.roussaffa
committed
fix(worker) : un dictionnaire de conf venant du client est passé à l'initialisation aux workers
1 parent 7a5130d commit 5f3160c

File tree

3 files changed

+237
-61
lines changed

3 files changed

+237
-61
lines changed

examples/create_indicator.ipynb

Lines changed: 213 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": null,
5+
"execution_count": 1,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -23,7 +23,7 @@
2323
},
2424
{
2525
"cell_type": "code",
26-
"execution_count": null,
26+
"execution_count": 2,
2727
"metadata": {},
2828
"outputs": [],
2929
"source": [
@@ -72,7 +72,7 @@
7272
},
7373
{
7474
"cell_type": "code",
75-
"execution_count": null,
75+
"execution_count": 3,
7676
"metadata": {},
7777
"outputs": [],
7878
"source": [
@@ -96,9 +96,31 @@
9696
},
9797
{
9898
"cell_type": "code",
99-
"execution_count": null,
99+
"execution_count": 4,
100100
"metadata": {},
101-
"outputs": [],
101+
"outputs": [
102+
{
103+
"name": "stderr",
104+
"output_type": "stream",
105+
"text": [
106+
"20/02/2024 17:38:00 - INFO - Config - Settings Imported\n",
107+
"20/02/2024 17:38:00 - WARNING - The following variables are null: commun_path, project_dir, data_catalog_dir, data_output_dir, sig_data_path, project_db_schema\n",
108+
"20/02/2024 17:38:01 - INFO - Utils - Connection Imported\n",
109+
"20/02/2024 17:38:01 - INFO - Utils - Dataframe Imported\n",
110+
"20/02/2024 17:38:01 - INFO - Utils - Geometry Imported\n",
111+
"20/02/2024 17:38:01 - DEBUG - Could not import boto3, continuing with reduced functionality.\n",
112+
"20/02/2024 17:38:01 - INFO - Utils - Raster Imported\n",
113+
"20/02/2024 17:38:02 - DEBUG - Could not import boto3, continuing with reduced functionality.\n",
114+
"20/02/2024 17:38:02 - DEBUG - GDAL_DATA found in environment.\n",
115+
"20/02/2024 17:38:02 - DEBUG - PROJ_DATA found in environment.\n",
116+
"20/02/2024 17:38:02 - INFO - GeoIndicator - Gee Imported\n",
117+
"20/02/2024 17:38:02 - INFO - Utils - Interpolation Imported\n",
118+
"20/02/2024 17:38:02 - INFO - GeoIndicator - Distribution Imported\n",
119+
"20/02/2024 17:38:02 - INFO - GeoIndicator - Raster Imported\n",
120+
"20/02/2024 17:38:02 - INFO - GeoIndicator - Calculation Imported\n"
121+
]
122+
}
123+
],
102124
"source": [
103125
"%load_ext autoreload\n",
104126
"%autoreload 2\n",
@@ -114,23 +136,202 @@
114136
},
115137
{
116138
"cell_type": "code",
117-
"execution_count": null,
139+
"execution_count": 5,
118140
"metadata": {},
119-
"outputs": [],
141+
"outputs": [
142+
{
143+
"data": {
144+
"text/html": [
145+
"\n",
146+
" <style>\n",
147+
" .geemap-dark {\n",
148+
" --jp-widgets-color: white;\n",
149+
" --jp-widgets-label-color: white;\n",
150+
" --jp-ui-font-color1: white;\n",
151+
" --jp-layout-color2: #454545;\n",
152+
" background-color: #383838;\n",
153+
" }\n",
154+
" \n",
155+
" .geemap-dark .jupyter-button {\n",
156+
" --jp-layout-color3: #383838;\n",
157+
" }\n",
158+
" \n",
159+
" .geemap-colab {\n",
160+
" background-color: var(--colab-primary-surface-color, white);\n",
161+
" }\n",
162+
" \n",
163+
" .geemap-colab .jupyter-button {\n",
164+
" --jp-layout-color3: var(--colab-primary-surface-color, white);\n",
165+
" }\n",
166+
" </style>\n",
167+
" "
168+
],
169+
"text/plain": [
170+
"<IPython.core.display.HTML object>"
171+
]
172+
},
173+
"metadata": {},
174+
"output_type": "display_data"
175+
},
176+
{
177+
"name": "stderr",
178+
"output_type": "stream",
179+
"text": [
180+
"20/02/2024 17:38:02 - WARNING - La variable d'environnement SCHEDULER_IP doit être renseignée pour effectuer les traitements de manière distribuée\n",
181+
"20/02/2024 17:38:02 - INFO - on applique cette ip par défaut : 172.20.12.13:9786\n",
182+
"20/02/2024 17:38:02 - DEBUG - Using selector: EpollSelector\n"
183+
]
184+
},
185+
{
186+
"data": {
187+
"text/plain": [
188+
"{'tcp://172.20.10.112:40691': None}"
189+
]
190+
},
191+
"execution_count": 5,
192+
"metadata": {},
193+
"output_type": "execute_result"
194+
}
195+
],
120196
"source": [
121197
"client = settings.getDaskClient()\n",
198+
"config = settings.initializeBilboProject(dotenvPath='.dev_env')\n",
199+
"client.run(settings.initializeWorkers, config)\n",
122200
"\n",
123-
"client.run(settings.initializeWorkers)\n",
124201
"\n",
125-
"config = settings.initializeBilboProject(dotenvPath='.dev_env')\n",
126-
"config"
202+
"\n"
127203
]
128204
},
129205
{
130206
"cell_type": "code",
131-
"execution_count": null,
207+
"execution_count": 6,
132208
"metadata": {},
133-
"outputs": [],
209+
"outputs": [
210+
{
211+
"data": {
212+
"text/html": [
213+
"\n",
214+
" <style>\n",
215+
" .geemap-dark {\n",
216+
" --jp-widgets-color: white;\n",
217+
" --jp-widgets-label-color: white;\n",
218+
" --jp-ui-font-color1: white;\n",
219+
" --jp-layout-color2: #454545;\n",
220+
" background-color: #383838;\n",
221+
" }\n",
222+
" \n",
223+
" .geemap-dark .jupyter-button {\n",
224+
" --jp-layout-color3: #383838;\n",
225+
" }\n",
226+
" \n",
227+
" .geemap-colab {\n",
228+
" background-color: var(--colab-primary-surface-color, white);\n",
229+
" }\n",
230+
" \n",
231+
" .geemap-colab .jupyter-button {\n",
232+
" --jp-layout-color3: var(--colab-primary-surface-color, white);\n",
233+
" }\n",
234+
" </style>\n",
235+
" "
236+
],
237+
"text/plain": [
238+
"<IPython.core.display.HTML object>"
239+
]
240+
},
241+
"metadata": {},
242+
"output_type": "display_data"
243+
},
244+
{
245+
"name": "stderr",
246+
"output_type": "stream",
247+
"text": [
248+
"20/02/2024 17:38:07 - DEBUG - open file: /media/commun/commun/Informatique/SIG/Application/Jupyterhub/projets/catalogFiles/DWH_Dimensions.yaml\n",
249+
"20/02/2024 17:38:07 - INFO - step list : {steplist}\n",
250+
"20/02/2024 17:38:07 - INFO - Initial offset : 0 , limit : 10000\n",
251+
"20/02/2024 17:38:07 - INFO - Id Spatial qui seront calculés : ['0', 'C', 'F', 'G', 'B', 'D', 'A', 'E', '1', '2', '3', '98827', '98817', '98813', '98826', '98819', '98816', '98828', '98824', '98832', '98821', '98831', '98818', '98804', '98822', '98814', '98815', '98806', '98809', '98820', '98811', '98807', '98805', '98803', '98823', '98810', '98812', '98833', '98808', '98825', '98801', '98830', '98802', '98829']\n",
252+
"20/02/2024 17:38:07 - INFO - individu: H3_6_NC\n",
253+
"20/02/2024 17:38:07 - INFO - indicateur: KBA\n",
254+
"20/02/2024 17:38:07 - INFO - le nom de la table de faits en base de donnée en sortie de traitement répond au pré-requis : faits_kba_h3_nc_6\n",
255+
"20/02/2024 17:38:07 - INFO - nbchuncks: 300\n",
256+
"20/02/2024 17:38:07 - DEBUG - open file: /home/hugo/projets/bilbo-packages/examples/../test_catalog_file/h3.yaml\n",
257+
"20/02/2024 17:38:07 - INFO - {'sources': {'compte_entites': {'driver': 'sql', 'metadata': {}, 'args': {'uri': 'postgresql://{{env(\"DB_USER\")}}:{{env(\"DB_PWD\")}}@{{env(\"DB_HOST\")}}:{{env(\"DB_PORT\")}}/oeil_traitement', 'sql_expr': 'SELECT COUNT(*) as nb FROM bilbo.h3_nc_6;'}, 'description': 'Compter le nombre d’entités sans charger les géométries'}}}\n",
258+
"20/02/2024 17:38:07 - DEBUG - open file: /home/hugo/projets/bilbo-packages/examples/tmp.yaml\n",
259+
"20/02/2024 17:38:07 - INFO - bilbo.h3_nc_6 nblignes : 759\n",
260+
"20/02/2024 17:38:07 - INFO - sql_pagination : order by hex_id limit 10000 offset 0\n",
261+
"20/02/2024 17:38:07 - INFO - Settings - getPaths data_config_file ../test_catalog_file/config/ \n",
262+
"20/02/2024 17:38:07 - DEBUG - Settings - getPaths - config {'commun_path': '/media/commun/commun/', 'project_dir': '/media/commun/commun/Informatique/SIG/Application/Jupyterhub/projets/stac/', 'data_catalog_dir': '../test_catalog_file/', 'data_output_dir': '../target/', 'sig_data_path': '/media/commun/commun/Informatique/SIG/Donnees/', 'project_db_schema': 'bilbo', 'data_config_file': '../test_catalog_file/config/', 'dimension_catalog_dir': '/media/commun/commun/Informatique/SIG/Application/Jupyterhub/projets/catalogFiles/'}\n",
263+
"20/02/2024 17:38:07 - INFO - create_indicator: Etape 1\n",
264+
"20/02/2024 17:38:07 - DEBUG - open file: /home/hugo/projets/bilbo-packages/examples/../test_catalog_file/h3.yaml\n",
265+
"20/02/2024 17:38:07 - INFO - create_indicator: Etape 1 sans bbox\n",
266+
"20/02/2024 17:38:07 - INFO - create_indicator: Etape 1 --> indicateurSpec.get('catalogUri') and indicateurSpec.get('dataName') is not None\n",
267+
"20/02/2024 17:38:07 - INFO - create_indicator: Etape 1 --> sourceType : None\n",
268+
"20/02/2024 17:38:07 - INFO - source Type OTHER : ex . VECTOR \n",
269+
"20/02/2024 17:38:07 - DEBUG - open file: /home/hugo/projets/bilbo-packages/examples/../test_catalog_file/data_reference.yaml\n",
270+
"20/02/2024 17:38:07 - INFO - Calculation ...\n",
271+
"20/02/2024 17:38:07 - INFO - with Dask - metaModelList : ' ['hex_id', 'kba', 'geometry', 'id_split']\n",
272+
"20/02/2024 17:38:07 - INFO - reading intake source sources:\n",
273+
" h3_nc_6:\n",
274+
" args:\n",
275+
" geopandas_kwargs:\n",
276+
" crs: 3163\n",
277+
" geom_col: geometry\n",
278+
" sql_expr: select hex_id, geometry from bilbo.h3_nc_6 order by hex_id limit\n",
279+
" 10000 offset 0\n",
280+
" table: bilbo.h3_nc_6\n",
281+
" uri: postgresql://hroussaffa:[email protected]:5432/oeil_traitement\n",
282+
" description: \"Maille H3 niveau 6 sur les terres emerg\\xE9es de NC\"\n",
283+
" driver: intake_geopandas.geopandas.PostGISSource\n",
284+
" metadata:\n",
285+
" catalog_dir: /home/hugo/projets/bilbo-packages/examples/../test_catalog_file/\n",
286+
"...\n"
287+
]
288+
},
289+
{
290+
"name": "stdout",
291+
"output_type": "stream",
292+
"text": [
293+
"sources:\n",
294+
" h3_nc_6:\n",
295+
" args:\n",
296+
" geopandas_kwargs:\n",
297+
" crs: 3163\n",
298+
" geom_col: geometry\n",
299+
" sql_expr: select hex_id, geometry from bilbo.h3_nc_6\n",
300+
" table: bilbo.h3_nc_6\n",
301+
" uri: postgresql://hroussaffa:[email protected]:5432/oeil_traitement\n",
302+
" description: \"Maille H3 niveau 6 sur les terres emerg\\xE9es de NC\"\n",
303+
" driver: intake_geopandas.geopandas.PostGISSource\n",
304+
" metadata:\n",
305+
" catalog_dir: /home/hugo/projets/bilbo-packages/examples/../test_catalog_file/\n",
306+
"\n"
307+
]
308+
},
309+
{
310+
"name": "stderr",
311+
"output_type": "stream",
312+
"text": [
313+
"20/02/2024 17:38:07 - DEBUG - df: 759\n",
314+
"20/02/2024 17:38:07 - DEBUG - metaModelList ['hex_id', 'kba', 'geometry', 'id_split']\n",
315+
"20/02/2024 17:38:07 - DEBUG - Load data in memory (759, 2)\n",
316+
"20/02/2024 17:38:07 - DEBUG - converting to dask with chunksize 300\n",
317+
"20/02/2024 17:38:08 - DEBUG - PROJ_ERROR: proj_create: unrecognized format / unknown name\n",
318+
"20/02/2024 17:38:08 - DEBUG - data : Dask GeoDataFrame Structure:\n",
319+
" hex_id geometry\n",
320+
"npartitions=300 \n",
321+
"0 string geometry\n",
322+
"3 ... ...\n",
323+
"... ... ...\n",
324+
"757 ... ...\n",
325+
"758 ... ...\n",
326+
"Dask Name: to_pyarrow_string, 2 graph layers\n",
327+
"20/02/2024 17:38:08 - DEBUG - func : <function generateIndicateur_parallel_v2 at 0x7f58ac352ca0>\n",
328+
"20/02/2024 17:38:08 - INFO - Etape 1 - Result: <class 'dask_geopandas.core.GeoDataFrame'>\n",
329+
"20/02/2024 17:38:08 - INFO - create_indicator: Etape 2\n",
330+
"20/02/2024 17:38:08 - INFO - indexListIndicator None\n",
331+
"20/02/2024 17:38:08 - INFO - create_indicator: Etape 2 --> pas de indexListIndicator\n"
332+
]
333+
}
334+
],
134335
"source": [
135336
"%%time\n",
136337
"\n",

oeilnc_config/settings.py

Lines changed: 22 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,7 @@
4747
if null_variables:
4848
logging.warning("The following variables are null: {}".format(", ".join(null_variables)))
4949

50-
def initializeWorkers(dotenvPath='.env'):
51-
52-
53-
54-
load_dotenv(dotenv_path=dotenvPath)
50+
def initializeWorkers(config_dict: dict):
5551

5652
global commun_path
5753
global data_catalog_dir
@@ -65,48 +61,27 @@ def initializeWorkers(dotenvPath='.env'):
6561
global pswd
6662
global host
6763
global port
68-
69-
commun_path = getenv("COMMUN_PATH")
70-
project_dir = getenv("PROJECT_PATH")
71-
data_catalog_dir = getenv("DATA_CATALOG_DIR")
72-
data_output_dir = getenv("DATA_OUTPUT_DIR")
73-
sig_data_path = getenv("SIG_DATA_PATH")
74-
project_db_schema = getenv("PROJECT_DB_SCHEMA")
75-
data_config_file = getenv("DATA_CONFIG_DIR")
76-
dimension_catalog_dir = getenv("DIM_CATALOG_DIR")
77-
78-
79-
user = getenv("DB_USER")
80-
pswd = getenv("DB_PWD")
81-
host = getenv("DB_HOST")
82-
port = getenv("DB_PORT")
83-
84-
home = getenv("HOME_PATH")
85-
db_traitement = getenv("DB_WORKSPACE")
86-
db_ref = getenv("DB_REF")
87-
db_externe = getenv("DB_EXT")
88-
89-
90-
config_dict = {
91-
"user": user,
92-
"pswd": pswd,
93-
"host": host,
94-
"port": port,
95-
"home": home,
96-
"db_traitement": db_traitement,
97-
"db_ref": db_ref,
98-
"db_externe": db_externe,
99-
"commun_path": commun_path,
100-
"project_dir": project_dir,
101-
"data_catalog_dir": data_catalog_dir,
102-
"data_output_dir": data_output_dir,
103-
"sig_data_path": sig_data_path,
104-
"project_db_schema": project_db_schema,
105-
"data_config_file": data_config_file,
106-
"dimension_catalog_dir": dimension_catalog_dir
107-
}
108-
109-
logging.debug(f'Workers Settings - getPaths - config {config_dict}')
64+
global home
65+
global db_traitement
66+
global db_ref
67+
global db_externe
68+
69+
user = config_dict.get("user")
70+
pswd = config_dict.get("pswd")
71+
host = config_dict.get("host")
72+
port = config_dict.get("port")
73+
home = config_dict.get("home")
74+
db_traitement = config_dict.get("db_traitement")
75+
db_ref = config_dict.get("db_ref")
76+
db_externe = config_dict.get("db_externe")
77+
commun_path = config_dict.get("commun_path")
78+
project_dir = config_dict.get("project_dir")
79+
data_catalog_dir = config_dict.get("data_catalog_dir")
80+
data_output_dir = config_dict.get("data_output_dir")
81+
sig_data_path = config_dict.get("sig_data_path")
82+
project_db_schema = config_dict.get("project_db_schema")
83+
data_config_file = config_dict.get("data_config_file")
84+
dimension_catalog_dir = config_dict.get("dimension_catalog_dir")
11085

11186
return
11287

test_catalog_file/config/H3_6_NC.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ confRatio:
2222
dataName: h3_nc_6
2323
indexRef: hex_id
2424
nbchuncks: 300
25-
limit: 100
26-
offset: 600
25+
limit: 10000
26+
offset: 0
2727
epsg: EPSG:3163
2828
overlayHow: intersection

0 commit comments

Comments
 (0)