BU-ISCIII · Shettland · Jan 14, 2025 · Jan 14, 2025 · Jan 14, 2025 · Jan 14, 2025
diff --git a/relecov_tools/conf/laboratory_address.json b/relecov_tools/conf/laboratory_address.json
@@ -11087,5 +11087,94 @@
         "submitting_institution": "Hospital Comarcal de Melilla",
         "submitting_institution_address": "Remonta, 2",
         "submitting_institution_email": ""
+    },
+    "Ministerio Sanidad": {
+        "collecting_institution_address": "Paseo del Prado, 18",
+        "collecting_institution_email": "",
+        "geo_loc_state": "Comunidad de Madrid",
+        "geo_loc_region": "Madrid",
+        "geo_loc_city": "Madrid",
+        "geo_loc_country": "Spain",
+        "submitting_institution": "Ministerio Sanidad",
+        "submitting_institution_address": "Paseo del Prado, 18",
+        "submitting_institution_email": ""
+    },
+    "Analiza, S.L. Hospital Moncloa": {
+        "collecting_institution_address": "Av. de Valladolid, 83",
+        "collecting_institution_email": "[email protected]",
+        "geo_loc_state": "Comunidad de Madrid",
+        "geo_loc_region": "Madrid",
+        "geo_loc_city": "Madrid",
+        "geo_loc_country": "Spain",
+        "submitting_institution": "Analiza, S.L. Hospital Moncloa",
+        "submitting_institution_address": "Av. de Valladolid, 83",
+        "submitting_institution_email": "[email protected]"
+    },
+    "Atimmunolab": {
+        "collecting_institution_address": "Calle de la Salud, 14",
+        "collecting_institution_email": "[email protected]",
+        "geo_loc_state": "Comunidad Valenciana",
+        "geo_loc_region": "Valencia",
+        "geo_loc_city": "Valencia",
+        "geo_loc_country": "Spain",
+        "submitting_institution": "Atimmunolab",
+        "submitting_institution_address": "Calle de la Salud, 14",
+        "submitting_institution_email": "[email protected]"
+    },
+    "Centro Nacional De Microbiologia": {
+        "collecting_institution_address": "Carretera Majadahonda-Pozuelo, Km 2",
+        "collecting_institution_email": "[email protected]",
+        "geo_loc_state": "Comunidad de Madrid",
+        "geo_loc_region": "Madrid",
+        "geo_loc_city": "Madrid",
+        "geo_loc_country": "Spain",
+        "submitting_institution": "Centro Nacional De Microbiologia",
+        "submitting_institution_address": "Carretera Majadahonda-Pozuelo, Km 2",
+        "submitting_institution_email": "[email protected]"
+    },
+    "Presidencia Del Gobierno": {
+        "collecting_institution_address": "Complejo de la Moncloa, Av. Puerta de Hierro, s/n",
+        "collecting_institution_email": "[email protected]",
+        "geo_loc_state": "Comunidad de Madrid",
+        "geo_loc_region": "Madrid",
+        "geo_loc_city": "Madrid",
+        "geo_loc_country": "Spain",
+        "submitting_institution": "Presidencia Del Gobierno",
+        "submitting_institution_address": "Complejo de la Moncloa, Av. Puerta de Hierro, s/n",
+        "submitting_institution_email": "[email protected]"
+    },
+    "Xerencia De Xestión Integrada De Pontevedra": {
+        "collecting_institution_address": "Rúa Loureiro Crespo, 2",
+        "collecting_institution_email": "[email protected]",
+        "geo_loc_state": "Galicia",
+        "geo_loc_region": "Pontevedra",
+        "geo_loc_city": "Pontevedra",
+        "geo_loc_country": "Spain",
+        "submitting_institution": "Xerencia De Xestión Integrada De Pontevedra",
+        "submitting_institution_address": "Rúa Loureiro Crespo, 2",
+        "submitting_institution_email": "[email protected]"
+    },
+    "Consejeria De Sanidad": {
+        "collecting_institution_address": "Calle de Aduana, 29",
+        "collecting_institution_email": "[email protected]",
+        "geo_loc_state": "Comunidad de Madrid",
+        "geo_loc_region": "Madrid",
+        "geo_loc_city": "Madrid",
+        "geo_loc_country": "Spain",
+        "submitting_institution": "Consejeria De Sanidad",
+        "submitting_institution_address": "Calle de Aduana, 29",
+        "submitting_institution_email": "[email protected]"
+    },
+    "Instituto De Medicina Legal De Toledo": {
+        "collecting_institution_address": "Calle de Dinamarca, 1",
+        "collecting_institution_email": "[email protected]",
+        "geo_loc_state": "Castilla-La Mancha",
+        "geo_loc_region": "Toledo",
+        "geo_loc_city": "Toledo",
+        "geo_loc_country": "Spain",
+        "submitting_institution": "Instituto De Medicina Legal De Toledo",
+        "submitting_institution_address": "Calle de Dinamarca, 1",
+        "submitting_institution_email": "[email protected]"
     }
+
 }
diff --git a/relecov_tools/institution_scripts/ISCIII.py b/relecov_tools/institution_scripts/ISCIII.py
@@ -26,7 +26,6 @@ def replace_originating_lab(metadata, f_data, mapped_fields, heading):
             except KeyError as e:
                 log.error("Value  %s does not exist ", e)
                 stderr.print(f"[red] Value {e} does not exist")
-                sys.exit(1)
     return metadata
 
 
@@ -41,7 +40,7 @@ def added_seq_inst_model(metadata, f_data, mapped_fields, heading):
             except KeyError as e:
                 log.error("Value  %s does not exist ", e)
                 stderr.print(f"[red] Value {e} does not exist")
-                sys.exit(1)
+                continue
             if "nextseq" in run_name:
                 row[m_idx] = "Illumina NextSeq 500"
             elif "next_seq" in run_name:
@@ -55,7 +54,6 @@ def added_seq_inst_model(metadata, f_data, mapped_fields, heading):
             else:
                 log.error("Value  %s is not defined in the mapping ", run_name)
                 stderr.print(f"[red] Value {run_name} is not defined in the mapping")
-                sys.exit(1)
     return metadata
 
 
@@ -70,20 +68,19 @@ def translate_gender_to_english(metadata, f_data, mapped_fields, heading):
         "unknown": "Not Provided",
     }
     for row in metadata[1:]:
-        for key, val in mapped_fields.items():
+        for key, _ in mapped_fields.items():
             m_idx = heading.index(key)
             if row[m_idx] is None or row[m_idx] == "":
                 row[m_idx] = "Not Provided"
                 continue
-            item = row[m_idx].lower()
+            item = str(row[m_idx]).lower()
             if item in map_dict:
                 row[m_idx] = map_dict[item]
             else:
-                log.error("The '%s' is not a valid data for translation", row[m_idx])
+                log.error("The %s is not a valid data for translation", row[m_idx])
                 stderr.print(
-                    "f[red] The '{row[m_idx]}' is not a valid data for translation"
+                    f"[red] The '{row[m_idx]}' is not a valid data for translation"
                 )
-                sys.exit(1)
     return metadata
 
 
@@ -93,7 +90,7 @@ def translate_specimen_source(metadata, f_data, mapped_fields, heading):
         for key, val in mapped_fields.items():
             m_idx = heading.index(key)
             if row[m_idx] is None:
-                row[m_idx] = "not provided"
+                row[m_idx] = "Not Provided"
             elif "ASPIRADO NASOFARÍNGEO" in row[m_idx].upper():
                 row[m_idx] = "Nasopharynx Aspiration"
             elif "ASPIRADO BRONQUIAL" in row[m_idx].upper():
@@ -103,19 +100,18 @@ def translate_specimen_source(metadata, f_data, mapped_fields, heading):
             elif "EXTRACTO" in row[m_idx].upper():
                 row[m_idx] = "Scraping"
             elif "EXUDADO FARÍNGEO" in row[m_idx].upper():
-                row[m_idx] = "Pharynx Swabbing"
+                row[m_idx] = "Pharynx Swab"
             elif "EXUDADO NASOFARÍNGEO" in row[m_idx].upper():
-                row[m_idx] = "Nasopharynx Swabbing"
+                row[m_idx] = "Nasopharynx swab"
             elif "EXUDADO OROFARINGEO" in row[m_idx].upper():
-                row[m_idx] = "Oropharynx Swabbing"
+                row[m_idx] = "Oropharynx Swab"
             elif "PLACENTA" in row[m_idx].upper():
                 row[m_idx] = "Placenta"
             elif "SALIVA" in row[m_idx].upper():
                 row[m_idx] = "Saliva"
             else:
                 log.error("The field is not correctly written or is not filled")
                 stderr.print("The field is not correctly written or not filled")
-                sys.exit(1)
     return metadata
 
 
@@ -157,7 +153,6 @@ def translate_purpose_seq_to_english(metadata, f_data, mapped_fields, heading):
                 stderr.print(
-                    "f[red] The {row[m_idx]} is not a valid data for translation"
+                    f"[red] The {row[m_idx]} is not a valid data for translation"
                 )
-                sys.exit(1)
     return metadata
 
 
@@ -195,5 +190,4 @@ def findout_library_layout(metadata, f_data, mapped_fields, heading):
                 stderr.print(
                     f"[red] {e} is not defined in function findout_library_layout"
                 )
-                sys.exit(1)
     return metadata
diff --git a/relecov_tools/metadata_homogeneizer.py b/relecov_tools/metadata_homogeneizer.py
@@ -2,9 +2,11 @@
 import os
 import sys
 import logging
+import json
 import rich.console
 
 import relecov_tools.utils
+import pandas as pd
 from relecov_tools.config_json import ConfigJson
 
 log = logging.getLogger(__name__)
@@ -26,7 +28,9 @@ def __init__(self, institution=None, directory=None, output_folder=None):
         self.heading = self.config_json.get_topic_data(
             "lab_metadata", "metadata_lab_heading"
         )
-
+        self.metadata_processing = self.config_json.get_topic_data(
+            "sftp_handle", "metadata_processing"
+        )
         # handle institution
         if institution is None:
             self.institution = relecov_tools.utils.prompt_selection(
@@ -164,9 +168,9 @@ def handling_files(self, file_data, data_to_add):
             elif f_name.endswith(".csv"):
                 data = relecov_tools.utils.read_csv_file_return_dict(f_name, ",")
             elif f_name.endswith(".xlsx"):
-                header_flag = self.metadata_processing.get("header_flag")
-                data = relecov_tools.utils.read_excel_file(
-                    f_name, "Sheet", header_flag, leave_empty=True
+                excel_sheet = self.metadata_processing.get("excel_sheet")
+                data, _ = relecov_tools.utils.read_excel_file(
+                    f_name, excel_sheet, "ID CNM", leave_empty=True
                 )
             else:
                 log.error("Additional file extension %s is not supported ", f_name)
@@ -200,17 +204,21 @@ def handling_files(self, file_data, data_to_add):
                         + str(s_value)
                     )
                     continue
+
                     # sys.exit(1)
                 for m_field, f_field in file_data["mapped_fields"].items():
                     try:
                         meta_idx = self.heading.index(m_field)
                     except ValueError as e:
-                        log.error("Field %s does not exist in Metadata ", e)
+                        log.error("Field %s does not exist in Metadata heading, check config", e)
                         stderr.print(f"[red] Field {e} does not exist")
-                        sys.exit(1)
+                        break
                     row[meta_idx] = item_data[f_field]
 
+
         else:
+            if data == {'ERROR': 'not valid format'}:
+                raise ValueError(f"Unknown error during processing of {file_data['file_name']}")
             func_name = file_data["function"]
             stderr.print("[yellow] Start processing function " + func_name)
             exec(
@@ -224,7 +232,6 @@ def handling_files(self, file_data, data_to_add):
                 func_name
                 + "(data_to_add, data, file_data['mapped_fields'], self.heading)"
             )
-
         stderr.print("[green] Succesful processing of additional file ")
         return data_to_add
 

diff --git a/relecov_tools/schema/institution_schemas/ISCIII.json b/relecov_tools/schema/institution_schemas/ISCIII.json
@@ -88,14 +88,6 @@
             "mapped_key": "Runid",
             "function": "None"
         },
-        "read_length": {
-            "file_name": "samples_run_services_length.tsv",
-            "mapped_fields": {
-                "Read Length" : "read1_cycles"
-            },
-            "mapped_key": "Sample ID given for sequencing",
-            "function": "None"
-        },
         "samples_in_run": {
             "file_name": "run_and_num_of_samples.csv",
             "mapped_fields": {
@@ -114,7 +106,7 @@
         },
         "purpose_of_sequencing": {
             "file_name": "",
-            "mapped_fields" : {"Purpose of Sequencing" : "" },
+            "mapped_fields" : {"Purpose of sampling" : "" },
             "mapped_key" : "",
             "function": "translate_purpose_seq_to_english"
         },

diff --git a/relecov_tools/schema/relecov_schema.json b/relecov_tools/schema/relecov_schema.json
@@ -975,7 +975,15 @@
                  "Centro Sanitario Cinco Villas",
                  "Hospital Viamed Montecanal",
                  "Hospital Universitario De Ceuta",
-                 "Hospital Comarcal"
+                 "Hospital Comarcal",
+                 "Analiza, S.L. Hospital Moncloa",
+                 "Atimmunolab",
+                 "Ministerio Sanidad",
+                 "Centro Nacional De Microbiologia",
+                 "Presidencia Del Gobierno",
+                 "Xerencia De Xestión Integrada De Pontevedra",
+                 "Consejeria De Sanidad",
+                 "Instituto De Medicina Legal De Toledo"
             ],
             "ontology": "GENEPIO:0001153",
             "type": "string",

diff --git a/relecov_tools/utils.py b/relecov_tools/utils.py
@@ -62,6 +62,29 @@ def read_json_file(j_file):
     return data
 
 
+def write_to_excel_file(data, f_name, sheet_name, post_process=None):
+    """Write the rows in data to a new excel file f_name, in sheet sheet_name.
+    post_process: optional dict; may hold "insert_cols" and/or "insert_rows".
+    """
+    post_process = post_process or {}  # None default must not break "in" checks
+    book = openpyxl.Workbook()
+    sheet = book.active
+    sheet.title = sheet_name  # name the sheet even when no rows are inserted
+    for row in data:
+        sheet.append(row)
+    # numbering column: header "CAMPO" plus 1..n-1, written in A (assumes idx 1)
+    if "insert_cols" in post_process:
+        sheet.insert_cols(post_process["insert_cols"])
+        sheet["A1"] = "CAMPO"
+        for counter in range(1, len(data)):
+            sheet[f"A{counter + 1}"] = counter
+    # empty rows pushed on top of the sheet, as many as "insert_rows" says
+    if "insert_rows" in post_process:
+        for _ in range(post_process["insert_rows"]):
+            sheet.insert_rows(1)
+    book.save(f_name)
+
+
 def read_excel_file(f_name, sheet_name, header_flag, leave_empty=True):
     """Read the input excel file and give the information in a list
     of dictionaries
@@ -73,7 +96,7 @@ def read_excel_file(f_name, sheet_name, header_flag, leave_empty=True):
             idx + 1 for idx, x in enumerate(ws_metadata_lab.values) if header_flag in x
         ][0]
     except IndexError:
-        raise KeyError(f"Header flag '{header_flag}' could not be found in {f_name}")
+        raise IndexError(f"Header flag '{header_flag}' could not be found in {f_name}")
     heading = [str(i.value).strip() for i in ws_metadata_lab[heading_row] if i.value]
     ws_data = []
     for row in islice(ws_metadata_lab.values, heading_row, ws_metadata_lab.max_row):