33from hashlib import sha1
44import json
55from math import isnan
6- from pandas import DataFrame , read_csv , concat
6+ from pandas import DataFrame , read_csv , concat , read_excel
77import re
88import yaml
99try : import tomllib
@@ -943,6 +943,58 @@ def add_rows(self) -> None:
943943 self .datacard_data ["google_datacard" ] = temp_data
944944 self .set_schema_2 (self .datacard_data )
945945
class GenesisDatacard(FileReader):
    """
    DSI Reader that stores a dataset's data card as a row in the `genesis_datacard` table.
    Input datacard should follow template in `examples/test/template_dc_genesis.xlsx`
    """

    # Columns every Genesis data card spreadsheet must provide.
    _REQUIRED_COLUMNS = ("Metadata Element", "Supporting Element", "Requirement Level", "LANL Input Example")

    def __init__(self, filenames, **kwargs):
        """
        `filenames` : str or list of str
            File name(s) of Excel data card files to ingest. Each file must adhere to the
            LANL Genesis metadata standard. The Excel file must only have one sheet
            (only the first sheet of each file is read).
        """
        super().__init__(filenames, **kwargs)
        # Normalize to a list so add_rows can always iterate.
        if isinstance(filenames, str):
            self.datacard_files = [filenames]
        else:
            self.datacard_files = filenames
        self.genesis_data = OrderedDict()

    @staticmethod
    def _is_nonblank_text(value):
        """Return True when `value` is a string with at least one non-whitespace character."""
        # Blank Excel cells are read by pandas as float NaN, so the type check comes first.
        return isinstance(value, str) and value.strip() != ""

    def add_rows(self) -> None:
        """
        Flattens data in the input data card as a row in the `genesis_datacard` table

        Only rows whose "Requirement Level" is "mandatory" (case-insensitive, surrounding
        whitespace ignored) are ingested. For each such row the "Metadata Element" name is
        used as the column name when it is non-blank text; otherwise the "Supporting Element"
        name is used. Rows with neither are skipped. The "LANL Input Example" cell is the value.

        `return`: None.

        `raises`: ValueError
            If a file cannot be read as an Excel sheet, or if it lacks any required column.
        """
        temp_data = OrderedDict()
        for filename in self.datacard_files:
            try:
                temp_df = read_excel(filename, sheet_name=0)
            except Exception as err:
                # Keep the original cause attached so the real failure (missing file,
                # missing Excel engine, corrupt workbook, ...) is still visible.
                raise ValueError(f"Error reading in {filename} for the Genesis data card reader") from err

            if not set(self._REQUIRED_COLUMNS).issubset(temp_df.columns):
                raise ValueError(f"The required metadata columns are {', '.join(self._REQUIRED_COLUMNS)}")

            for _, row in temp_df.iterrows():
                level = row["Requirement Level"]
                # A blank requirement cell is NaN (a float); treat it as "not mandatory"
                # instead of crashing on `.lower()`.
                if not isinstance(level, str) or level.strip().lower() != "mandatory":
                    continue

                if self._is_nonblank_text(row["Metadata Element"]):
                    column_name = row["Metadata Element"]
                elif self._is_nonblank_text(row["Supporting Element"]):
                    column_name = row["Supporting Element"]
                else:
                    continue

                # Values accumulate per column name across rows and across files.
                temp_data.setdefault(column_name, []).append(row["LANL Input Example"])

        self.genesis_data["genesis_datacard"] = temp_data
        self.set_schema_2(self.genesis_data)

997+
946998class MetadataReader1 (FileReader ):
947999 """
9481000 DSI Reader that reads in an individual or a set of JSON metadata files
@@ -999,4 +1051,4 @@ def add_rows(self) -> None:
9991051 filename = self .target_table_prefix + "__" + filename
10001052 self .metadata_file_data [filename ] = json_data
10011053
1002- self .set_schema_2 (self .metadata_file_data )
1054+ self .set_schema_2 (self .metadata_file_data )
0 commit comments