Skip to content

Commit b3d8a50

Browse files
authored
Merge pull request #195 from lanl/schema_update
updated user api schema() to print out a db's create table statements
2 parents 7167d45 + b9817f8 commit b3d8a50

File tree

6 files changed

+74
-21
lines changed

6 files changed

+74
-21
lines changed

dsi/backends/duckdb.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,16 @@ def get_table_names(self, query):
366366
tables = [table for from_tbl, join_tbl in all_names if (table := from_tbl or join_tbl)]
367367
return tables
368368

369+
def get_schema(self):
    """
    Returns the structural schema of this database in the form of CREATE TABLE statements.

    The statements are read from the `sql` column of `duckdb_tables` through
    `self.query_artifacts`; rows whose `sql` value is NULL are excluded by the query.

    `return`: str
        Each table's CREATE TABLE statement, joined with newlines into one large string.
        An empty string is returned when the query yields no statements.
    """
    schema_stmts = self.query_artifacts(query="SELECT sql FROM duckdb_tables where sql NOT NULL ")
    # A plain join (rather than the pandas `.str` accessor) keeps working when the result
    # is empty or its `sql` column did not come back with a string dtype.
    return "\n".join(schema_stmts["sql"].dropna().astype(str))
378+
369379
# OLD NAME OF notebook(). TO BE DEPRECATED IN FUTURE DSI RELEASE
370380
def inspect_artifacts(self, interactive=False):
    """
    Legacy alias of `notebook()`: forwards `interactive` unchanged and returns its result.
    """
    notebook_result = self.notebook(interactive)
    return notebook_result

dsi/backends/sqlite.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,10 +357,20 @@ def get_table_names(self, query):
357357
tables = [table for from_tbl, join_tbl in all_names if (table := from_tbl or join_tbl)]
358358
return tables
359359

360+
def get_schema(self):
    """
    Returns the structural schema of this database in the form of CREATE TABLE statements.

    The statements are read from the `sql` column of `sqlite_master` through
    `self.query_artifacts`, ordered by `type` and then `name`; rows whose `sql`
    value is NULL are excluded by the query.

    `return`: str
        Each stored statement, joined with newlines into one large string.
        An empty string is returned when the query yields no statements.
    """
    schema_stmts = self.query_artifacts(query="SELECT sql FROM sqlite_master where sql NOT NULL ORDER BY type, name")
    # A plain join (rather than the pandas `.str` accessor) keeps working when the result
    # is empty or its `sql` column did not come back with a string dtype.
    return "\n".join(schema_stmts["sql"].dropna().astype(str))
369+
360370
# OLD NAME OF notebook(). TO BE DEPRECATED IN FUTURE DSI RELEASE
361371
def inspect_artifacts(self, interactive=False):
    """
    Legacy alias of `notebook()`: forwards `interactive` unchanged and returns its result.
    """
    notebook_result = self.notebook(interactive)
    return notebook_result
363-
373+
364374
def notebook(self, interactive=False):
365375
"""
366376
Generates a Jupyter notebook displaying all the data in the SQLite database.

dsi/core.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,36 @@ def get_table(self, table_name, dict_return = False):
621621
if output is not None and isinstance(output, (pd.DataFrame, OrderedDict)):
622622
return output
623623

624+
def get_schema(self):
    """
    Returns the first loaded database's structural schema as several CREATE TABLE statements.

    `return`: str
        Whatever the first loaded backend's `get_schema()` returns; each table's
        CREATE TABLE statement is concatenated into one large string.

    Raises NotImplementedError if no backend has been loaded, and RuntimeError if
    `self.valid_backend` rejects the first loaded backend.
    """
    verbose = self.debug_level != 0
    if verbose:
        self.logger.info("-------------------------------------")
        # This is a progress message, not a failure, so it is logged at INFO level.
        self.logger.info('Getting the structural schema of the first loaded backend')

    if not self.loaded_backends:
        if verbose:
            self.logger.error('Need to load a valid backend to be able to get its structural schema')
        raise NotImplementedError('Need to load a valid backend to be able to get its structural schema')

    backend = self.loaded_backends[0]
    # valid_backend is given the name of the backend's first base class.
    parent_backend = backend.__class__.__bases__[0].__name__
    if not self.valid_backend(backend, parent_backend):
        if verbose:
            self.logger.error("First loaded backend needs to have data to get its structural schema")
        raise RuntimeError("First loaded backend needs to have data to get its structural schema")

    start = datetime.now()
    output = backend.get_schema()
    end = datetime.now()
    if verbose:
        self.logger.info(f"Runtime: {end-start}")

    return output
653+
624654
def find(self, query_object):
625655
"""
626656
Find all instances of `query_object` across all tables, columns, and cells in the first loaded backend.

dsi/dsi.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -88,27 +88,33 @@ def list_backends(self):
8888
print("DuckDB : In-process SQL backend optimized for fast analytics on large datasets.\n")
8989
print()
9090

91-
def schema(self, filename=None):
    """
    Either loads a relational database schema into DSI with a specified `filename` OR returns this database's structural schema.

    `filename` : str, optional
        Path to a JSON file describing the relationships of the tables in a database.
        The schema should follow the format described in :ref:`user_schema_example_label`

    `return` : If `filename` is None (or empty), returns the result of ``self.t.get_schema()``,
        i.e. the structural schema of this database. Otherwise nothing is returned.

    Exits via ``sys.exit`` if `filename` is given but does not exist.

    **If loading a relational schema, this function must be called before reading in any associated data files**
    """
    if filename:
        if not os.path.exists(filename):
            sys.exit("schema() ERROR: Input schema file must have a valid filepath. Please check again.")

        # Silence whatever the reader prints; the context manager also closes the devnull handle.
        with open(os.devnull, 'w') as fnull, redirect_stdout(fnull):
            self.t.load_module('plugin', 'Schema', 'reader', filename=filename)
        self.schema_read = True

        # Remember every table named in a primary- or foreign-key relation.
        relations = self.t.active_metadata["dsi_relations"]
        pk_tables = {t[0] for t in relations["primary_key"]}
        fk_tables = {t[0] for t in relations["foreign_key"] if t[0] is not None}
        self.schema_tables = pk_tables.union(fk_tables)
        print(f"Successfully loaded the schema file: {filename}")
    else:
        # Silence any printing done while the schema is gathered, then hand the result back.
        with open(os.devnull, 'w') as fnull, redirect_stdout(fnull):
            return self.t.get_schema()
112118

113119
def list_readers(self):
114120
"""

dsi/plugins/file_reader.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -769,9 +769,6 @@ def add_rows(self) -> None:
769769
return (ValueError, f"Error in reading {filename} data card. Please ensure all fields included match the template")
770770

771771
self.datacard_data["oceans11_datacard"] = temp_data
772-
773-
self.datacard_data["oceans11_datacard"]["remote"] = [""] * len(self.datacard_files)
774-
self.datacard_data["oceans11_datacard"]["local"] = [""] * len(self.datacard_files)
775772
self.set_schema_2(self.datacard_data)
776773

777774
class DublinCoreDatacard(FileReader):

dsi/plugins/file_writer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def get_rows(self, collection) -> None:
102102
html_table = f"{tableName} [label="
103103
html_table += f"<<TABLE CELLSPACING=\"0\"><TR><TD COLSPAN=\"{num_tbl_cols}\"><B>{tableName}</B></TD></TR>"
104104

105-
col_list = tableData.keys()
105+
col_list = list(tableData.keys())
106106
if tableName == "dsi_units":
107107
col_list = ["table_name", "column_name", "unit"]
108108
if self.max_cols is not None:

0 commit comments

Comments
 (0)