Skip to content

[GSOC] Add a metadata table to an existing Carsus output #433

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 37 additions & 2 deletions carsus/io/cmfgen/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,25 @@ def cross_sections_squeeze(
ion_phixs_table = pd.concat(phixs_table_list)

return ion_phixs_table

@staticmethod
def generate_metadata(doi, reference, unit_mappings):
    """
    Build a metadata dictionary from a DOI, a reference and column units.

    Parameters:
        doi (str): The DOI reference.
        reference (str): The source reference.
        unit_mappings (dict): Dictionary mapping column names to units.
            ``None`` is treated as an empty mapping.

    Returns:
        dict: A metadata dictionary with the keys ``"doi"``,
        ``"reference"`` and ``"units"``. ``"units"`` is a shallow copy
        of ``unit_mappings``.
    """
    # Copy the mapping so that later changes to the caller's dictionary
    # do not silently alter metadata that has already been generated.
    units = dict(unit_mappings) if unit_mappings is not None else {}

    return {
        "doi": doi,
        "reference": reference,
        "units": units,
    }

def _get_levels_lines(
self,
Expand Down Expand Up @@ -980,11 +999,27 @@ def _get_levels_lines(
["atomic_number", "ion_charge", "level_index"]
)
self.cross_sections = cross_sections.sort_index()
# Define unit mappings for different columns
unit_mappings = {
"energy": "erg",
"energy_lower": "erg",
"energy_upper": "erg",
"wavelength": "nanometer",
}

# Generate metadata dynamically
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is dynamic if the values are hardcoded, though it does demonstrate your method works.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I took this as an example because the first GSoC objective is to attach a DOI reference and physical units to one of the outputs. However, I agree that a more dynamic approach would be beneficial in the future, and I plan to add parameters to the relevant functions so that metadata can be passed as a dictionary.

metadata = self.generate_metadata(
doi="https://doi.org/10.1086/177435",
reference="Verner et al. 1996",
unit_mappings=unit_mappings
)

self.levels = levels
self.lines = lines

return
# Attach metadata to DataFrames
levels.attrs.update(metadata)
lines.attrs.update(metadata)
return

def _get_collisions(
self, data, temperature_grid=None, drop_mismatched_labels=False
Expand Down
308 changes: 308 additions & 0 deletions docs/io/meta_data.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,308 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[\u001b[1m py.warnings\u001b[0m][\u001b[1;33mWARNING\u001b[0m] - /Users/pankajdhyani/Documents/GitHub/carsus/.venv/lib/python3.9/site-packages/urllib3/__init__.py:35: NotOpenSSLWarning: urllib3 v2 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. See: https://github.com/urllib3/urllib3/issues/3020\n",
" warnings.warn(\n",
" (\u001b[1mwarnings.py\u001b[0m:109)\n",
"[\u001b[1m carsus.io.cmfgen.base\u001b[0m][ \u001b[1;37mINFO\u001b[0m] - Loading atomic data for Al 2. (\u001b[1mbase.py\u001b[0m:875)\n",
"{'doi': 'https://doi.org/10.1086/177435', 'reference': 'Verner et al. 1996', 'units': {'energy': 'erg', 'energy_lower': 'erg', 'energy_upper': 'erg', 'wavelength': 'meter'}}\n",
"{'doi': 'https://doi.org/10.1086/177435', 'reference': 'Verner et al. 1996', 'units': {'energy': 'erg', 'energy_lower': 'erg', 'energy_upper': 'erg', 'wavelength': 'meter'}}\n",
" energy j label \\\n",
"atomic_number ion_charge level_index \n",
"13 2 0 0.000000 0.0 3s21Se[0] \n",
" 1 37393.030000 0.0 3s3p3Po[0] \n",
" 2 37453.910000 1.0 3s3p3Po[1] \n",
" 3 37577.790000 2.0 3s3p3Po[2] \n",
" 4 59852.020000 1.0 3s3p1Po[1] \n",
"... ... ... ... \n",
" 151 151302.627136 391.5 3s28w1W \n",
" 152 151340.573335 1261.0 3s29w3W \n",
" 153 151340.573335 420.0 3s29w1W \n",
" 154 151374.788527 1349.5 3s30w3W \n",
" 155 151374.788527 449.5 3s30w1W \n",
"\n",
" method priority \n",
"atomic_number ion_charge level_index \n",
"13 2 0 meas 10 \n",
" 1 meas 10 \n",
" 2 meas 10 \n",
" 3 meas 10 \n",
" 4 meas 10 \n",
"... ... ... \n",
" 151 meas 10 \n",
" 152 meas 10 \n",
" 153 meas 10 \n",
" 154 meas 10 \n",
" 155 meas 10 \n",
"\n",
"[156 rows x 5 columns]\n",
" energy_lower \\\n",
"atomic_number ion_charge level_index_lower level_index_upper \n",
"13 2 0 2 0.000000 \n",
" 4 0.000000 \n",
" 17 0.000000 \n",
" 33 0.000000 \n",
" 46 0.000000 \n",
"... ... \n",
" 150 154 151302.627136 \n",
" 151 153 151302.627136 \n",
" 155 151302.627136 \n",
" 152 154 151340.573335 \n",
" 153 155 151340.573335 \n",
"\n",
" energy_upper \\\n",
"atomic_number ion_charge level_index_lower level_index_upper \n",
"13 2 0 2 37453.910000 \n",
" 4 59852.020000 \n",
" 17 106920.560000 \n",
" 33 125869.040000 \n",
" 46 134919.400000 \n",
"... ... \n",
" 150 154 151374.788527 \n",
" 151 153 151340.573335 \n",
" 155 151374.788527 \n",
" 152 154 151374.788527 \n",
" 153 155 151374.788527 \n",
"\n",
" gf \\\n",
"atomic_number ion_charge level_index_lower level_index_upper \n",
"13 2 0 2 0.000011 \n",
" 4 1.840000 \n",
" 17 0.003020 \n",
" 33 0.000567 \n",
" 46 0.001090 \n",
"... ... \n",
" 150 154 1921.372320 \n",
" 151 153 4411.254400 \n",
" 155 639.642080 \n",
" 152 154 14694.708900 \n",
" 153 155 4892.433400 \n",
"\n",
" j_lower \\\n",
"atomic_number ion_charge level_index_lower level_index_upper \n",
"13 2 0 2 0.0 \n",
" 4 0.0 \n",
" 17 0.0 \n",
" 33 0.0 \n",
" 46 0.0 \n",
"... ... \n",
" 150 154 1175.5 \n",
" 151 153 391.5 \n",
" 155 391.5 \n",
" 152 154 1261.0 \n",
" 153 155 420.0 \n",
"\n",
" j_upper \\\n",
"atomic_number ion_charge level_index_lower level_index_upper \n",
"13 2 0 2 1.0 \n",
" 4 1.0 \n",
" 17 1.0 \n",
" 33 1.0 \n",
" 46 1.0 \n",
"... ... \n",
" 150 154 1349.5 \n",
" 151 153 420.0 \n",
" 155 449.5 \n",
" 152 154 1349.5 \n",
" 153 155 449.5 \n",
"\n",
" wavelength \n",
"atomic_number ion_charge level_index_lower level_index_upper \n",
"13 2 0 2 266.9155 \n",
" 4 167.0787 \n",
" 17 93.5274 \n",
" 33 79.4477 \n",
" 46 74.1183 \n",
"... ... \n",
" 150 154 138500.0000 \n",
" 151 153 263500.0000 \n",
" 155 138500.0000 \n",
" 152 154 292200.0000 \n",
" 153 155 292200.0000 \n",
"\n",
"[3490 rows x 6 columns]\n"
]
}
],
"source": [
"from carsus.io.cmfgen import CMFGENReader, CMFGENEnergyLevelsParser, CMFGENOscillatorStrengthsParser\n",
"\n",
"levels_path = \"al2_osc_split.dat\" \n",
"lines_path = \"al2_osc_split.dat\" \n",
"\n",
"al2_lvl_parser = CMFGENEnergyLevelsParser(levels_path)\n",
"al2_osc_parser = CMFGENOscillatorStrengthsParser(lines_path)\n",
"\n",
"al2_lvl_data = {\n",
" (13, 2): {\n",
" \"levels\": al2_lvl_parser.base, # Energy levels\n",
" \"lines\": al2_osc_parser.base # Spectral lines (oscillator strengths)\n",
" }\n",
"}\n",
"\n",
"reader = CMFGENReader(al2_lvl_data)\n",
"# Print metadata from levels\n",
"print(reader.levels.attrs)\n",
"\n",
"# Print metadata from lines\n",
"print(reader.lines.attrs)\n",
"print(reader.levels) # Print levels DataFrame\n",
"print(reader.lines) \n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save DataFrames with Metadata to HDF5"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Saved Carsus output to carsus_output.h5\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Assume self.levels and self.lines contain metadata in .attrs\n",
"hdf_filename = \"carsus_output.h5\"\n",
"\n",
"with pd.HDFStore(hdf_filename, \"w\") as store:\n",
" store.put(\"levels\", reader.levels)\n",
" store.put(\"lines\", reader.lines)\n",
"\n",
" # Attach metadata to HDF5 store\n",
" store.get_storer(\"levels\").attrs.metadata = reader.levels.attrs\n",
" store.get_storer(\"lines\").attrs.metadata = reader.lines.attrs\n",
"\n",
"print(f\"Saved Carsus output to {hdf_filename}\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded Levels Metadata: {'doi': 'https://doi.org/10.1086/177435', 'reference': 'Verner et al. 1996', 'units': {'energy': 'erg', 'energy_lower': 'erg', 'energy_upper': 'erg', 'wavelength': 'meter'}}\n",
"Loaded Lines Metadata: {'doi': 'https://doi.org/10.1086/177435', 'reference': 'Verner et al. 1996', 'units': {'energy': 'erg', 'energy_lower': 'erg', 'energy_upper': 'erg', 'wavelength': 'meter'}}\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"hdf_filename = \"carsus_output.h5\"\n",
"\n",
"with pd.HDFStore(hdf_filename, \"r\") as store:\n",
" levels = store[\"levels\"]\n",
" lines = store[\"lines\"]\n",
"\n",
" levels_metadata = store.get_storer(\"levels\").attrs.metadata\n",
" lines_metadata = store.get_storer(\"lines\").attrs.metadata\n",
"\n",
"print(\"Loaded Levels Metadata:\", levels_metadata)\n",
"print(\"Loaded Lines Metadata:\", lines_metadata)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded Metadata for Levels: {'doi': 'https://doi.org/10.1086/177435', 'reference': 'Verner et al. 1996', 'units': {'energy': 'erg', 'energy_lower': 'erg', 'energy_upper': 'erg', 'wavelength': 'meter'}}\n",
"Loaded Metadata for Lines: {'doi': 'https://doi.org/10.1086/177435', 'reference': 'Verner et al. 1996', 'units': {'energy': 'erg', 'energy_lower': 'erg', 'energy_upper': 'erg', 'wavelength': 'meter'}}\n"
]
}
],
"source": [
"def save_with_metadata(filename, df_dict):\n",
" \"\"\"\n",
" Saves multiple Pandas DataFrames with metadata to an HDF5 file.\n",
"\n",
" Parameters:\n",
" filename (str): Name of the HDF5 file.\n",
" df_dict (dict): Dictionary of {name: dataframe} pairs, where each dataframe has .attrs metadata.\n",
" \"\"\"\n",
" with pd.HDFStore(filename, \"w\") as store:\n",
" for name, df in df_dict.items():\n",
" store.put(name, df)\n",
" store.get_storer(name).attrs.metadata = df.attrs\n",
"\n",
"def load_with_metadata(filename):\n",
" \"\"\"\n",
" Loads DataFrames and their metadata from an HDF5 file.\n",
"\n",
" Parameters:\n",
" filename (str): Name of the HDF5 file.\n",
"\n",
" Returns:\n",
" dict: Dictionary of {name: (dataframe, metadata)} pairs.\n",
" \"\"\"\n",
" data = {}\n",
" with pd.HDFStore(filename, \"r\") as store:\n",
" for key in store.keys():\n",
" df = store[key]\n",
" metadata = store.get_storer(key.lstrip(\"/\")).attrs.metadata\n",
" data[key.lstrip(\"/\")] = (df, metadata)\n",
" return data\n",
"\n",
"# Example usage:\n",
"save_with_metadata(\"carsus_output.h5\", {\"levels\": reader.levels, \"lines\": reader.lines})\n",
"\n",
"loaded_data = load_with_metadata(\"carsus_output.h5\")\n",
"print(\"Loaded Metadata for Levels:\", loaded_data[\"levels\"][1]) # Check metadata\n",
"print(\"Loaded Metadata for Lines:\", loaded_data[\"lines\"][1]) # Check metadata\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading