Dry run updates for GEOS-Chem Classic 14.5.0 #2555

Draft · wants to merge 1 commit into base: dev/14.5.0
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Moved calls to `RD_AOD` and `CALC_AOD` so that they are made from `Init_Aerosol` rather than `Init_Photolysis`
- Updated ResME CH4 reservoir emissions to apply seasonality via mask file


### Fixed
- Simplified SOA representations and fixed related AOD and TotalOA/OC calculations in benchmark
- Changed mass conservation adjustment in `vdiff_mod.F90` to use a mass tendency with units of `kg species/kg dry air`
@@ -41,6 +42,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fixed formatting error in `.github/workflows/stale.yml` that caused the Mark Stale Issues action not to run
- Fixed emissions in GCHP carbon ExtData.rc so that data in molecules/cm2/s are converted to kg/m2/s

### Removed
- Removed dry-run checks for files that are no longer needed for Cloud-J v8 from `cldj_interface_mod.F90`

## [14.4.3] - 2024-08-13
### Added
- Added tropopause pressure field in the satellite diagnostic (by @eamarais)
18 changes: 0 additions & 18 deletions GeosCore/cldj_interface_mod.F90
@@ -1323,9 +1323,6 @@ SUBROUTINE CloudJ_DryRun_Check( Input_Opt )
     !========================================================================
     amIRoot = Input_Opt%amIRoot

-    filePath = TRIM( Input_Opt%CloudJ_Dir ) // 'CJ77_inp.dat'
-    CALL Check_File_For_DryRun( filePath, amIRoot )
-
     filePath = TRIM( Input_Opt%CloudJ_Dir ) // 'FJX_spec.dat'
     CALL Check_File_For_DryRun( filePath, amIRoot )
@@ -1338,21 +1335,6 @@ SUBROUTINE CloudJ_DryRun_Check( Input_Opt )
     filePath = TRIM( Input_Opt%CloudJ_Dir ) // 'FJX_scat-ssa.dat'
     CALL Check_File_For_DryRun( filePath, amIRoot )

-    filePath = TRIM( Input_Opt%CloudJ_Dir ) // 'FJX_scat-UMa.dat'
-    CALL Check_File_For_DryRun( filePath, amIRoot )
-
-    filePath = TRIM( Input_Opt%CloudJ_Dir ) // 'FJX_scat-geo.dat'
-    CALL Check_File_For_DryRun( filePath, amIRoot )
-
-    filePath = TRIM( Input_Opt%CloudJ_Dir ) // 'atmos_std.dat'
-    CALL Check_File_For_DryRun( filePath, amIRoot )
-
-    filePath = TRIM( Input_Opt%CloudJ_Dir ) // 'atmos_h2och4.dat'
-    CALL Check_File_For_DryRun( filePath, amIRoot )
-
-    filePath = TRIM( Input_Opt%CloudJ_Dir ) // 'atmos_geomip.dat'
-    CALL Check_File_For_DryRun( filePath, amIRoot )
-
     filePath = TRIM( Input_Opt%CloudJ_Dir ) // 'FJX_j2j.dat'
     CALL Check_File_For_DryRun( filePath, amIRoot )
62 changes: 33 additions & 29 deletions run/shared/download_data.py
@@ -112,8 +112,12 @@ def extract_pathnames_from_log(
     # Read data from the file line by line.
     # Add file paths to the data_list set.
     line = ifile.readline()
+
     while line:
+
+        # Replace double slashes with single slash
+        line = line.replace("CHEM_INPUTS//", "CHEM_INPUTS/")

         # Convert line to uppercase for string match
         upcaseline = line.upper()
@@ -455,13 +459,13 @@ def create_download_script(
         Contains output from function parse_args.
     """

-    # Extract mirror parameters
-    mirror_name = args["mirror"]
-    mirror = args["config"]["mirrors"][mirror_name]
-    is_s3_bucket = mirror["s3_bucket"]
-    remote_root = mirror["remote"]
-    quote = mirror["quote"]
-    cmd_prefix = mirror["command"]
+    # Extract portal parameters
+    portal_name = args["portal"]
+    portal = args["config"]["portals"][portal_name]
+    is_s3_bucket = portal["s3_bucket"]
+    remote_root = portal["remote"]
+    quote = portal["quote"]
+    cmd_prefix = portal["command"]
     if "@PATH@" in cmd_prefix:
         cmd_prefix = cmd_prefix.replace("@PATH@", paths["local_prefix"])
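The parameter extraction can be exercised standalone; the `portal` dict below is a hypothetical entry following the `download_data.yml` schema, and `local_prefix` stands in for `paths["local_prefix"]`:

```python
# Hypothetical portal entry mirroring the download_data.yml schema
portal = {
    "s3_bucket": False,
    "remote": "https://geos-chem.s3-us-west-2.amazonaws.com",
    "command": 'wget -r -np -nH -R "*.html" -N -P @PATH@ ',
    "quote": '"',
}
local_prefix = "/data/ExtData"  # stand-in for paths["local_prefix"]

is_s3_bucket = portal["s3_bucket"]
remote_root = portal["remote"]
quote = portal["quote"]
cmd_prefix = portal["command"]

# Substitute the local download root into the wget command template
if "@PATH@" in cmd_prefix:
    cmd_prefix = cmd_prefix.replace("@PATH@", local_prefix)

print(cmd_prefix)
```

The `@PATH@` placeholder lets one YAML template serve any local data root; S3 portals omit it because `aws s3 cp` takes the destination as a separate argument.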
@@ -496,7 +500,7 @@ def create_download_script(

     # If the file does not exist in the run directory,
     # then copy it from the restart folder.
-    # This only has to be done if not using the amazon mirror.
+    # This only has to be done if not using the amazon portal.
     if not is_s3_bucket:
         if not os.path.exists(local_rst):
             index3 = remote_rst.find("GEOSCHEM_RESTARTS")
@@ -669,8 +673,8 @@ def download_the_data(
         return

     # Print a message
-    if len(args["mirror"]) > 0:
-        print(f"Downloading data from {args['mirror']}")
+    if len(args["portal"]) > 0:
+        print(f"Downloading data from {args['portal']}")

     # Create script to download missing files from AWS S3
     create_download_script(paths, args)
@@ -686,7 +690,7 @@ def download_the_data(

     # Raise an exception if the data was not successfully downloaded
     if status != 0:
-        msg = f"Error downloading data from {args['mirror']}"
+        msg = f"Error downloading data from {args['portal']}"
         raise RuntimeError(msg)
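The nonzero-exit-status check can be exercised with a stand-in command in place of the generated download script; `run_script` is a hypothetical wrapper written for this sketch (POSIX shell assumed), not a function in `download_data.py`:

```python
import subprocess

def run_script(cmd, portal):
    """Run a shell command; raise if it exits nonzero, as the script does."""
    status = subprocess.call(cmd, shell=True)
    if status != 0:
        raise RuntimeError(f"Error downloading data from {portal}")
    return status

run_script("true", "geoschem+aws")       # exit 0: returns quietly
try:
    run_script("false", "geoschem+aws")  # exit 1: raises RuntimeError
except RuntimeError as err:
    print(err)  # Error downloading data from geoschem+aws
```

Raising rather than returning an error code lets a failed download abort the whole dry-run workflow with a clear message naming the portal.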


@@ -701,24 +705,24 @@ def parse_args():
     args : dict
         args["config"] : Dict with global settings from download_data.yml
         args["dryrun_log"]: Name of the GEOS-Chem dry-run log file
-        args["mirror"]: Name of the remote mirror for download
+        args["portal"]: Name of the remote portal for download
         args["skip_download"]: Are we skipping the download? (T/F)
     """
     dryrun_log = None
     dryrun_found = False
-    mirror_found = False
-    mirror_remote = None
+    portal_found = False
+    portal_remote = None
     skip_download = False
     skip_found = False

     # Read the YAML configuration file
     config = read_config_file("download_data.yml")

-    # Get a list of mirror names + short names
-    mirror_list = list(config["mirrors"].keys())
+    # Get a list of portal names + short names
+    portal_list = list(config["portals"].keys())
     short_name_list = []
-    for mir in mirror_list:
-        short_name_list.append(config["mirrors"][mir]["short_name"])
+    for mir in portal_list:
+        short_name_list.append(config["portals"][mir]["short_name"])

     # Parse command-line arguments (argument 0 is the program name)
     for i in range(1, len(sys.argv)):
@@ -730,13 +734,13 @@ def parse_args():
             dryrun_found = True
             continue

-        if not mirror_found:
-            for mir in mirror_list:
-                mirror = mir.lower()
-                short_name = config["mirrors"][mir]["short_name"].lower()
-                if arg in mirror or arg in short_name:
-                    mirror_remote = mirror
-                    mirror_found = True
+        if not portal_found:
+            for mir in portal_list:
+                portal = mir.lower()
+                short_name = config["portals"][mir]["short_name"].lower()
+                if arg in portal or arg in short_name:
+                    portal_remote = portal
+                    portal_found = True
                     continue

         if not skip_found:
@@ -750,14 +754,14 @@ def parse_args():
         msg = "The dryrun log file was not supplied! Exiting ..."
         raise ValueError(msg)

-    if mirror_remote is None and not skip_download:
-        msg = "Mirror name missing or invalid! Exiting ..."
+    if portal_remote is None and not skip_download:
+        msg = "Portal name missing or invalid! Exiting ..."
         raise ValueError(msg)

     args = {
         "config": config,
         "dryrun_log": dryrun_log,
-        "mirror": mirror_remote,
+        "portal": portal_remote,
         "skip_download": skip_download
     }
     return args
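The portal-matching loop above accepts either the full portal key or its short name (both lowercased, via substring match). A standalone sketch, using an inline stand-in for `download_data.yml` with hypothetical entries:

```python
# Inline stand-in for the "portals" section of download_data.yml
config = {"portals": {
    "geoschem+aws": {"short_name": "ga"},
    "rochester":    {"short_name": "ur"},
}}

def match_portal(arg, config):
    """Return the lowercased portal key matching a full or short name, else None."""
    arg = arg.lower()
    for key in config["portals"]:
        short_name = config["portals"][key]["short_name"].lower()
        # Substring match, mirroring `arg in portal or arg in short_name` above
        if arg in key.lower() or arg in short_name:
            return key.lower()
    return None

print(match_portal("GA", config))         # geoschem+aws
print(match_portal("rochester", config))  # rochester
```

Note the substring semantics: any argument contained in a portal name or short name matches, so the first matching entry in the dict wins.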
@@ -770,7 +774,7 @@ def main():

     Calling sequence:
     -----------------
-    ./download_data.py log MIRROR-NAME
+    ./download_data.py log PORTAL-NAME
     ./download_data.py log -skip-download # Print unique log & exit
     """

51 changes: 39 additions & 12 deletions run/shared/download_data.yml
@@ -1,29 +1,57 @@
---
#
# Configuration file for the download_data.py script.
-# You should not have to modify this file unless a new data mirror
+# You should not have to modify this file unless a new data portal
# comes online, or the default restart files are updated.
#
-mirrors:
-  amazon:
-    short_name: aws
+#
+# GEOS-Chem data portals
+portals:
+
+  # GEOS-Chem Input Data portal, download via AWS CLI
+  geoschem+aws:
+    short_name: ga
s3_bucket: True
remote: s3://geos-chem
command: 'aws s3 cp '
quote: ""

# GEOS-Chem Input Data portal, download via HTTP/wget
# NOTE: Use geoschemdata.wustl.edu as a backup
geoschem+http:
short_name: gh
s3_bucket: False
remote: https://geos-chem.s3-us-west-2.amazonaws.com
#remote: http://geoschemdata.wustl.edu/ExtData
command: 'wget -r -np -nH -R "*.html" -N -P @PATH@ '
quote: '"'

# GEOS-Chem Nested Input Data portal, download via AWS CLI
nested+aws:
short_name: na
s3_bucket: True
remote: s3://gcgrid
command: 'aws s3 cp '
quote: ""

# GEOS-Chem Nested Input Data portal, download via HTTP/wget
nested+http:
short_name: nh
s3_bucket: False
remote: https://gcgrid.s3.amazonaws.com/
    command: 'wget -r -np -nH -R "*.html" -N -P @PATH@ '
    quote: '"'

# GCAP 2.0 @ U. Rochester data portal, download via HTTP/wget
rochester:
short_name: ur
s3_bucket: False
remote: http://atmos.earth.rochester.edu/input/gc/ExtData
command: 'wget -r -np -nH -R "*.html" -N --cut-dirs=2 -P @PATH@ '
quote: '"'
-  washu:
-    short_name: wu
-    s3_bucket: False
-    remote: http://geoschemdata.wustl.edu/ExtData
-    command: 'wget -r -np -nH -R "*.html" -N -P @PATH@ '
-    quote: '"'
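The portal entries above differ only in remote root, download command, and quoting. A minimal sketch of how one download line could be assembled from them; `build_command` is a hypothetical helper (the real script also appends a local destination for S3 copies):

```python
# Two portal entries copied from the schema above
portals = {
    "geoschem+aws": {"s3_bucket": True, "remote": "s3://geos-chem",
                     "command": "aws s3 cp ", "quote": ""},
    "rochester": {"s3_bucket": False,
                  "remote": "http://atmos.earth.rochester.edu/input/gc/ExtData",
                  "command": 'wget -r -np -nH -R "*.html" -N --cut-dirs=2 -P @PATH@ ',
                  "quote": '"'},
}

def build_command(name, rel_path, local_prefix="/data/ExtData"):
    """Assemble one download command line from a portal entry."""
    p = portals[name]
    prefix = p["command"].replace("@PATH@", local_prefix)
    q = p["quote"]
    # wget portals quote the URL; aws portals use an empty quote string
    return f'{prefix}{q}{p["remote"]}/{rel_path}{q}'

print(build_command("geoschem+aws", "HEMCO/sample.nc"))
# aws s3 cp s3://geos-chem/HEMCO/sample.nc
```

The per-portal `quote` field exists because wget URLs containing `*` patterns must be shell-quoted, while `aws s3 cp` arguments must not be.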


# GEOS-Chem restart file paths
restarts:
root: GEOSCHEM_RESTARTS
aerosol:
@@ -52,7 +80,7 @@ restarts:
local: GEOSChem.Restart.20190701_0000z.nc4
tagco:
remote: v2020-02/GEOSChem.Restart.tagCO.20190701_0000z.nc4
local: GEOSChem.Restart.20190701_0000z.nc4
local: GEOSChem.Restart.20190701_0000z.nc4
tago3:
remote: GC_14.3.0/GEOSChem.Restart.fullchem.20190701_0000z.nc4
local: GEOSChem.Restart.20190701_0000z.nc4
@@ -65,4 +93,3 @@ restarts:
transporttracers:
remote: GC_14.2.0/GEOSChem.Restart.TransportTracers.20190101_0000z.nc4
local: GEOSChem.Restart.20190101_0000z.nc4