@@ -57,13 +57,13 @@ def rebuild_auth(self, prepared_request, response):
        headers = prepared_request.headers
        url = prepared_request.url

-        if 'Authorization' in headers:
+        if "Authorization" in headers:
            original_parsed = requests.utils.urlparse(response.request.url)
            redirect_parsed = requests.utils.urlparse(url)
            if (original_parsed.hostname != redirect_parsed.hostname) and \
                    redirect_parsed.hostname != self.auth_host and \
                    original_parsed.hostname != self.auth_host:
-                del headers['Authorization']
+                del headers["Authorization"]


async def run(argv: list[str]):
@@ -75,8 +75,8 @@ async def run(argv: list[str]):
        raise v

    settings = SettingsConf().cfg
-    edl = settings['DAAC_ENVIRONMENTS'][args.endpoint]['EARTHDATA_LOGIN']
-    cmr = settings['DAAC_ENVIRONMENTS'][args.endpoint]['BASE_URL']
+    edl = settings["DAAC_ENVIRONMENTS"][args.endpoint]["EARTHDATA_LOGIN"]
+    cmr = settings["DAAC_ENVIRONMENTS"][args.endpoint]["BASE_URL"]
    netloc = urlparse(f"https://{edl}").netloc
    provider_esconn_map = {"LPCLOUD": get_hls_catalog_connection(logging.getLogger(__name__)),
                           "ASF": get_slc_catalog_connection(logging.getLogger(__name__))}
@@ -87,7 +87,7 @@ async def run(argv: list[str]):
            update_url_index(es_conn, f.readlines(), None, None, None)
        exit(0)

-    loglevel = 'DEBUG' if args.verbose else 'INFO'
+    loglevel = "DEBUG" if args.verbose else "INFO"
    logging.basicConfig(level=loglevel)
    logging.info("Log level set to " + loglevel)

@@ -146,7 +146,7 @@ def create_parser():
    provider = {"positionals": ["-p", "--provider"],
                "kwargs": {"dest": "provider",
                           "choices": ["LPCLOUD", "ASF"],
-                           "default": 'LPCLOUD',
+                           "default": "LPCLOUD",
                           "help": "Specify a provider for collection search. Default is LPCLOUD."}}

    collection = {"positionals": ["-c", "--collection-shortname"],
@@ -183,11 +183,6 @@ def create_parser():
                                  "script as a cron, this value should be equal to or greater than how often your "
                                  "cron runs (default: 60 minutes)."}}

-    isl_bucket = {"positionals": ["-i", "--isl-bucket"],
-                  "kwargs": {"dest": "isl_bucket",
-                             "required": True,
-                             "help": "The incoming storage location s3 bucket where data products will be downloaded."}}
-
    transfer_protocol = {"positionals": ["-x", "--transfer-protocol"],
                         "kwargs": {"dest": "transfer_protocol",
                                    "choices": ["s3", "https"],
@@ -233,6 +228,12 @@ def create_parser():
                               "action": "store_true",
                               "help": "Toggle for using temporal range rather than revision date (range) in the query."}}

+    temporal_start_date = {"positionals": ["--temporal-start-date"],
+                           "kwargs": {"dest": "temporal_start_date",
+                                      "default": None,
+                                      "help": "The ISO date time after which data should be retrieved. Only valid when --use-temporal is false/omitted. For Example, "
+                                              "--temporal-start-date 2021-01-14T00:00:00Z"}}
+
    native_id = {"positionals": ["--native-id"],
                 "kwargs": {"dest": "native_id",
                            "help": "The native ID of a single product granule to be queried, overriding other query arguments if present."}}
@@ -250,20 +251,20 @@ def create_parser():
                 "help": "The native ID of a single product granule to be queried, overriding other query arguments if present."}}

    full_parser = subparsers.add_parser("full")
-    full_parser_arg_list = [verbose, endpoint, provider, collection, start_date, end_date, bbox, minutes, isl_bucket,
+    full_parser_arg_list = [verbose, endpoint, provider, collection, start_date, end_date, bbox, minutes,
                            transfer_protocol, dry_run, smoke_run, no_schedule_download, release_version, job_queue,
-                            chunk_size, batch_ids, use_temporal, native_id]
+                            chunk_size, batch_ids, use_temporal, temporal_start_date, native_id]
    _add_arguments(full_parser, full_parser_arg_list)

    query_parser = subparsers.add_parser("query")
-    query_parser_arg_list = [verbose, endpoint, provider, collection, start_date, end_date, bbox, minutes, isl_bucket,
+    query_parser_arg_list = [verbose, endpoint, provider, collection, start_date, end_date, bbox, minutes,
                             dry_run, smoke_run, no_schedule_download, release_version, job_queue, chunk_size,
-                             native_id, use_temporal]
+                             native_id, use_temporal, temporal_start_date]
    _add_arguments(query_parser, query_parser_arg_list)

    download_parser = subparsers.add_parser("download")
-    download_parser_arg_list = [verbose, file, endpoint, provider, isl_bucket, transfer_protocol, dry_run, smoke_run,
-                                batch_ids, start_date, end_date, use_temporal]
+    download_parser_arg_list = [verbose, file, endpoint, provider, transfer_protocol, dry_run, smoke_run,
+                                batch_ids, start_date, end_date, use_temporal, temporal_start_date]
    _add_arguments(download_parser, download_parser_arg_list)

    return parser
@@ -289,7 +290,7 @@ def validate(args):


def _validate_bounds(bbox):
-    bounds = bbox.split(',')
+    bounds = bbox.split(",")
    value_error = ValueError(
        f"Error parsing bounds: {bbox}. Format is <W Longitude>,<S Latitude>,<E Longitude>,<N Latitude> without spaces")

@@ -303,9 +304,9 @@ def _validate_bounds(bbox):
        raise value_error


-def _validate_date(date, prefix='start'):
+def _validate_date(date, prefix="start"):
    try:
-        datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ')
+        datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError:
        raise ValueError(
            f"Error parsing {prefix} date: {date}. Format must be like 2021-01-14T00:00:00Z")
@@ -364,10 +365,10 @@ def _get_tokens(edl: str, username: str, password: str) -> list[dict]:
def _revoke_expired_tokens(token_list: list[dict], edl: str, username: str, password: str) -> None:
    for token_dict in token_list:
        now = datetime.utcnow().date()
-        expiration_date = datetime.strptime(token_dict['expiration_date'], "%m/%d/%Y").date()
+        expiration_date = datetime.strptime(token_dict["expiration_date"], "%m/%d/%Y").date()

        if expiration_date <= now:
-            _delete_token(edl, username, password, token_dict['access_token'])
+            _delete_token(edl, username, password, token_dict["access_token"])
            del token_dict


@@ -380,7 +381,7 @@ def _create_token(edl: str, username: str, password: str) -> str:
    response_content = create_response.json()

    if "error" in response_content.keys():
-        raise Exception(response_content['error'])
+        raise Exception(response_content["error"])

    token = response_content["access_token"]

@@ -391,7 +392,7 @@ def _delete_token(edl: str, username: str, password: str, token: str) -> None:
    url = f"https://{edl}/api/users/revoke_token"
    try:
        resp = requests.post(url, auth=HTTPBasicAuth(username, password),
-                             params={'token': token})
+                             params={"token": token})
        resp.raise_for_status()
    except Exception as e:
        logging.warning(f"Error deleting the token: {e}")
@@ -472,11 +473,6 @@ async def run_query(args, token, es_conn, cmr, job_id, settings):
        release_version=args.release_version,
        provider=args.provider,
        params=[
-            {
-                "name": "isl_bucket_name",
-                "value": f"--isl-bucket={args.isl_bucket}",
-                "from": "value"
-            },
            {
                "name": "batch_ids",
                "value": "--batch-ids " + " ".join(chunk_batch_ids) if chunk_batch_ids else "",
@@ -560,43 +556,48 @@ def query_cmr(args, token, cmr, settings, timerange: DateTimeRange, now: datetime):

    request_url = f"https://{cmr}/search/granules.umm_json"
    params = {
-        'page_size': PAGE_SIZE,
-        'sort_key': "-start_date",
-        'provider': args.provider,
-        'ShortName': args.collection,
-        'token': token,
-        'bounding_box': args.bbox,
+        "page_size": PAGE_SIZE,
+        "sort_key": "-start_date",
+        "provider": args.provider,
+        "ShortName": args.collection,
+        "token": token,
+        "bounding_box": args.bbox,
    }

    if args.native_id:
-        params['native-id'] = args.native_id
+        params["native-id"] = args.native_id

    # derive and apply param "temporal"
    now_date = now.strftime("%Y-%m-%dT%H:%M:%SZ")
    temporal_range = _get_temporal_range(timerange.start_date, timerange.end_date, now_date)
    logging.info("Temporal Range: " + temporal_range)

    if args.use_temporal:
-        params['temporal'] = temporal_range
+        params["temporal"] = temporal_range
    else:
        params["revision_date"] = temporal_range

+    # if a temporal start-date is provided, set temporal
+    if args.temporal_start_date:
+        logging.info(f"{args.temporal_start_date=}")
+        params["temporal"] = dateutil.parser.isoparse(args.temporal_start_date).strftime("%Y-%m-%dT%H:%M:%SZ")
+
    logging.info(f"{request_url=} {params=}")
    product_granules, search_after = _request_search(args, request_url, params)

    while search_after:
        granules, search_after = _request_search(args, request_url, params, search_after=search_after)
        product_granules.extend(granules)

-    if args.collection in settings['SHORTNAME_FILTERS']:
+    if args.collection in settings["SHORTNAME_FILTERS"]:
        product_granules = [granule
                            for granule in product_granules
                            if _match_identifier(settings, args, granule)]

    logging.info(f"Found {str(len(product_granules))} total granules")

    for granule in product_granules:
-        granule['filtered_urls'] = _filter_granules(granule, args)
+        granule["filtered_urls"] = _filter_granules(granule, args)

    return product_granules

@@ -616,17 +617,17 @@ def _get_temporal_range(start: str, end: str, now: str):


def _request_search(args, request_url, params, search_after=None):
-    response = requests.get(request_url, params=params, headers={'CMR-Search-After': search_after}) \
+    response = requests.get(request_url, params=params, headers={"CMR-Search-After": search_after}) \
        if search_after else requests.get(request_url, params=params)

    results = response.json()
-    items = results.get('items')
-    next_search_after = response.headers.get('CMR-Search-After')
+    items = results.get("items")
+    next_search_after = response.headers.get("CMR-Search-After")

    collection_identifier_map = {"HLSL30": "LANDSAT_PRODUCT_ID",
                                 "HLSS30": "PRODUCT_URI"}

-    if items and 'umm' in items[0]:
+    if items and "umm" in items[0]:
        return [{"granule_id": item.get("umm").get("GranuleUR"),
                 "provider": item.get("meta").get("provider-id"),
                 "production_datetime": item.get("umm").get("DataGranule").get("ProductionDateTime"),
@@ -668,8 +669,8 @@ def _filter_granules(granule, args):


def _match_identifier(settings, args, granule) -> bool:
-    for filter in settings['SHORTNAME_FILTERS'][args.collection]:
-        if re.match(filter, granule['identifier']):
+    for filter in settings["SHORTNAME_FILTERS"][args.collection]:
+        if re.match(filter, granule["identifier"]):
            return True

    return False
@@ -861,7 +862,7 @@ def download_from_asf(
        logging.info("downloading associated orbit file")
        dataset_dir = extract_one_to_one(product, settings_cfg, working_dir=Path.cwd())
        stage_orbit_file_args = stage_orbit_file.get_parser().parse_args([
-            f'--output-directory={str(dataset_dir)}',
+            f"--output-directory={str(dataset_dir)}",
            str(product_filepath)
        ])
        stage_orbit_file.main(stage_orbit_file_args)
@@ -1088,7 +1089,7 @@ def _https_transfer(url, bucket_name, token, staging_area=""):
    upload_end_time = datetime.utcnow()
    upload_duration = upload_end_time - upload_start_time
    upload_stats = {"file_name": file_name,
-                    "file_size (in bytes)": r.headers.get('Content-Length'),
+                    "file_size (in bytes)": r.headers.get("Content-Length"),
                    "upload_duration (in seconds)": upload_duration.total_seconds(),
                    "upload_start_time": _convert_datetime(upload_start_time),
                    "upload_end_time": _convert_datetime(upload_end_time)}
@@ -1147,7 +1148,7 @@ def _s3_download(url, s3, tmp_dir, staging_area=""):
    file_name = PurePath(url).name
    target_key = str(Path(staging_area, file_name))

-    source = url[len("s3://"):].partition('/')
+    source = url[len("s3://"):].partition("/")
    source_bucket = source[0]
    source_key = source[2]

@@ -1167,5 +1168,5 @@ def _s3_upload(url, bucket_name, tmp_dir, staging_area=""):
    return target_key


-if __name__ == '__main__':
+if __name__ == "__main__":
    asyncio.run(run(sys.argv))