Skip to content

Commit 155c042

Browse files
committed
#1063: Updated consistent db file to 2025-01-22-2016-07-01_to_2024-12-31 version. Fixed a bug where we threw out the entire frame in the db if there were no sensing dates for that frame. Updated and cleaned up unit tests
1 parent bb06e9a commit 155c042

File tree

5 files changed

+24
-21
lines changed

5 files changed

+24
-21
lines changed

conf/settings.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ DEFAULT_DISP_S1_QUERY_GRACE_PERIOD_MINUTES: 210
6666
DISP_S1_K_FETCH_MULTIPLE: 2
6767

6868
# This file contains mapping of frame_id to burst_id and all its sensing times based on real CMR data
69-
DISP_S1_BURST_DB_S3PATH: "s3://opera-ancillaries/disp_frames/disp_s1_consistent_burst_db/opera-disp-s1-consistent-burst-ids-2024-10-14-2016-07-01_to_2024-09-04.json"
69+
DISP_S1_BURST_DB_S3PATH: "s3://opera-ancillaries/disp_frames/disp_s1_consistent_burst_db/opera-disp-s1-consistent-burst-ids-2025-01-22-2016-07-01_to_2024-12-31.json"
7070

7171
# This file contains list of blackout date ranges for DISP-S1 processing on a per-frame basis
7272
DISP_S1_BLACKOUT_DATES_S3PATH: "s3://opera-ancillaries/disp_frames/disp_s1_blackout_dates/sample_disp_s1_blackout.json"

data_subscriber/cslc_utils.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,18 @@ def __init__(self):
2828
self.sensing_seconds_since_first = [] # Sensing time in seconds since the first sensing time
2929
self.sensing_datetime_days_index = [] # Sensing time in days since the first sensing time, rounded to the nearest day
3030

31-
def localize_anc_json(settings_field):
32-
'''Copy down a file from S3 whose path is defined in settings.yaml by settings_field'''
33-
31+
def get_s3_resource_from_settings(settings_field):
3432
settings = SettingsConf().cfg
3533
burst_file_url = urlparse(settings[settings_field])
3634
s3 = boto3.resource('s3')
3735
path = burst_file_url.path.lstrip("/")
3836
file = path.split("/")[-1]
37+
38+
return s3, path, file, burst_file_url
39+
def localize_anc_json(settings_field):
40+
'''Copy down a file from S3 whose path is defined in settings.yaml by settings_field'''
41+
42+
s3, path, file, burst_file_url = get_s3_resource_from_settings(settings_field)
3943
s3.Object(burst_file_url.netloc, path).download_file(file)
4044

4145
return file
@@ -135,10 +139,6 @@ def process_disp_frame_burst_hist(file):
135139

136140
for frame in j:
137141

138-
# If sensing time list is empty, skip this frame
139-
if len(j[frame]["sensing_time_list"]) == 0:
140-
continue
141-
142142
frame_to_bursts[int(frame)].frame_number = int(frame)
143143

144144
b = frame_to_bursts[int(frame)].burst_ids

tests/data_subscriber/test_cslc_query.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
forward_arguments = ["query", "-c", "OPERA_L2_CSLC-S1_V1", "--processing-mode=forward", "--start-date=2021-01-24T23:00:00Z",
1414
"--end-date=2021-01-25T00:00:00Z", "--grace-mins=60", "--k=4", "--m=4"]
1515

16-
BURST_MAP = Path(__file__).parent / "opera-disp-s1-consistent-burst-ids-2024-10-14-2016-07-01_to_2024-09-04.json"
16+
s3, path, file, burst_file_url = cslc_utils.get_s3_resource_from_settings("DISP_S1_BURST_DB_S3PATH")
17+
BURST_MAP = Path(__file__).parent / file
1718
frame_to_bursts, burst_to_frames, datetime_to_frames = cslc_utils.process_disp_frame_burst_hist(BURST_MAP)
1819

1920
def test_extend_additional_records():
@@ -40,7 +41,7 @@ def test_reprocessing_by_native_id(caplog):
4041
c_query = cslc_query.CslcCmrQuery(reproc_args, None, None, None, None,
4142
{"DEFAULT_DISP_S1_QUERY_GRACE_PERIOD_MINUTES": 60},BURST_MAP)
4243
c_query.query_cmr(None, datetime.utcnow())
43-
assert ("native_id='OPERA_L2_CSLC-S1_T027-056778-IW1_20231008T133102Z_20231009T204457Z_S1A_VV_v1.0' is not found in the DISP-S1 Burst ID Database JSON. Nothing to process"
44+
assert ("native_id=OPERA_L2_CSLC-S1_T027-056778-IW1_20231008T133102Z_20231009T204457Z_S1A_VV_v1.0 is not found in the DISP-S1 Burst ID Database JSON. Nothing to process"
4445
in caplog.text)
4546

4647

tests/data_subscriber/test_cslc_util.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,27 +14,28 @@
1414
hist_arguments = ["query", "-c", "OPERA_L2_CSLC-S1_V1", "--processing-mode=historical", "--start-date=2021-01-24T23:00:00Z",\
1515
"--end-date=2021-01-24T23:00:00Z", "--frame-range=100,101"]
1616

17-
BURST_MAP = Path(__file__).parent / "opera-disp-s1-consistent-burst-ids-2024-10-14-2016-07-01_to_2024-09-04.json"
18-
disp_burst_map_hist, burst_to_frames, datetime_to_frames = cslc_utils.process_disp_frame_burst_hist(BURST_MAP)
17+
s3, path, file, burst_file_url = cslc_utils.get_s3_resource_from_settings("DISP_S1_BURST_DB_S3PATH")
18+
file = Path(__file__).parent / file
19+
disp_burst_map_hist, burst_to_frames, datetime_to_frames = cslc_utils.process_disp_frame_burst_hist(file)
1920

2021
frame_blackout_dates = process_disp_blackout_dates(Path(__file__).parent / "sample_disp_s1_blackout.json")
2122
blackout_dates_obj = DispS1BlackoutDates(frame_blackout_dates, disp_burst_map_hist, burst_to_frames)
2223

2324
#TODO: We may change the database json during production that could have different burst ids for the same frame
2425
#TODO: So we may want to create different versions of this unit test, one for each version of the database json
2526
def test_burst_map():
26-
assert len(disp_burst_map_hist.keys()) == 1428
27+
assert len(disp_burst_map_hist.keys()) == 1427
2728
burst_set = set()
2829
for burst in ['T042-088905-IW2', 'T042-088907-IW3', 'T042-088907-IW2', 'T042-088910-IW1', 'T042-088910-IW3', 'T042-088913-IW1', 'T042-088906-IW1', 'T042-088911-IW1', 'T042-088912-IW2', 'T042-088913-IW2', 'T042-088907-IW1', 'T042-088913-IW3', 'T042-088909-IW3', 'T042-088912-IW3', 'T042-088909-IW1', 'T042-088909-IW2', 'T042-088908-IW2', 'T042-088910-IW2', 'T042-088906-IW3', 'T042-088911-IW2', 'T042-088908-IW1', 'T042-088912-IW1', 'T042-088911-IW3', 'T042-088905-IW1', 'T042-088905-IW3', 'T042-088906-IW2', 'T042-088908-IW3']:
2930
burst_set.add(burst)
3031
assert disp_burst_map_hist[11114].burst_ids.difference(burst_set) == set()
3132
diff_time = disp_burst_map_hist[11114].sensing_datetimes[0] - dateutil.parser.isoparse("2016-08-10T14:07:13")
3233
assert diff_time.total_seconds() < 60
3334

34-
assert len(disp_burst_map_hist[46799].burst_ids) == 15
35-
assert len(disp_burst_map_hist[46799].sensing_datetimes) == 1
35+
assert len(disp_burst_map_hist[46799].burst_ids) == 16
36+
assert len(disp_burst_map_hist[46799].sensing_datetimes) == 0
3637

37-
assert len(disp_burst_map_hist[28498].burst_ids) == 23
38+
assert len(disp_burst_map_hist[28498].burst_ids) == 18
3839

3940
def test_split_download_batch_id():
4041
"""Test that the download batch id is correctly split into frame and acquisition cycle"""
@@ -90,7 +91,7 @@ def test_parse_cslc_native_id():
9091
burst_id, acquisition_dts, acquisition_cycles, frame_ids = \
9192
cslc_utils.parse_cslc_native_id("OPERA_L2_CSLC-S1_T050-105601-IW3_20160823T025448Z_20240614T120433Z_S1A_VV_v1.1", burst_to_frames, disp_burst_map_hist)
9293
assert frame_ids == [13200, 13201]
93-
assert disp_burst_map_hist[13200].sensing_datetimes[0] == dateutil.parser.isoparse("2016-08-23T02:54:48")
94+
assert disp_burst_map_hist[13200].sensing_datetimes[0] == dateutil.parser.isoparse("2017-08-18T02:54:31")
9495

9596
#TODO: 09-05-2024 Uncomment after the database file has been updated
9697
'''burst_id, acquisition_dts, acquisition_cycles, frame_ids = \
@@ -113,7 +114,7 @@ def test_build_ccslc_m_index():
113114
def test_determine_acquisition_cycle_cslc():
114115
"""Test that the acquisition cycle is correctly determined"""
115116
acquisition_cycle = cslc_utils.determine_acquisition_cycle_cslc(dateutil.parser.isoparse("20170227T230524"), 831, disp_burst_map_hist)
116-
assert acquisition_cycle == 12
117+
assert acquisition_cycle == 240
117118

118119
acquisition_cycle = cslc_utils.determine_acquisition_cycle_cslc(dateutil.parser.isoparse("20170203T230547"), 832, disp_burst_map_hist)
119120
assert acquisition_cycle == 216
@@ -130,7 +131,7 @@ def test_determine_k_cycle():
130131
cslc_dependency = CSLCDependency(10, 1, disp_burst_map_hist, args, token, cmr, settings, blackout_dates_obj) # m doesn't matter here
131132

132133
k_cycle = cslc_dependency.determine_k_cycle(dateutil.parser.isoparse("20170227T230524"), None, 831)
133-
assert k_cycle == 2
134+
assert k_cycle == 5
134135

135136
k_cycle = cslc_dependency.determine_k_cycle(dateutil.parser.isoparse("20160702T230546"), None, 832)
136137
assert k_cycle == 1
@@ -271,15 +272,15 @@ def test_nearest_sensing_datetime():
271272

272273
count, nearest_time = cslc_utils.get_nearest_sensing_datetime(disp_burst_map_hist[8882].sensing_datetimes,
273274
dateutil.parser.isoparse("2027-11-02T00:26:48"))
274-
assert nearest_time == dateutil.parser.isoparse("2024-08-28T00:27:00")
275+
assert nearest_time == dateutil.parser.isoparse("2024-12-26T00:26:57")
275276

276277
def test_calculate_historical_progress():
277278
end_date = dateutil.parser.isoparse("2018-07-01T00:00:00")
278279
frame_states = {'46288': 30, '46289': 30, '26690': 45, '26691': 45, '38500': 0}
279280

280281
progress, frame_completion, last_processed_datetimes \
281282
= cslc_utils.calculate_historical_progress(frame_states, end_date, disp_burst_map_hist)
282-
assert progress == 67
283+
assert progress == 69
283284
assert frame_completion == {'46288': 71, '46289': 71, '26690': 100, '26691': 100, '38500': 0}
284285
assert last_processed_datetimes == {'46288': datetime(2018, 1, 29, 13, 43, 14),
285286
'46289': datetime(2018, 1, 29, 13, 43, 36),

tests/scenarios/cslc_query_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,3 +265,4 @@ def validate_hour(q_result_dict, test_range, validation_data):
265265
logging.info("TEST SUCCESS")
266266
else:
267267
logging.error("TEST FAILED")
268+
exit(-1)

0 commit comments

Comments
 (0)