forked from noaa-oar-arl/monetio
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_gml_ozonesonde.py
119 lines (90 loc) · 5.02 KB
/
test_gml_ozonesonde.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import pandas as pd
import pytest
from monetio import gml_ozonesonde
def test_discover_files():
    """The discovered file listing is non-empty and covers exactly ``LOCATIONS``."""
    found = gml_ozonesonde.discover_files()
    assert len(found) > 0

    # The set of locations seen in the listing must match the module's
    # declared locations exactly (no missing and no extra entries).
    discovered_locations = set(found["location"].unique())
    expected_locations = set(gml_ozonesonde.LOCATIONS)
    assert discovered_locations == expected_locations
def test_read_100m():
    """Read one Boulder 100-m file and check selected values in ``ds_attrs``."""
    url = r"https://gml.noaa.gov/aftp/data/ozwv/Ozonesonde/Boulder,%20Colorado/100%20Meter%20Average%20Files/bu1043_2023_12_27_17.l100"

    df = gml_ozonesonde.read_100m(url)
    assert len(df) > 0

    # Expected values, keyed by the name used in ``df.attrs["ds_attrs"]``.
    expected_attrs = {
        "Station": "Boulder, CO",
        "Station Height": "1743 meters",
        "Flight Number": "BU1043",
        "O3 Sonde ID": "2z43312",
        "Background": "0.020 microamps (0.08 mPa)",
        "Flowrate": "29.89 sec/100ml",
        "RH Corr": "0.31 %",
        "Sonde Total O3": "329 (65) DU",
        "Sonde Total O3 (SBUV)": "325 (62) DU",
    }
    ds_attrs = df.attrs["ds_attrs"]
    for key, value in expected_attrs.items():
        assert ds_attrs[key] == value
@pytest.mark.parametrize(
    "url",
    [
        # Missing 'O3 Uncert'
        r"https://gml.noaa.gov/aftp/data/ozwv/Ozonesonde/San%20Cristobal,%20Galapagos/100%20Meter%20Average%20Files/sc204_2002_02_01_03.l100",
        # Missing 'O3 Uncert' + different header blocks (only 1)
        r"https://gml.noaa.gov/aftp/data/ozwv/Ozonesonde/Narragansett,%20Rhode%20Island/100%20Meter%20Average%20Files/ri058_2004_08_05_18.l100",
    ],
)
def test_read_100m_nonstd(url):
    """Files with non-standard headers (see the parameter notes) still yield rows."""
    profile = gml_ozonesonde.read_100m(url)
    assert len(profile) > 0
def test_read_100m_bad_data_line():
    """Reading a file with a malformed data row raises ``ValueError``."""
    url = r"https://gml.noaa.gov/aftp/data/ozwv/Ozonesonde/San%20Cristobal,%20Galapagos/100%20Meter%20Average%20Files/sc204_2002_01_31_12.l100"

    # Start of the data block in this file; in row 0 some values run together
    # (e.g. "0.0-3323.0"), so the row has fewer separable fields than the header:
    #
    # Level Press Alt Pottp Temp FtempV Hum Ozone Ozone Ozone Ptemp O3 # DN O3 Res
    # Num hPa km K C C % mPa ppmv atmcm C 10^11/cc DU
    # 0 -6331.0 0.008 0.0-3323.0 999.9 999-6666.00 10.529 0.0000 -91.8 1583.081 260
    # 1 892.2 0.100 301.1 18.3 19.1 105 1.07 0.012 0.0009 32.3 2.649 259

    with pytest.raises(ValueError, match="Expected 13 columns in data block"):
        gml_ozonesonde.read_100m(url)
def test_read_100m_bad_header_line():
    """Reading a file with an unexpected data-block header raises ``ValueError``."""
    url = r"https://gml.noaa.gov/aftp/data/ozwv/Ozonesonde/Boulder,%20Colorado/100%20Meter%20Average%20Files/bu913_2021_08_10_16.l100"

    # Column header lines of this file (note the trailing "Ftemp" and "Water"
    # columns):
    #
    # Level Press Alt Pottp Temp FtempV Hum Ozone Ozone Ozone Ptemp O3 # DN O3 Res Ftemp Water
    # Num hPa km K C C % mPa ppmv atmcm C 10^11/cc DU C ppmv

    with pytest.raises(ValueError, match="Data block does not start with expected header"):
        gml_ozonesonde.read_100m(url)
def test_add_data():
    """Load January 2023 for all locations and sanity-check units and sites."""
    # Use the lowercase "h" hourly alias: uppercase "H" is deprecated since
    # pandas 2.2 and emits a FutureWarning.
    dates = pd.date_range("2023-01-01", "2023-01-31 23:59", freq="h")
    df = gml_ozonesonde.add_data(dates, n_procs=2)
    assert len(df) > 0

    assert df.attrs["var_attrs"]["o3"]["units"] == "ppmv"

    # One "lat,lon" string per row; the number of distinct strings is used
    # as the number of distinct sites.
    latlon = df["latitude"].astype(str) + "," + df["longitude"].astype(str)
    assert 1 < latlon.nunique() <= 10, "multiple sites; lat/lon doesn't change in profile"

    # NOTE: Similar to the place folder names, but not all the same
    assert df["siteid"].nunique() == latlon.nunique()
def test_add_data_location_sel():
    """Selecting two locations yields data from exactly two lat/lon positions."""
    # Use the lowercase "h" hourly alias: uppercase "H" is deprecated since
    # pandas 2.2 and emits a FutureWarning.
    dates = pd.date_range("2023-01-01", "2023-01-31 23:59", freq="h")
    df = gml_ozonesonde.add_data(
        dates,
        location=["Boulder, Colorado", "South Pole, Antarctica"],
        n_procs=2,
    )
    assert len(df) > 0

    # One "lat,lon" string per row; two selected locations -> two distinct values.
    latlon = df["latitude"].astype(str) + "," + df["longitude"].astype(str)
    assert latlon.nunique() == 2, "selected two locations"
@pytest.mark.parametrize(
    "location",
    ["asdf", ["asdf", "blah"], ("asdf", "blah")],
)
def test_add_data_invalid_location(location):
    """Unknown location names (str, list, or tuple) raise ``ValueError``."""
    # Use the lowercase "h" hourly alias: uppercase "H" is deprecated since
    # pandas 2.2 and emits a FutureWarning.
    dates = pd.date_range("2023-01-01", "2023-01-31 23:59", freq="h")
    with pytest.raises(ValueError, match="Invalid location"):
        _ = gml_ozonesonde.add_data(dates, location=location)
def test_same_location_and_launch_time():
    """Two Boulder files sharing one launch time are both kept as separate profiles."""
    # Two files with same file time and launch time
    #   File time: 2003-03-10 20
    #   Launch time: 2003-03-10 20:41:11
    expected_urls = [
        r"https://gml.noaa.gov/aftp/data/ozwv/Ozonesonde/Boulder,%20Colorado/100%20Meter%20Average%20Files/bl774_2003_03_10_20.l100",
        r"https://gml.noaa.gov/aftp/data/ozwv/Ozonesonde/Boulder,%20Colorado/100%20Meter%20Average%20Files/bl775_2003_03_10_20.l100",
    ]

    time_window = ["2003-03-10 20", "2003-03-10 21"]
    df = gml_ozonesonde.add_data(time_window, location="Boulder, Colorado", n_procs=2)
    assert len(df) > 0

    # Only one launch time
    n_launch_times = df["time"].nunique()
    assert n_launch_times == 1

    # But multiple profiles
    n_flights = df["flight_number"].nunique()
    assert n_flights == 2

    assert df.attrs["ds_attrs"]["urls"] == expected_urls