forked from noaa-oar-arl/monetio
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_ish_lite.py
104 lines (79 loc) · 3.09 KB
/
test_ish_lite.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import pandas as pd
import pytest
from monetio import ish_lite
# Probe the NCEI ISD-Lite server before collecting any tests: every test in
# this module downloads from it, so skip the whole module when it is down.
try:
    import requests

    r = requests.head("https://www1.ncdc.noaa.gov/pub/data/noaa/isd-lite/")
    # A HEAD request that returns an HTTP error status (e.g. 503) also means
    # the server cannot serve data; without this check only connection-level
    # failures would trigger the skip and the tests would fail noisily.
    r.raise_for_status()
except Exception:
    pytest.skip("NCEI server issues", allow_module_level=True)
def test_ish_read_history():
    """Load the ISH station-history metadata and sanity-check its contents."""
    ish = ish_lite.ISH()
    ish.dates = pd.date_range("2020-09-01", "2020-09-02")
    ish.read_ish_history()
    hist = ish.history

    assert len(hist) > 0
    assert {"latitude", "longitude", "begin", "end"} < set(hist.columns)
    for time_col in ("begin", "end"):
        # Period columns are parsed to midnight-normalized datetimes.
        assert hist[time_col].dtype == "datetime64[ns]"
        assert (hist[time_col].dt.hour == 0).all()

    assert hist.station_id.nunique() == len(hist), "unique ID for station"

    # Ensure docstring info matches this
    usaf_counts = hist.usaf.value_counts()
    dup_usaf = usaf_counts[usaf_counts == 2].index
    assert sorted(dup_usaf) == ["720481", "722158", "725244"]
    assert usaf_counts[usaf_counts.index != "999999"].max() == 2

    wban_counts = hist.wban.value_counts()
    dup_wban = wban_counts[wban_counts == 2].index
    assert sorted(dup_wban) == ["13752", "23176", "24267", "41231", "41420"]
    assert wban_counts[wban_counts.index != "99999"].max() == 2

    assert (hist.usaf == "999999").sum() > 100
    assert (hist.wban == "99999").sum() > 10_000
def test_ish_lite_one_site():
    """Fetch two days of ISD-Lite data for one site and check shape/columns."""
    dates = pd.date_range("2020-09-01", "2020-09-02")
    site = "72224400358"  # "College Park AP"
    df = ish_lite.add_data(dates, site=site)
    assert (df.nunique()[["usaf", "wban"]] == 1).all(), "one site"
    # Site IDs are the 6-char USAF code concatenated with the 5-char WBAN code.
    assert (df.usaf + df.wban).iloc[0] == site, "correct site"
    # Lowercase "1h": the uppercase "H" offset alias is deprecated in
    # pandas 2.2 and removed in pandas 3.
    assert (df.time.diff().dropna() == pd.Timedelta("1h")).all(), "hourly data"
    assert len(df) == 25, "includes hour 0 on second day"
    assert {
        "usaf",
        "wban",
        "latitude",
        "longitude",
        "country",
        "state",
    } < set(df.columns), "useful site metadata"
    assert {
        "time",
        "temp",
        "dew_pt_temp",
        "press",
        "wdir",
        "ws",
        "sky_condition",
        "precip_1hr",
        "precip_6hr",
    } < set(df.columns), "data columns"
    assert (df.temp < 100).all(), "temp in degC"
@pytest.mark.parametrize("resample", [False, True])
def test_ish_lite_one_site_empty(resample):
    """A site with no data in the period yields an empty frame, resampled or not."""
    period = pd.date_range("2020-09-01", "2020-09-02")
    site = "99816999999"  # "Delaware Reserve"
    result = ish_lite.add_data(period, site=site, resample=resample)
    assert result.empty
def test_ish_lite_resample():
    """Resampling should return data on the requested regular time grid."""
    dates = pd.date_range("2020-09-01", "2020-09-02")
    site = "72224400358"  # "College Park AP"
    # Lowercase "3h": the uppercase "H" offset alias is deprecated in
    # pandas 2.2 and removed in pandas 3; "3h" parses the same way.
    freq = "3h"
    df = ish_lite.add_data(dates, site=site, resample=True, window=freq)
    assert (df.time.diff().dropna() == pd.Timedelta(freq)).all()
    # 8 three-hour windows per day plus hour 0 of the second day.
    assert len(df) == 8 + 1
@pytest.mark.parametrize("meta", ["country", "state", "site"])
def test_ish_lite_invalid_subset(meta):
    """A bogus value for any single subsetting option raises ValueError."""
    dates = pd.date_range("2020-09-01", "2020-09-02")
    bad_kwargs = {meta: "asdf"}
    with pytest.raises(ValueError, match="^No data URLs found"):
        ish_lite.add_data(dates, **bad_kwargs)
def test_ish_lite_error_on_multiple_subset_options():
    """Supplying more than one subsetting option at once is rejected."""
    with pytest.raises(ValueError, match="^Only one of "):
        ish_lite.add_data(
            pd.date_range("2020-09-01", "2020-09-02"),
            site="72224400358",
            state="MD",
        )