-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathnsidcDownloader.py
168 lines (137 loc) · 5.05 KB
/
nsidcDownloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import os
import requests
from datetime import datetime
from functools import partial
from string import Formatter
import pandas as pd
class nsidcDownloader:
    """
    Download files from an NSIDC data pool over an authenticated session.

    The download URL is built from ``url_template``. Any template key can be
    given a default (class-wide through ``defaults``, per instance through
    the constructor's ``kwargs``) and overridden per call in
    ``download_file``.
    """

    # URL of a single file; every ``{key}`` is filled from the defaults
    # and/or the keyword arguments given to ``download_file``.
    # ``date1`` and ``date2`` must be objects supporting strftime-style
    # format specs (e.g. ``datetime``).
    url_template = (
        "{protocol}://{server}/{datapool}/{dataset}.{version}/{date1:%Y.%m.%d}"
        "/{dataset}-{projection}_{grid}{resolution}-{platform}_{sensor}"
        "-{date2:%Y%j}-{channel}-{pass}-{algorithm}-{input}-{dataversion}.nc"
    )

    # Class-wide default values for keys of ``url_template``.
    defaults = {
        "protocol": "https",
        "server": "n5eil01u.ecs.nsidc.org",
        "datapool": "MEASURES",
        "dataset": "NSIDC-0630",
        "version": "001",
        "projection": "EASE2",
        "grid": "N",
        "pass": "M",
        "algorithm": "SIR",
        "input": "CSU",
        "dataversion": "v1.3",
    }

    def __init__(
        self, username=None, password=None, folder=".", no_auth=False, **kwargs
    ):
        """
        Snow Water Equivalence downloader.

        Parameters
        ----------
        username: str
            NASA Earthdata username
        password: str
            NASA Earthdata password
        folder: str
            Output folder. May contain ``{key}`` placeholders that are filled
            with the URL keywords when a file is downloaded.
        no_auth: bool
            When exactly ``False`` (the default), ``get_auth`` is called
            during construction.
        kwargs: dict
            keys to use as default in url_template

        Raises
        ------
        PermissionError
            Propagated from ``get_auth`` when authentication is attempted.
        """
        self._init_url_formatter()

        # Set url template defaults to specified defaults
        self.set_defaults(**self.defaults)  # global defaults (in this class)
        self.set_defaults(**kwargs)  # instance defaults

        # Auth details
        self.username = username
        self.password = password

        # Set up session
        self.session = requests.Session()
        if no_auth is False:
            self.get_auth()

        # Output
        self.folder = folder

    def _init_url_formatter(self):
        """Collect the template's field names and create the URL formatter."""
        # Formatter.parse yields (literal, field_name, spec, conversion);
        # field_name is None for trailing literal text such as ".nc".
        self.url_keys = [
            field
            for _, field, _, _ in Formatter().parse(self.url_template)
            if field is not None
        ]
        # The partial's ``keywords`` dict stores the current defaults;
        # calling the partial formats the URL with defaults + overrides.
        self.format_url = partial(self.url_template.format)

    def set_defaults(self, **kwargs):
        """
        Set defaults for url template

        Keys that do not appear in ``url_template`` are ignored.

        Returns
        -------
        dict
            The complete mapping of defaults currently in effect.
        """
        for key, value in kwargs.items():
            if key in self.url_keys:
                self.format_url.keywords[key] = value
        return self.format_url.keywords

    def get_auth(self):
        """
        Get download authentication

        How authentication works:
        (1) request to some sort of NSIDC URL
        (2) get HTTP 302 response redirecting to urs.earthdata.nasa.gov OAuth
        (3) login at urs.earthdata.nasa.gov oauth
        (4) use oauth tokens from urs.earthdata.nasa.gov to request any download url

        Raises
        ------
        PermissionError
            If the server cannot be reached, or if the final response of the
            redirect chain is HTTP 401.
        """
        test_url = "{protocol}://{server}/SMAP/".format(
            **self.format_url.keywords
        )

        # User / pass to use with auth.
        # Will attempt to use auth in ~/.netrc if not passed
        if self.username and self.password:
            self.session.auth = (self.username, self.password)

        # Send test request, keep following the redirects and scoop
        # up all of the cookies along the way in self.session
        try:
            req = self.session.get(test_url, allow_redirects=False)
            while req.status_code == 302:
                req = self.session.get(
                    req.headers["Location"], allow_redirects=False
                )
        except requests.ConnectionError as err:
            # Same exception type/message as before, now chained to the cause
            raise PermissionError("Server Down, try again later") from err

        # If the final request is 401 (Bad Auth), throw exception
        if req.status_code == 401:
            raise PermissionError("Bad NASA Earthdata Authentication!")

    def download_file(self, folder=None, overwrite=False, **kwargs):
        """
        Download a file of particular kwargs

        Parameters
        ----------
        folder: str
            Destination folder. When falsy, the instance ``folder`` is used
            after filling its ``{key}`` placeholders with the URL keywords.
        overwrite: bool
            If the target file exists: remove and re-download it when True,
            otherwise skip the download.
        kwargs: dict
            Values for ``url_template`` keys; they take precedence over the
            defaults.

        Returns
        -------
        list
            ``[filename, ok]`` where ``ok`` is True for a skipped (already
            present) file, otherwise the ``ok`` flag of the HTTP response.

        Raises
        ------
        FileNotFoundError
            If the server answers with HTTP 404.
        """
        url = self.format_url(**kwargs)

        # Dict of all the keywords:vals going into URL. Per-call kwargs win
        # over defaults, exactly as they do when the URL itself is formatted.
        all_keywords = {**self.format_url.keywords, **kwargs}

        # Format output dir with keywords if there's none passed.
        # The dict is also passed positionally so templates written against
        # the old call style (e.g. "{0[dataset]}") keep working.
        if not folder:
            folder = self.folder.format(all_keywords, **all_keywords)

        # Prepare file system
        filename = url.split("/")[-1]
        filepath = "{}/{}".format(folder, filename)
        block_size = 1024

        if os.path.exists(filepath):
            if overwrite:
                os.remove(filepath)
            else:
                print(" ** (skipping...) **")
                print(filename)
                return [filename, True]

        # Download the dang thing
        with self.session.get(url, stream=True) as r:
            if r.status_code == 404:
                raise FileNotFoundError("File Not Found: {}".format(url))
            resp = r.ok

            # Only persist successful responses: an error body saved under
            # the target name would be mistaken for a finished download
            # (and skipped) on the next run.
            if resp:
                # Create dest folder *before* opening the file inside it
                os.makedirs(folder, exist_ok=True)
                # Stream content to file in chunks
                with open(filepath, "wb") as f:
                    for chunk in r.iter_content(block_size):
                        f.write(chunk)

        # filename (not filepath) is returned because another script
        # relies on it
        return [filename, resp]