-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSTNMFSNOAA-BS.py
82 lines (66 loc) · 2.65 KB
/
STNMFSNOAA-BS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import requests
from bs4 import BeautifulSoup
import os
import time
# Download the monthly landings HTML tables served by st.nmfs.noaa.gov: one
# directory per species, one HTML file per state/region, followed by a final
# pass for 'ALL SPECIES COMBINED'.
#
# The script can be restarted: a species whose directory already exists is
# skipped. NOTE: a directory left behind by an interrupted run is therefore
# treated as complete -- delete it before restarting to re-download it.

BASE_URL = 'https://www.st.nmfs.noaa.gov/pls/webpls/'
SPECIES_URL = BASE_URL + 'FT_HELP.SPECIES'
LANDINGS_URL = BASE_URL + 'MF_MONTHLY_LANDINGS.RESULTS'
ALL_SPECIES = 'ALL SPECIES COMBINED'
YEAR_FROM = '1990'
YEAR_TO = '2015'
REQUEST_TIMEOUT = 60  # seconds; without it a stalled connection hangs forever
REQUEST_DELAY = .1  # seconds to pause between downloads (don't overload server)


def species_dirname(name):
    """Return the directory name used to store the tables of species *name*.

    Commas are dropped, spaces become '-' and '/' becomes '_'.
    """
    return name.replace(",", "").replace(" ", "-").replace("/", "_")


def state_filename(state):
    """Return the HTML file name for *state*.

    '/' is replaced by '_' because it would otherwise be interpreted as a
    path separator pointing into a sub-directory that does not exist.
    """
    return state.replace("/", "_") + ".html"


def fetch(url, params, check=False):
    """GET *url* with query *params* and return the ``requests`` response.

    When *check* is true an HTTP error status raises
    ``requests.HTTPError`` instead of handing an error page to the caller.
    """
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    if check:
        response.raise_for_status()
    return response


def list_species():
    """Return the text of every <option> on the species search page."""
    payload = {'qwhocalled': 'monthly',
               'qcommon': '',
               'qreturn': 'Search',
               'qselect': 'List Empty, Do a Search to Fill'}
    page = fetch(SPECIES_URL, payload, check=True)
    soup = BeautifulSoup(page.content, "lxml")
    return [option.text for option in soup.find_all("option")]


def list_states(sp):
    """Return the states/regions offered in the ``qstate`` select for *sp*.

    Returns an empty list when the returned page has no ``qstate`` select.
    """
    payload = {'qwhocalled': 'monthly',
               'qcommon': '',
               'qreturn': 'Return',
               'qselect': sp}
    page = fetch(SPECIES_URL, payload, check=True)
    soup = BeautifulSoup(page.content, "lxml")
    select = soup.find("select", {"name": "qstate"})
    if select is None:
        return []
    return [option.text for option in select.find_all("option")]


def save_landings(sp, states, directory):
    """Save one landings table per entry of *states* for *sp* in *directory*.

    *directory* is created when missing. Each response body is written
    unchanged to ``<directory>/<state>.html``.
    """
    if not os.path.isdir(directory):
        os.mkdir(directory)
    for st in states:
        payload = {'qspecies': sp,
                   'qreturn': 'Species Locator',
                   'qyearfrom': YEAR_FROM,
                   'qyearto': YEAR_TO,
                   'qmonth': 'YEAR BY MONTH',
                   'qstate': st,
                   'qoutput_type': 'TABLE'}
        response = fetch(LANDINGS_URL, payload)
        # Write the raw bytes: str() of a bytes object on Python 3 would
        # store its "b'...'" repr instead of the HTML itself.
        with open(os.path.join(directory, state_filename(st)), "wb") as f:
            f.write(response.content)
        # don't overload server
        time.sleep(REQUEST_DELAY)


def main():
    """Download the tables of every species, then of all species combined."""
    species = list_species()
    states = None
    last_species = None
    # iterate through species
    for sp in species:
        directory = species_dirname(sp)
        if not directory:
            # A blank option cannot be turned into a directory name.
            continue
        last_species = sp
        if os.path.exists(directory):
            # Already downloaded by a previous run (restart support).
            continue
        # Fetch the states before creating the directory, so a failure here
        # does not leave an empty directory that a restart would skip.
        states = list_states(sp)
        save_landings(sp, states, directory)
    # The combined pass reuses the state list of the last species processed.
    # On a restart where every species was skipped there is none yet, so it
    # is fetched once here.
    # NOTE(review): presumably the state list is the same for every species
    # -- confirm against the site.
    if states is None and last_species is not None:
        states = list_states(last_species)
    # get all species from main page
    if states:
        save_landings(ALL_SPECIES, states, species_dirname(ALL_SPECIES))


if __name__ == "__main__":
    main()