-
Notifications
You must be signed in to change notification settings - Fork 3
/
stationaritySummary_MannKendal_.py
executable file
·258 lines (224 loc) · 10.2 KB
/
stationaritySummary_MannKendal_.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
#!/home/jmframe/programs/anaconda3/bin/python3
"""
Created on Thu May 9 2019
@author: Jonathan Frame
"""
import numpy as np
import pandas as pd
import os
import statistics as sts
import time
import sys
import collections
# Need to set print options to max for generating the results files.
np.set_printoptions(threshold=sys.maxsize)
#import matplotlib.pyplot as plt
# time the program
startProgramTime = time.time()
#Store all attributes. Really just need basin area, but the rest might come in handy.
att_path = "/home/NearingLab/data/camels_attributes_v2.0/camels_all.txt"
attributes = pd.read_csv(att_path, sep=";")
attributes.set_index('gauge_id', inplace=True)
#data directory path
dq_path = "/home/NearingLab/projects/jmframe/CAMELS_stationarity/usgs_streamflow/"
dp_path = "/home/NearingLab/projects/jmframe/CAMELS_stationarity/nldas_precipitation/"
# Setting nGauges to be one above the known number of gauges in CAMELS
nGauges = 673
count_id = 0
metrics = 5 #i.e., P, Q, P-Q, Q/P, (P-Q)/P
#Mann-Kendall Statistic
mk = np.zeros([nGauges,metrics+1])
# Loop through all the files in the discharge directory first.
for filename in os.listdir(dq_path):
startCatchmentTime = time.time()
q_path = dq_path + filename
# the known location of the gauge ids in the files
gauge_id = filename[0:8]
print("gauge_id: " + gauge_id)
# Now set a path to the corresponding precipitation gauge.
p_path = dp_path + gauge_id + "_lump_nldas_forcing_leap.txt"
# Confirm that there is a corresponding precipitation gauge to the discharge.
if int(gauge_id) in attributes.index:
catch_area = attributes.loc[int(gauge_id),'area_geospa_fabric'] # km^2
print("the catchment area is: ", catch_area, " km^2")
mk[count_id][0] = int(gauge_id)
else:
print("WARNING::this catchment is not in the CAMELS attributes: ")
print("Moving on to the next one...")
continue
# Load in the data. Note: Precipitation has headers.
dataQ = np.loadtxt(q_path, dtype={'names': ('id', 'year', 'month', 'day', \
'Q','status'), 'formats': ('i4', 'i4', 'i4', 'i4', 'f4', 'U10')})
# open the precipitation file and read line by line.
with open(p_path) as f:
lines = (line for line in f if not line.startswith('#'))
dataP = np.loadtxt(lines, dtype={'names': ('year', 'month', 'day', 'hour', \
'dayls', 'P','SRAD', 'SWE', 'Tmax', 'Tmin', 'Vp'), 'formats': ('i4', 'i4', \
'i4', 'i4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4')}, skiprows=4)
#Total number of records
nQ = dataQ.shape[0]
nP = dataP.shape[0]
# Set start and end data information for the streamflow record forcing data.
yearStart = dataQ[1][1]
yearEnd = dataQ[nQ-1][1]
nYears = (yearEnd - yearStart) + 1
yearEnd = yearStart + 1 #switch from the whole record, to the individual year
#Indexes to get annual record for statistics
startQ = -1 #the beginning index of the annual record
endQ = -1 #the end index of the annual record
holdP = 0 #hold the index of the annual record
# don't give a good value if the record is not long enough.
if nYears < 10:
print("gauge_id: " + gauge_id)
print("not enough years in this record to generate stats")
for z in range(1, nMetrics):
mk[count_id][z] = -999
continue
print("number of years in the record: ", nYears)
# year, days on record, P, Q, P-Q, P/Q, (P-Q)/P,
yearStats = np.zeros([nYears, 7])
# Now loop through and calculate some stuff for individual years
# First go through the whole record, and figure out where the years are.
for y in range(0, nYears-1):
# print("Looping year: ", yearStart)
startYearTime = time.time() #time this loop for efficiency.
#These two loops get the index of the first and last record in the sample.
#keep track of where the previous years are in the loop with startQ and endQ
for i in range(endQ+1, nQ):
if dataQ[i][1] == yearStart:
startQ = i
yearStart = yearStart + 1 #advance the year for the next iteration
break #once the index is found, immediately end the loop.
# print('startQ: ', startQ)
for i in range(startQ, nQ):
if dataQ[i][1] == yearEnd:
endQ = i
yearEnd = yearEnd + 1
break
#print('endQ: ', endQ)
j = 0 #need a counter from zero for each year
m = (endQ - startQ) + 1 #number of valurd in the discharge range.
listQ = np.zeros(m) # list each record in sample for doing these statistics.
listP = np.zeros(m) # corresponding precipitation list.
# Now that we know where the year starts and ends, go through it day by day.
for i in range(startQ, endQ):
iYear = dataQ[i][1]
yearStats[y][0] = iYear
iMonth = dataQ[i][2]
iDay = dataQ[i][3]
Q = dataQ[i][4]
#Match the precipitation day, and save the data.
for iP in range(holdP, nP):
if dataP[i][0] == iYear and dataP[i][1] == iMonth and dataP[i][2] == iDay:
P = dataP[i][5]
holdP = iP #Hold this spot, to start looking from here next time
break
if iP == nP:
print("date: ", iYear, '-', iMonth,"-", iDay)
print("There is no corresponding precipitation for the Discharge")
P = 0
if Q > -1:
# convert discharge from cfs to mm
# mm/day=(ft3/sec)(m3/ft3)(sec/min)(min/hr)(hr/day)(1/km2)(1/m2)(mm/m)
Q = Q*(1/(3.28084**3))*60*60*24*(1/catch_area)*(1/1000**2)*1000
yearStats[y][1] = yearStats[y][1] + 1
listQ[j] = Q
listP[j] = P # I don't know if I should include precip if no discharge.
j = j + 1
else:
listQ = np.delete(listQ, [j])
listP = np.delete(listP, [j])
# print("date: ", iYear, '-', iMonth,"-", iDay, \
# "The Discharge record is bad and will not be used")
# if too many records are bad, then don't do any statistics
if yearStats[y][1] > 300:
yearStats[y][2] = sum(listP)
yearStats[y][3] = sum(listQ)
yearStats[y][4] = sum(listP) - sum(listQ)
yearStats[y][5] = sum(listQ) / max(1,sum(listP))
yearStats[y][6] = (sum(listP) - sum(listQ)) / max(1,sum(listP))
#print("total time for this year: ", time.time() - startYearTime)
# Printing the precipitation and discharge to individual files
orig_stdout = sys.stdout
f = open('pq/' + gauge_id + '.txt', 'w+')
sys.stdout = f
with np.printoptions(precision=2, suppress=True):
print('year nDays P Q P-Q Q/P (P-Q)/p')
for y in range(0, nYears-1):
print(yearStats[y][0],yearStats[y][1], yearStats[y][2], yearStats[y][3], \
yearStats[y][4], yearStats[y][5], yearStats[y][6])
sys.stdout = orig_stdout
f.close()
#print("total time for this catchment: ", time.time() - startCatchmentTime)
#https://vsp.pnnl.gov/help/Vsample/Design_Trend_Mann_Kendall.htm
# 1. List the data in the order in which they were collected over time
# Done above
# 2. Determine the sign of all n(n−1)/2 possible differences xj−xk, where j>k.
# These differences are: x2−x1,x3−x1,...,xn−x1,x3−x2,x4−x2,...,xn−xn−2,xn−xn−1
# 3 3. Let sgn(xj−xk) be an indicator function that takes on the values:
# 1, 0, or -1 according to the sign of xj−xk, that is,
# sgn(xj−xk) = 1 if xj−xk> 0
# sgn(xj−xk) = 0 if xj−xk = 0, or if the sign of xj−xk cannot be determined
# sgn(xj−xk) = -1 if xj−xk< 0
# 4. Compute S = sum(sum(sgn(xj-xK)))
# 5. n should be greater than 10
# 6. Compute the variance of S as follows:
# VAR(S) = (1/18)[n(n-1)(2n+5) - sum(t(t-1)(2t+5)]
s = 0
S = np.zeros(metrics)
var_s = np.zeros(metrics)
t = np.zeros(metrics)
for s in range(0, nYears-2):
for i in range(s+1, nYears-1):
for stat in range(0, metrics):
d = yearStats[i][stat+2] - yearStats[s][stat+2]
if d > 0:
S[stat] = S[stat] + 1
if d < 0:
S[stat] = S[stat] - 1
# Calculate the tie score
for stat in range(0, metrics):
statList = np.ndarray.tolist(yearStats[:,stat+2])
#print(statList)
statDict = {i:statList.count(i) for i in statList}
#print(statDict)
for i in statDict:
tp = statDict[i]
t[stat] = tp*(tp-1)*(2*tp-5)
if sum(t) > 0:
print("tie in gauge: " + gauge_id)
print("tie score: ", t)
for stat in range(0, metrics):
var_s[stat] = (1/18)*((nYears*(nYears-1)*(2*nYears+5))-t[stat])
if S[stat] > 0:
mk[count_id][stat+1] = (S[stat]-1)/np.sqrt(var_s[stat])
if S[stat] < 0:
mk[count_id][stat+1] = (S[stat]+1)/np.sqrt(var_s[stat])
print("Mann-Kendall Statistic")
with np.printoptions(precision=2, suppress=True):
print(mk[count_id])
# move the counter for the next gauge
count_id = count_id + 1
# Create an array with the Camels latitudes and longitudes, for the output.
camels_ll = np.genfromtxt('/home/NearingLab/data/camels_attributes_v2.0/gauge_id_lat_lon_name.txt', delimiter=';')
G = camels_ll.shape[0]
# Printing the results out to file.
# copying the sys.stdout
orig_stdout = sys.stdout
f = open('mann-kendal.txt', 'w+')
sys.stdout = f
with np.printoptions(precision=2, suppress=True):
for i in range(0, nGauges-1):
# Brute force method to line up the latitudes and longitudes with the mann-kendal results.
for iLL in range(0,G):
if mk[i][0] == camels_ll[iLL][0]:
lat = camels_ll[iLL][1]
lon = camels_ll[iLL][2]
break
if iLL == G:
lat = -9999
lon = -9999
print(mk[i][0], lat, lon, mk[i][1], mk[i][2], mk[i][3], mk[i][4], mk[i][5])
sys.stdout = orig_stdout
f.close()
print("total time for this program: ", time.time() - startProgramTime)