forked from rsvp/fecon235
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfecon235.py
370 lines (321 loc) · 15.3 KB
/
fecon235.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
# Python Module for import Date : 2018-03-12
# vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
'''
_______________| fecon235.py : unifies lib modules for fecon235 project.
- Designed to be invoked by an IPython console or Jupyter notebook
for convenient command access.
CASUAL usage:
from fecon235.fecon235 import *
- User can always foo?? to access foo's origin.
- Unifies essential lib modules in one place, thus
frequently used commands can be generalized with shorter names.
CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
2018-03-11 Add foreinfl() to forecast Unified Inflation 1-year ahead.
2018-03-11 Add foreholt() function, generalizing yi_fred.holtfred(),
but retain holtfred() here for backward compatibility.
2017-06-27 Include module ys_prtf_boltzmann.py and group world4d.
2017-06-18 Add groupgemrat(), groupdiflog(), and covdiflog().
Include module ys_matrix.py.
2017-06-06 Include our module ys_mlearn.py
2017-05-16 Include our module ys_gauss_mix.py
2016-12-29 Modify forecast() to include optimize_holtforecast().
2016-12-19 Import lib.ys_opt_holt to optimize Holt-Winters alpha and beta.
2016-01-22 Include plotdf() in plot() as first candidate.
Rename cotr() to groupcotr(), then include smoothing.
2016-01-19 Add groupfun() to apply some function to group columns.
This was derived by generalizing grouppc.
Add cotr() for normalized COTR position indicators.
2016-01-11 Add forefunds() to forecast Fed Funds rate.
2016-01-08 For groupgeoret, sort results in a list, yearly default.
For grouppc, freq default as in pcent.
2016-01-05 Add groupget, grouppc, groupgeoret, groupholtf functions.
2015-12-20 python3 compatible: lib import fix.
2015-12-17 python3 compatible: fix with yi_0sys
2015-09-14 Add getstock and second argument maxi to get().
2015-09-03 Exception handling.
2015-08-31 First version unifies some commands.
TODO
[ ] - adopt Bloomberg Open Symbology for obscure financial instruments,
see http://bsym.bloomberg.com/sym/
Bloomberg Global ID is a random 12-character alpha-numeric.
'''
from __future__ import absolute_import, print_function
import pandas as pd
from .lib import yi_0sys as system
# CASUAL import style below intentionally for Jupyter notebooks
# and interactive settings (lib modules follow proper import protocol).
# We access essential modules and catch collisions in namespace:
from .lib.yi_1tools import *
from .lib.yi_fred import *
from .lib.yi_matrix import *
from .lib.yi_plot import *
from .lib.yi_quandl import *
# yi_quandl_api should NOT be imported.
from .lib.yi_simulation import *
from .lib.yi_stocks import *
from .lib.yi_timeseries import *
from .lib.ys_gauss_mix import *
from .lib.ys_mlearn import *
from .lib.ys_opt_holt import *
from .lib.ys_prtf_boltzmann import *
# GROUPS: specify our favorite series as a dictionary
# where key is name, and corresponding value is its data code:
group4d = { 'Zero10' : d4zero10, 'SPX' : d4spx, 'XAU' : d4xau,
'EURUSD' : d4eurusd, 'USDJPY' : d4usdjpy }
# For usage, see fred-georeturns.ipynb for details,
# in particular, functions like group*() in this module.
cotr4w = { 'Bonds' : w4cotr_bonds, 'Equities' : w4cotr_equities,
'Metals' : w4cotr_metals, 'USD' : w4cotr_usd }
# For usage, see qdl-COTR-positions.ipynb for details,
# "Market position indicators using CFTC COTR"
# COTR := Commitment of Traders Report.
world4d = { 'America' : 's4spy', 'Europe' : 's4ezu',
'Japan' : 's4ewj', 'Emerging' : 's4eem', 'Gold' : 's4gld' }
# For usage, see prtf-boltzmann-1.ipynb for details,
# Exchange Traded Funds (ETF) of daily frequency
# representing EQUITIES worldwide plus gold.
def get( code, maxi=0 ):
'''Unifies getfred, getqdl, and getstock for data retrieval.
code is fredcode, quandlcode, futures slang, or stock slang.
maxi should be an integer to set maximum number of data points,
where 0 implies the default value.
get() will accept the vendor code directly as string, e.g.
from FRED and Quandl, or use one of our abbreviated variables
documented in the appropriate module listed above.
The notebooks provide good code examples in action.
Futures slang is of the form 'f4spotyym' where
spot is the spot symbol in lower case,
yy is the last two digits of the year
m is the delivery month code,
so for December 2015 COMEX Gold: 'f4xau15z'
Stock slang can be also used for ETFs and mutual funds.
The general form is 's4symbol' where the symbol must be in
lower case, so for SPY, use 's4spy' as an argument.
'''
try:
df = getfred( code )
except:
try:
if maxi:
df = getqdl( code, maxi )
else:
df = getqdl( code )
except:
try:
if maxi:
df = getstock( code, maxi )
else:
df = getstock( code )
except:
raise ValueError('INVALID symbol string or code for fecon get()')
return df
def plot( data, title='tmp', maxi=87654321 ):
'''Unifies plotdf, plotfred and plotqdl for plotting data.
The "data" argument could also be fredcode or quandlcode,
but not stock slang -- a Dataframe is first choice,
yet (as of 2016-01-20) Series type is also acceptable.
Assumes date index; for numbered index or List, use plotn() instead.
'''
try:
plotdf( tail(data, maxi), title )
# 2016-01-20 plotdf now sports a todf pre-filter for convenience.
except:
try:
plotfred( data, title, maxi )
except:
try:
plotqdl( data, title, maxi )
except:
raise ValueError('INVALID argument or data for fecon plot()')
return
def forecast( data, h=12, grids=0, maxi=0 ):
'''Make h period ahead forecasts using holt* or optimize_holtforecast,
where "data" may be a DataFrame, fredcode, quandlcode, or stock slang.
(Supercedes: "Unifies holtfred and holtqdl for quick forecasting.")
'''
# Generalization of 2016-12-29 preserves and expands former interface.
if not isinstance( data, pd.DataFrame ):
try:
data = get( data, maxi )
# ^expecting fredcode, quandlcode, or stock slang
# to be retrieved as DataFrame.
except:
raise ValueError("fecon235.forecast(): INVALID data argument.")
if grids > 0:
# Recommend grids=50 for reasonable results,
# but TIME-CONSUMING for search grids > 49
# to FIND OPTIMAL alpha and beta by minBrute():
opt = optimize_holtforecast( data, h, grids=grids )
# See optimize_holtforecast() in module ys_opt_holt for details.
system.warn( str(opt[1]), stub="OPTIMAL alpha, beta, losspc, loss:" )
return opt[0]
else:
# QUICK forecasts when grids=0 ...
# by using FIXED defaults: alpha=ts.hw_alpha and beta=ts.hw_beta:
holtdf = holt( data )
system.warn("Holt-Winters parameters have NOT been optimized.")
return holtforecast( holtdf, h )
def foreholt( data, h=12, alpha=hw_alpha, beta=hw_beta, maxi=0 ):
'''Holt-Winters forecast h-periods ahead (data slang aware).'''
# "data" can be a fredcode, quandlcode, stock slang,
# OR a DataFrame which will be detected:
if not isinstance( data, pd.DataFrame ):
try:
data = get( data, maxi )
except:
raise ValueError("fecon235.forehalt(): INVALID data argument.")
# To find optimal parameter values for alpha and beta beforehand,
# use optimize_holtforecast() in module ys_opt_holt.
holtdf = holt( data, alpha, beta )
# Interim results will not be retained.
return holtforecast( holtdf, h )
def holtfred( data, h=24, alpha=hw_alpha, beta=hw_beta ):
'''Holt-Winters forecast h-periods ahead (fredcode aware).'''
# Retained for backward compatibility, esp. pre-2016 notebooks.
return foreholt( data, h, alpha, beta )
def groupget( ggdic=group4d, maxi=0 ):
'''Retrieve and create group dataframe, given group dictionary.'''
# Since dictionaries are unordered, create SORTED list of keys:
keys = [ key for key in sorted(ggdic) ]
# Download individual dataframes as values into a dictionary:
dfdic = { key : get(ggdic[key], maxi) for key in keys }
# ^Illustrates dictionary comprehension.
# Paste together dataframes into one large sorted dataframe:
groupdf = paste([ dfdic[key] for key in keys ])
# Name the columns:
groupdf.columns = keys
return groupdf
def groupfun( fun, groupdf, *pargs, **kwargs ):
'''Use fun(ction) column-wise, then output new group dataframe.'''
# In math, this is known as an "operator":
# a function which takes another function as argument.
# Examples of fun: pcent, normalize, etc. See grouppc() next.
# See groupget() to retrieve and create group dataframe.
keys = list(groupdf.columns)
# Compute individual columns as dataframes in a list:
out = [todf( fun(todf(groupdf[key]), *pargs, **kwargs) ) for key in keys]
# ^Python 2 and 3 compatible: apply() removed in Python 3.
# Paste together dataframes into one large dataframe:
outdf = paste( out )
# Name the columns:
outdf.columns = keys
return outdf
def grouppc( groupdf, freq=1 ):
'''Create overlapping pcent dataframe, given a group dataframe.'''
# See groupget() to retrieve and create group dataframe.
# Very useful to visualize as boxplot, see fred-georeturns.ipynb
return groupfun( pcent, groupdf, freq )
def groupdiflog( groupdf, lags=1 ):
'''Difference between lagged log(data) for columns in group dataframe.'''
# See groupget() to retrieve and create group dataframe.
return groupfun( diflog, groupdf, lags )
def covdiflog( groupdf, lags=1 ):
'''Covariance array for differenced log(column) from group dataframe.
For correlation array: apply yi_matrix.cov2cor() later.
'''
# See groupget() to retrieve and create group dataframe.
rates = groupdiflog( groupdf, lags )
V = rates.cov()
# ^Type of V is still pandas DataFrame, so convert to array.
# AVOID the np.matrix subclass; stick with np.ndarrays instead:
return V.values
def groupgeoret( groupdf, yearly=256, order=True ):
'''Geometric mean returns, non-overlapping, for group dataframe.
Argument "yearly" refers to annual frequency, e.g.
256 for daily trading days, 12 for monthly, 4 for quarterly.
___ATTN___ Use groupgemrat() instead for greater accuracy.
'''
keys = list(groupdf.columns)
# Use list comprehension to store lists from georet():
geo = [ georet(todf(groupdf[k]), yearly) + [k] for k in keys ]
# where each georet list gets appended with an identifying key.
if order:
geo.sort(reverse=True)
# Group is ordered in-place with respect to decreasing georet.
return geo
def groupgemrat( groupdf, yearly=256, order=False, n=2 ):
'''Geometric mean rates, non-overlapping, for group dataframe.
Argument "yearly" refers to annual frequency, e.g.
256 for daily trading days, 12 for monthly, 4 for quarterly.
Output is rounded to n-decimal places.
Algorithm takes KURTOSIS into account for greater accuracy.
'''
keys = list(groupdf.columns)
# Use list comprehension to store lists from gemrat():
gem = [ roundit(gemrat(todf(groupdf[k]), yearly), n, echo=False)
+ [k] for k in keys ]
# ^each gemrat list gets appended with an identifying key.
if order:
gem.sort(reverse=True)
# Group is ordered in-place with respect to decreasing gemrat.
return gem
def groupholtf( groupdf, h=12, alpha=ts.hw_alpha, beta=ts.hw_beta ):
'''Holt-Winters forecasts h-periods ahead from group dataframe.'''
# Tip: use all available (non-sliced) data for forecasting.
# This is essentially a Kalman filter with optimal alpha-beta,
# applied to each series individually, not jointly.
# cf. holtfred() which works given a single series dataframe.
forecasts = []
keys = list(groupdf.columns)
for k in keys:
kdf = todf( groupdf[k] )
holtdf = holt( kdf, alpha, beta )
forecastdf = holtforecast( holtdf, h )
forecasts.append( forecastdf )
keysdf = paste( forecasts )
keysdf.columns = keys
return keysdf
def groupcotr( group=cotr4w, alpha=0 ):
'''Compute latest normalized CFTC COTR position indicators.
Optionally specify alpha for Exponential Moving Average
which is a smoothing parameter: 0 < alpha < 1 (try 0.26)
COTR is the Commitment of Traders Report from US gov agency.
'''
# For detailed derivation, see qdl-COTR-positions.ipynb
positions = groupget( group )
norpositions = groupfun( normalize, positions )
# alpha default should skip SMOOTHING operation...
if alpha:
return groupfun( ema, norpositions, alpha )
else:
return norpositions
def forefunds( nearby='16m', distant='17m' ):
'''Forecast distant Fed Funds rate using Eurodollar futures.'''
# Long derivation is given in qdl-libor-fed-funds.ipynb
ffer = getfred('DFF')
# ^Retrieve Fed Funds effective rate, daily since 1954.
ffer_ema = ema( ffer['1981':], 0.0645 )
# ^Eurodollar futures debut.
# ^Exponentially Weighted Moving Average, 30-period.
libor_nearby = get( 'f4libor' + nearby )
libor_distant = get( 'f4libor' + distant )
libor_spread = todf( libor_nearby - libor_distant )
# spread in forward style quote since futures uses 100-rate.
return todf( ffer_ema + libor_spread )
def foreinfl( n=120, alpha=1.0, beta=0.3673 ):
'''Forecast Unified Inflation 1-year ahead per fred-inflation.ipynb.'''
# Holt-Winters parameters alpha and beta are optimized
# from the 1960-2018 dataset, consisting of 697 monthly points.
# Each "way" is an orthogonal method, to be averaged as way[0].
way = [-9, -9, -9, -9, -9]
inflall = get( m4infl ) # synthetic Unified Inflation, monthly.
infl = tail(inflall, n)
# ^Default n=120 months, i.e. last 10 years.
way[1] = str(infl.index[-1]).replace(" 00:00:00", "")
# ^Most recent month for CPI, CPIc, PCE, PCEc data.
gm = gemrat( infl, yearly=12 )
way[2] = gm[0] # Geometric Mean Rate over n months.
hw = foreholt( infl, 12, alpha, beta ) # Holt-Winters model.
way[3] = (tailvalue(hw) - 1) * 100 # Convert forecasted level to rate.
bond10 = get(m4bond10)
tips10 = get(m4tips10)
bei = todf(bond10 - tips10) # 10-year BEI Break-even Inflation.
# ^Bond market data will be more recent than m4infl.
way[4] = tailvalue(bei)
# Final forecast is the AVERAGE of orthogonal ways:
way[0] = sum(way[2:]) / len(way[2:])
# "way" in SUMMARY is thus: [Average, "infl-date", GMR, HW, BEI]
# e.g. [2.2528, '2018-01-01', 1.5793, 3.0791, 2.1000]
return way
if __name__ == "__main__":
system.endmodule()