-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathARMA Lab 2 Extras.Rmd
executable file
·265 lines (207 loc) · 8.6 KB
/
ARMA Lab 2 Extras.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
---
title: ARMA Models Lab 2 Extras
output:
html_document:
toc: true
toc_float: true
---
This goes into some of the material in Lab 2 in more depth.
## ----diff1---------------------------------------------------------------
par(mfrow=c(1,2))
plot(anchovy, type="l")
plot(diff(anchovy), type="l")
## ----diff2---------------------------------------------------------------
diff.anchovy = diff(anchovy)
kpss.test(diff.anchovy)
## ----test.dickey.fuller.diff---------------------------------------------
test=ur.df(diff.anchovy, type="none", lags=2)
summary(test)
# A random walk
A random walk is non-stationary. The variance increases with time. We can see this is we plot many random walks on top of each other.
A single random walk can be made with a simple for loop or with a `cumsum` on white noise.
```{r rw}
# this is a random walk
rw <- rep(0,TT)
for(i in 2:TT) rw[i] <- rw[i-1]+rnorm(1, sd=sd)
# and this is a random walk
rw <- cumsum(rnorm(TT, sd=sd))
```
Let's plot 100 random walks on top of each other. Notice how the variance (range of values on the y-axis grows) the farther we get from the start.
```{r rws}
nsim <- 100
plot(rw, type="l", ylim=c(-4,4))
for(i in 1:nsim){
rw <- cumsum(rnorm(TT, sd=sd))
lines(rw)
}
```
*Optional* We can make a similar plot with ggplot.
```{r rw.ggplot}
rw = rep(0,TT)
for(i in 2:TT) rw[i]=rw[i-1]+err[i]
dat = data.frame(t=1:TT, rw=rw)
p1 = ggplot(dat, aes(x=t, y=rw)) + geom_line() +
ggtitle("Random Walk") + xlab("") + ylab("value")
rws = apply(matrix(rnorm(TT*nsim),TT,nsim),2,cumsum)
rws = data.frame(rws)
rws$id = 1:TT
rws2=melt(rws, id.var="id")
p2 = ggplot(rws2, aes(x=id,y=value,group=variable)) +
geom_line() + xlab("") + ylab("value") +
ggtitle("The variance of a random walk process grows in time")
grid.arrange(p1, p2, ncol = 1)
```
## *Optional* Adding an intercept to a random walk model
When we add an intercept to a random walk, it behaves very differently than a stationary process with intercept added.
$$x_t = x_{t-1} + \alpha + e_t$$
looks very different than
$$x_t = \beta_1 x_{t-1} + \alpha + e_t$$
Adding an intercept to a random walk leads to an upward linear drift. While in the AR(1) model, it leads to a flat level of $\alpha/(1-\beta_1)$.
```{r rwt.vs.ar1t}
TT <- 100
alpha <- 1
rwi <- rep(0,TT)
err <- rnorm(TT, sd=sd)
for(i in 2:TT) rwi[i] <- rwi[i-1] + intercept + err[i]
ar1i <- rep(intercept/(1-beta1),TT)
for(i in 2:TT) ar1i[i] <- beta1 * ar1i[i-1] + intercept + err[i]
par(mfrow=c(1,2))
plot(rwi, type="l")
plot(ar1i, type="l")
```
## ----fig.stationarity4, fig.height = 4, fig.width = 8, fig.align = "center", echo=FALSE----
require(ggplot2)
require(gridExtra)
dat = data.frame(t=1:TT, y=cumsum(rnorm(TT)))
dat$yi = cumsum(rnorm(TT,intercept,1))
dat$yti = cumsum(rnorm(TT,intercept+trend*1:TT,1))
p1 = ggplot(dat, aes(x=t, y=y)) + geom_line() + ggtitle("Random Walk")
p2 = ggplot(dat, aes(x=t, y=yi)) + geom_line() + ggtitle("with non-zero mean added")
p3 = ggplot(dat, aes(x=t, y=yti)) + geom_line() + ggtitle("with linear trend added")
grid.arrange(p1, p2, p3, ncol = 1)
## ----fig.vis, fig.height = 4, fig.width = 8, fig.align = "center", echo=FALSE----
require(ggplot2)
dat = subset(landings, Species %in% c("Anchovy", "Sardine") &
Year <= 1989)
dat$log.metric.tons = log(dat$metric.tons)
ggplot(dat, aes(x=Year, y=log.metric.tons)) +
geom_line()+facet_wrap(~Species)
## ----fig.df, fig.height = 6, fig.width = 8, fig.align = "center", echo=FALSE----
require(ggplot2)
require(gridExtra)
#####
ys = matrix(0,TT,nsim)
for(i in 2:TT) ys[i,]=ys[i-1,]+rnorm(nsim)
rws = data.frame(ys)
rws$id = 1:TT
library(reshape2)
rws2=melt(rws, id.var="id")
p1 = ggplot(rws2, aes(x=id,y=value,group=variable)) +
geom_line() + xlab("") + ylab("value") +
ggtitle("Null Non-stationary", subtitle="White noise")
ys = matrix(0,TT,nsim)
for(i in 2:TT) ys[i,]=theta*ys[i-1,]+rnorm(nsim)
ar1s = data.frame(ys)
ar1s$id = 1:TT
library(reshape2)
ar1s2=melt(ar1s, id.var="id")
p2 = ggplot(ar1s2, aes(x=id,y=value,group=variable)) +
geom_line() + xlab("") + ylab("value") +
ggtitle("Alternate Stationary", subtitle="AR1")
#####
ys = matrix(intercept,TT,nsim)
for(i in 2:TT) ys[i,]=intercept+ys[i-1,]+rnorm(nsim)
rws = data.frame(ys)
rws$id = 1:TT
library(reshape2)
rws2=melt(rws, id.var="id")
p3 = ggplot(rws2, aes(x=id,y=value,group=variable)) +
geom_line() + xlab("") + ylab("value") +
ggtitle("", subtitle="White noise + drift")
ys = matrix(intercept/(1-theta),TT,nsim)
for(i in 2:TT) ys[i,]=intercept+theta*ys[i-1,]+rnorm(nsim)
ar1s = data.frame(ys)
ar1s$id = 1:TT
library(reshape2)
ar1s2=melt(ar1s, id.var="id")
p4 = ggplot(ar1s2, aes(x=id,y=value,group=variable)) +
geom_line() + xlab("") + ylab("value") +
ggtitle("Alternate", subtitle="AR1 + non-zero level")
#####
ys = matrix(intercept+trend*1,TT,nsim)
for(i in 2:TT) ys[i,]=intercept+trend*i+ys[i-1,]+rnorm(nsim)
rws = data.frame(ys)
rws$id = 1:TT
library(reshape2)
rws2=melt(rws, id.var="id")
p5 = ggplot(rws2, aes(x=id,y=value,group=variable)) +
geom_line() + xlab("") + ylab("value") +
ggtitle("", subtitle="White noise + exponential drift")
ys = matrix((intercept+trend*1)/(1-theta),TT,nsim)
for(i in 2:TT) ys[i,]=intercept+trend*i+theta*ys[i-1,]+rnorm(nsim)
ar1s = data.frame(ys)
ar1s$id = 1:TT
library(reshape2)
ar1s2=melt(ar1s, id.var="id")
p6 = ggplot(ar1s2, aes(x=id,y=value,group=variable)) +
geom_line() + xlab("") + ylab("value") +
ggtitle("Alternate", subtitle="AR1 + linear trend")
#####
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2)
## ----dickey.fuller, message=FALSE, warning=FALSE-------------------------
require(urca)
test = ur.df(anchovy, type="none", lags=0)
test
## ----teststat------------------------------------------------------------
attr(test, "teststat")
## ----cval----------------------------------------------------------------
attr(test,"cval")[2]
## ----dickey.fuller2, message=FALSE, warning=FALSE------------------------
require(tseries)
adf.test(anchovy, alternative="stationary")
## ----dickey.fuller.ur.df, message=FALSE, warning=FALSE-------------------
require(urca)
k = trunc((length(anchovy)-1)^(1/3))
test = ur.df(anchovy, type="trend", lags=k)
test
## ----kpss.test, message=FALSE, warning=FALSE-----------------------------
require(tseries)
kpss.test(anchovy, null="Trend")
## ----diff1---------------------------------------------------------------
par(mfrow=c(1,2))
plot(anchovy, type="l")
plot(diff(anchovy), type="l")
## ----diff2---------------------------------------------------------------
diff.anchovy = diff(anchovy)
kpss.test(diff.anchovy)
## ----test.dickey.fuller.diff---------------------------------------------
test=ur.df(diff.anchovy, type="none", lags=2)
summary(test)
## Dickey-Fuller test
The null hypothesis for this test is that the data are a random walk (non-stationary). You want the null hypothesis to be rejected.
### Dickey-Fuller test on white noise
Let's first do the test on data we know is stationary, white noise. We have to make choose the `type` and `lags`. If you have no particular reason to not include an intercept and trend, then use `type="trend"`. This allows both intercept and trend. Next you need to chose the `lags`. The default of `lags=1` is generally ok but we will use `lags=0` to fit a simpler model given that we don't have many years of data.
It is fitting this model to the data and you are testing if `z.lag.1` is 0.
`x(t) - x(t-1) = z.lag.1 * x(t-1) + intercept + tt * t`
If you see `***` or `**` on the coefficients list for `z.lag.1`, it indicates that `z.lag.1` is significantly different than 0 and this supports the assumption of stationarity.
The `intercept` and `tt` estimates indicate where there is a non-zero level (intercept) or linear trend (tt).
```{r df.wn}
require(urca)
wn <- rnorm(TT)
test <- ur.df(wn, type="trend", lags=0)
summary(test)
```
The coefficient part of the summary indicates that `z.lag.1` is different than 0 (so stationary) an no support for intercept or trend.
Scroll down and notice that the test statistic is LESS than the critical value for `tau3` at 5 percent. This means the null hypothesis is rejected at $\alpha=0.05$, a standard level for significance testing.
### Dickey-Fuller on the anchovy data
```{r df.anchovy}
test = ur.df(dat, type="trend", lags=0)
summary(test)
```
Scroll down to the critical values. The test-statistic is greater than the critical values. This means that we cannot reject the null hypothesis that the data contain a random walk.
## ----dickey.fuller2, message=FALSE, warning=FALSE------------------------
## ----dickey.fuller.ur.df, message=FALSE, warning=FALSE-------------------
require(urca)
k = trunc((length(anchovy)-1)^(1/3))
test = ur.df(anchovy, type="trend", lags=k)
test