-
Notifications
You must be signed in to change notification settings - Fork 1
/
compute_sp500_returns.R
137 lines (120 loc) · 4.45 KB
/
compute_sp500_returns.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# install.packages("tidyverse")
# install.packages("tidyquant")
# install.packages("timetk")
# install.packages("lubridate")
# install.packages("vtable")
library(tidyverse)
library(tidyquant)
library(timetk)
library(lubridate)
library(vtable)
# Helper functions --------------------------------------------------------
# Functions to collapse from daily to quarterly
aggregation_funs <- function(x) {
tibble(
avg = na_if(mean(x, na.rm=TRUE),NaN),
end_of_period = last(x, na_rm = TRUE),
sd = sd(x, na.rm=TRUE)
)
}
# Functions to go from quarterly to annual
mean_4q <- slidify(mean, .period = 4, .align = "right")
cumret_4q <- slidify(\(x) reduce(x, \(cumret,ret) cumret*(1+ret/4), .init=1), .period = 4, .align = "right")
cumsum_4q <- slidify(\(x) reduce(x, \(cumsum,ret) cumsum+ret/4), .period = 4, .align = "right")
# Download data -----------------------------------------------------------
yahoo_raw <- tq_get(
"^GSPC",
get = "stock.prices",
from = "1950-01-01",
to = "2023-01-01"
)
# Compute S&P 500 returns ------------------------------------------------------
# keep only adjusted end-of-day price
price <- yahoo_raw %>%
select(symbol,date,adjusted)
returns <-
map(
list("arithmetic", "log"),
\(type)
price %>%
tq_transmute(
select = adjusted,
mutate_fun = allReturns,
type = type
) %>%
rename_with(\(name) if (type == "log") paste0(name, "_log") else name)
) %>%
bind_cols() %>%
# remove weekly returns
select(!c(date_log) & !contains("weekly")) %>%
# rename variables
rename_if(is.numeric,\(x) paste0(x,"_ret")) %>%
# annualize
mutate(
across(contains("daily"),\(x) 252*x),
across(contains("monthly"),\(x) 12*x),
across(contains("quarterly"),\(x) 4*x)
)
# Aggregate to quarterly frequency ----------------------------------------
## Returns ----------------------------------------
returns_aggregated_quarterly <- summarize_by_time(
returns,
.date_var = date,
.by = "quarter",
across(where(is.numeric),
\(x) aggregation_funs(x),
.unpack = TRUE
),
.type = "ceiling"
) %>%
# Shift to the last day of the period
mutate(date = subtract_time(date, "1 day")) %>%
# Remove column if all NA
select_if(~!all(is.na(.)))
# Rename identical columns with the same name
duplicated_cols <- duplicated(as.matrix(returns_aggregated_quarterly),MARGIN=2)
names(returns_aggregated_quarterly)[duplicated_cols]<-str_remove_all(names(returns_aggregated_quarterly)[duplicated_cols],"_end_of_period|_avg")
# Remove identical columns
returns_aggregated_quarterly <- as_tibble(unique(as.matrix(returns_aggregated_quarterly),MARGIN=2, fromLast = TRUE )) %>%
mutate(across(!c("date"),as.numeric)) %>%
mutate(across(any_of("date"),as_date))
# Order columns
returns_aggregated_quarterly <- returns_aggregated_quarterly %>% relocate(sort(names(.))) %>% relocate(any_of(c("date")))
## Prices ----------------------------------------
price_aggregated_quarterly <- summarize_by_time(
price,
.date_var = date,
.by = "quarter",
across(adjusted,last,.unpack = TRUE),
.type = "ceiling"
) %>%
# Shift to the last day of the period
mutate(date = subtract_time(date, "1 day")) %>%
# Add lags
tk_augment_lags(adjusted, .lags=c(1,4))
# merge returns and price
ts_q <- full_join(returns_aggregated_quarterly,price_aggregated_quarterly, by=c("date"))
# Compute annual returns at quarterly frequency in four different ways --------------------------------------------------------------------
annual_returns <- ts_q %>%
mutate(
# 1. Average daily returns over a year,
annual_ret_from_daily_avg = mean_4q(daily_ret_avg),
# 2. Quarterly returns computed from average daily returns, cumulated over a year
annual_cumret_from_quart_daily_avg = cumret_4q(daily_ret_avg)-1,
# 3. Quarterly returns averaged over a year
annual_avgret_from_quart = mean_4q(quarterly_ret),
# 4. Quarterly returns computed from average daily returns, cumulated over a year
annual_ret = adjusted/adjusted_lag4-1
) %>%
select(
date,
annual_ret_from_daily_avg, # 1
annual_cumret_from_quart_daily_avg, # 2
annual_avgret_from_quart, # 3
annual_ret # 4
)
# Show summary stats in viewer ------------------------------------------------------
annual_returns %>% sumtable
print(cor(data.matrix( annual_returns %>% select(-date) ), use="pairwise.complete.obs"),digits=3)
# Nice manners: keep only variable with final results
rm(list=setdiff(ls(), "annual_returns"))