-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy path01a_stocks_clean_crsp.Rmd
92 lines (78 loc) · 3.98 KB
/
01a_stocks_clean_crsp.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
---
title: "Identifying Price Informativeness"
author: "Eduardo Davila^[Yale] & Cecilia Parlatore^[NYU Stern]"
date: "`r Sys.Date()`"
output:
html_document: default
pdf_document: default
---
```{r}
library(here); library(tidyverse); library(lubridate); library(data.table)
path <- here::here(); print(path); setwd(path); rm(path)
source("functions/fn_sp500_dummy.R")
load("input/data_raw/raw_sp500.RData")
load("input/data_raw/raw_crsp.RData")
```
# Building CRSP
- Note that the CRSP data is monthly, while the compustat data is quarterly/annual. Variables are in millions of dollars.
- shrout is in thousands of shares.
- hexcd doesn't change (header exchange code): Exchange Code - Header displays the Exchange Code on which a security was last listed.
- Combines permcos with multiple permnos
- shrout: measured in thousands of shares
- vol: sum of the trading volumes during the month, reported in units of 100 (not adjusted for splits, etc)
<!-- Example: Berkshire Hathaway, permnos 17778 and 83443, permnco 540 -->
```{r}
fn_clean_crsp <- function(crsp){
output <- crsp %>%
# filter(shrcd == 10 | shrcd == 11) %>% # needed when using csv data
filter(hexcd %in% c(1, 2, 3)) %>%
filter(exchcd %in% c(1, 2, 3)) %>%
mutate(date = ymd(date),
year = year(date),
month = month(date)) %>%
mutate(prc = ifelse(is.na(prc), dlprc, prc),
ret = ifelse(is.na(ret), dlret, ret)) %>%
mutate(mcap_permno = abs(prc)*shrout/1000) %>% # mcap for each permno in millions
group_by(year, month, permco) %>%
mutate(mcap = sum(mcap_permno)) %>% # to find market cap, merge permnos with same permnco
arrange(year, month, permco, desc(mcap_permno)) %>% # sort from largest to smallest permno
group_by(year, month, permco) %>%
slice(1) %>% # keep only permno with largest marketcap
ungroup() %>% # distinct(year, date, permno) %>%
mutate(prc = abs(prc), # negative prices means security wasn't traded
ashrout = shrout*cfacshr,
adjprc = prc/cfacpr,
turnover = vol/shrout*10, # volume is on hundred of shares/shrout is thousands, the *10 makes it thousands
sic1 = siccd %/% 1000,
sic2 = siccd %/% 100,
sic1 = factor(sic1),
hexcd = factor(hexcd),
exchcd = factor(exchcd)) %>%
mutate(sic1 = fct_recode(sic1,
"Agriculture/Mining/Construction" = "0",
"Agriculture/Mining/Construction" = "1",
"Manufacturing" = "2",
"Manufacturing" = "3",
"Transportation/Communications" = "4",
"Wholesale/Retail trade" = "5",
"Finance/Insurance" = "6",
"Services" = "7",
"Services" = "8",
"Public administration" = "9")) %>%
mutate(hexcd = fct_recode(hexcd,
"NYSE" = "1",
"AMEX" = "2",
"NASDAQ" = "3")) %>%
mutate(exchcd = fct_recode(exchcd,
"NYSE" = "1",
"AMEX" = "2",
"NASDAQ" = "3")) %>%
select(permno, date, year, month, mcap, adjprc, turnover, hexcd, exchcd,
comnam, siccd, sic1, sic2, shrout, ashrout, prc, ret, retx, vwretd) %>% #vwretd, vwretx, ewretx, ewretd, sprtrn
arrange(permno, year, month) %>%
fn_sp500_dummy(sp500)
return(output)
}
crsp <- fn_clean_crsp(crsp_m)
save(crsp, file = "intermediate/clean_crsp.RData")
```