-
Notifications
You must be signed in to change notification settings - Fork 0
/
deflection_of_veel_analysis.R
67 lines (55 loc) · 2.28 KB
/
deflection_of_veel_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
library(dplyr)
library(readxl)
library(tidyverse)
library(effects)
library(ggplot2)
library(tools)
library(broom)
library(car)
library(stylo)
#### read the data ####
# read the dataset with the population sizes:
Datasteden <- read.csv(file = "StedenBelgiëNederland19eEn20eEeuw.csv",
header = TRUE)
# create histogram of the amount of cities per population size in Datasteden:
hist(Datasteden$BAIy1850,
xlab='Populatiegrootte',
ylab='Aantal steden',
main='Histogram van de steden per populatiegrootte')
# same frequencies but log-transformed:
hist(log(Datasteden$BAIy1850),
xlab='Populatiegrootte op de logaritmische schaal',
ylab='Aantal steden',
main='Histogram van de steden per populatiegrootte (loggetransformeerd)')
# read the data of the deflection of veel:
Veel <- read_xlsx("The deflection of veel.xlsx",
col_names = TRUE)
# add population counts:
Veel <- left_join(Veel, Datasteden, by = c("BIRTHPLACE" = "CITY"))
# throw away rows without birthplace:
Veel <- Veel %>% drop_na(BAIy1850)
Veel$Change <- as.factor(Veel$Change)
Veel <- droplevels(Veel[!Veel$Change=="NA",])
# add log transformed population counts to the data set:
symbox(Veel$BAIy1850)
Veel$logBAIy1850=log(Veel$BAIy1850)
#### Analysis per bin of cities: 3 bins ####
Veel$Change2 <- as.numeric(Veel$Change)-1 # rescale to numeric for ggplot
# create bins:
Veel <- Veel %>% mutate(logBAIy1850_bin_3 = cut_interval(Veel$logBAIy1850,
n = 3))
Model_bins <- glm(Change ~ year*logBAIy1850_bin_3,
data=Veel, family = binomial(link="logit"))
print(summary(Model_bins))
plot(allEffects(Model_bins))
print(ggplot(data = Veel, aes(x = year, y = Change2, color=logBAIy1850_bin_3)) +
geom_smooth(method = "glm",
method.args = list(family = binomial))+
labs(x="Jaar", y="vele vs veel") +
guides(col=guide_legend(title="Populatiegrootte")) +
scale_color_discrete(labels = c("Klein", "Middelgroot", "Groot")) +
scale_x_continuous(limits = c(1850, 1990),
breaks = seq(1850, 1990, by = 10)) +
scale_y_continuous(limits = c(0,1),
n.breaks = 10)+
theme_bw())