Skip to content

Commit e821ff2

Browse files
committed
improve DB input
1 parent 17fbb56 commit e821ff2

File tree

3 files changed

+36
-20
lines changed

3 files changed

+36
-20
lines changed

src/000_run_pipeline.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ source("src/00_install_R_packages.R", echo = TRUE)
1313
source("src/01_download_data.R", echo = TRUE)
1414
source("src/02_cars_to_db.R", echo = TRUE)
1515
source("src/02_bikes_to_db.R", echo = TRUE)
16-
source("src/03_temporal_features.R", echo = TRUE)
16+
# TODO: do we still need this table, or should it be converted into a weather table?
17+
# source("src/03_temporal_features.R", echo = TRUE)

src/02_bikes_to_db.R

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
# load libraries ####
88
# use 00_install_R_packages.R for installing missing packages
9-
sapply(c("dplyr", "DBI", "RSQLite", "tidyr", "lubridate"),
9+
sapply(c("dplyr", "DBI", "RSQLite", "tidyr", "chron", "lubridate"),
1010
require, character.only = TRUE)
1111

1212
file <- "data/raw/Fahrradzaehlstellen-Stundenwerte.csv"
@@ -22,28 +22,38 @@ bikes <-
2222
# wide to long format
2323
gather(location, count, -date, -hour, -weather, -temperature, -windspeed) %>%
2424
mutate(date = as.character(dmy(date))) %>%
25-
mutate(hour = as.integer(substring(hour, 1, 2))) %>%
26-
mutate(vehicle = "bike") #%>%
25+
mutate(year = as.integer(year(date))) %>%
26+
mutate(month = as.integer(month(date))) %>%
27+
mutate(day = as.integer(day(date))) %>%
28+
mutate(weekday = wday(date, label = T, abbr = T)) %>%
29+
mutate(weekend = is.weekend(date)) %>%
30+
mutate(hour = as.integer(substring(hour, 1, 2))) %>%
31+
mutate(vehicle = "bike")
2732

2833
# write 'bikes' to SQLite database
2934
dir.create("data/database", showWarnings = F)
3035
con <- dbConnect(SQLite(), dbname = "data/database/traffic_data.sqlite")
3136
dbWriteTable(con, "bikes", bikes, row.names = F, overwrite = T)
3237

3338
dbExecute(con, "CREATE INDEX timestamp_bikes on bikes (date, hour)")
39+
dbExecute(con, "CREATE INDEX year_month_day_bikes on bikes (year, month, day, hour)")
3440

41+
# TODO: move the weather data into its own table
3542
# add the same weather to cars table
36-
cars <- dbGetQuery(conn = con, "SELECT location, count, date, hour, vehicle FROM cars")
37-
38-
weather_from_bikes <-
39-
bikes %>%
40-
select(date, hour, weather, windspeed, temperature) %>%
41-
filter(weather != "")
42-
43-
cars <-
44-
cars %>%
45-
inner_join(., weather_from_bikes, by = c("date", "hour"))
46-
47-
dbWriteTable(con, "cars", cars, row.names = F, overwrite = T)
43+
# cars <- dbGetQuery(conn = con, "SELECT location, count, date, hour, vehicle FROM cars")
44+
#
45+
# weather_from_bikes <-
46+
# bikes %>%
47+
# select(date, hour, weather, windspeed, temperature) %>%
48+
# filter(weather != "")
49+
#
50+
# cars <-
51+
# cars %>%
52+
# inner_join(., weather_from_bikes, by = c("date", "hour"))
53+
#
54+
# dbWriteTable(con, "cars", cars, row.names = F, overwrite = T)
55+
56+
# for better performance, DB is read-only in shiny-app
57+
dbExecute(con, "PRAGMA synchronous=OFF; PRAGMA journal_mode=OFF;")
4858

4959
dbDisconnect(con)

src/02_cars_to_db.R

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
# load libraries ####
88
# use 00_install_R_packages.R for installing missing packages
9-
sapply(c("dplyr", "assertthat", "lubridate", "tidyr", "DBI", "RSQLite"),
9+
sapply(c("dplyr", "assertthat", "lubridate", "chron", "tidyr", "DBI", "RSQLite"),
1010
require, character.only = TRUE)
1111

1212
process_df <- function(df) {
@@ -38,9 +38,8 @@ process_df <- function(df) {
3838
# filter to only add relevant locations to the database
3939
# as of now: Roxel and all locations where bicycles are also counted
4040
relevant_locations <-
41-
c("24020", "24100", "24140", "24010", "24120", "24130", "24030", # Roxel
42-
# locations where (closeby) also bicycles are counted, in the same order as http://www.stadt-muenster.de/verkehrsplanung/verkehr-in-zahlen/radverkehrszaehlungen.html
43-
"01080", # Neutor
41+
c(# locations where (closeby) also bicycles are counted, in the same order as http://www.stadt-muenster.de/verkehrsplanung/verkehr-in-zahlen/radverkehrszaehlungen.html
42+
"01080", # Neutor
4443
"04050", # Wolbecker Straße / Servatiiplatz
4544
"03052", # Hüfferstraße
4645
"07030", # Hammer Straße
@@ -61,6 +60,11 @@ process_df <- function(df) {
6160
df <-
6261
df %>%
6362
gather(hour, count, -location, -date) %>%
63+
mutate(year = as.integer(year(date))) %>%
64+
mutate(month = as.integer(month(date))) %>%
65+
mutate(day = as.integer(day(date))) %>%
66+
mutate(weekday = wday(date, label = T, abbr = T)) %>%
67+
mutate(weekend = is.weekend(date)) %>%
6468
# 'hour' to integer format
6569
mutate(hour = substring(hour, 2)) %>%
6670
mutate(hour = as.integer(hour)) %>%
@@ -92,5 +96,6 @@ for (raw_file in raw_files) {
9296
}
9397

9498
dbExecute(con, "CREATE INDEX timestamp_cars on cars (date, hour)")
99+
dbExecute(con, "CREATE INDEX year_month_day_cars on cars (year, month, day, hour)")
95100
dbExecute(con, "CREATE INDEX location_cars on cars (location)")
96101
dbDisconnect(con)

0 commit comments

Comments
 (0)