diff --git a/R/dst_correct_url.R b/R/dst_correct_url.R index 2461fa0..60d4cf1 100644 --- a/R/dst_correct_url.R +++ b/R/dst_correct_url.R @@ -2,24 +2,36 @@ #' #' @param url A build url. dst_correct_url <- function(url) { - url <- stringr::str_replace_all(string = url, - pattern = "%C6", - replacement = "%C3%86") # ? - url <- stringr::str_replace_all(string = url, - pattern = "%D8", - replacement = "%C3%98") # ? - url <- stringr::str_replace_all(string = url, - pattern = "%C5", - replacement = "%C3%85") # ? - url <- stringr::str_replace_all(string = url, - pattern = "%E6", - replacement = "%C3%A6") # ? - url <- stringr::str_replace_all(string = url, - pattern = "%F8", - replacement = "%C3%B8") # ? - url <- stringr::str_replace_all(string = url, - pattern = "%E5", - replacement = "%C3%A5") # ? + url <- stringr::str_replace_all( + string = url, + pattern = "%C6", + replacement = "%C3%86" + ) # ? + url <- stringr::str_replace_all( + string = url, + pattern = "%D8", + replacement = "%C3%98" + ) # ? + url <- stringr::str_replace_all( + string = url, + pattern = "%C5", + replacement = "%C3%85" + ) # ? + url <- stringr::str_replace_all( + string = url, + pattern = "%E6", + replacement = "%C3%A6" + ) # ? + url <- stringr::str_replace_all( + string = url, + pattern = "%F8", + replacement = "%C3%B8" + ) # ? + url <- stringr::str_replace_all( + string = url, + pattern = "%E5", + replacement = "%C3%A5" + ) # ? return(url) } diff --git a/R/dst_date_parse.R b/R/dst_date_parse.R index 0f3fab9..93ec47a 100644 --- a/R/dst_date_parse.R +++ b/R/dst_date_parse.R @@ -1,6 +1,3 @@ - - - #' Helper function to parse the dates from the statbank. #' #' @param dst_date A vector of length one or more with date formats like @@ -9,63 +6,72 @@ dst_date_parse <- function(dst_date) { tz <- "Europe/Copenhagen" - if (all(stringr::str_detect(dst_date, - "[0-9]{4}+[M]{1}+[0-1]{1}+[0-9]{1}+[D]{1}+(([0-2]{1}+[0-9]{1})|([3]{1}+[0-1]{1}))" # nolint - )) && all(stringr::str_length(string = dst_date) == 10)) { + if (all(stringr::str_detect( + dst_date, + "[0-9]{4}+[M]{1}+[0-1]{1}+[0-9]{1}+[D]{1}+(([0-2]{1}+[0-9]{1})|([3]{1}+[0-1]{1}))" # nolint + )) && all(stringr::str_length(string = dst_date) == 10)) { # Daily - dst_date <- lubridate::ymd(paste0( - stringr::str_sub(dst_date, start = 1L, end = 4L), - "-", - stringr::str_sub(dst_date, start = -5L, end = -4L), - "-", - stringr::str_sub(dst_date, start = -2L) - ), - tz = tz) + dst_date <- lubridate::ymd( + paste0( + stringr::str_sub(dst_date, start = 1L, end = 4L), + "-", + stringr::str_sub(dst_date, start = -5L, end = -4L), + "-", + stringr::str_sub(dst_date, start = -2L) + ), + tz = tz + ) } else if ( # nolint start - all(stringr::str_detect(dst_date, - "^[0-9]{4}+[M]{1}+(([0]{1}+[0-9]{1})|([1]{1}+[0-2]{1}))")) && - all(stringr::str_length(string = dst_date) == 7)) { + all(stringr::str_detect( + dst_date, + "^[0-9]{4}+[M]{1}+(([0]{1}+[0-9]{1})|([1]{1}+[0-2]{1}))" + )) && + all(stringr::str_length(string = dst_date) == 7)) { # nolint end # Monthly - dst_date <- lubridate::ymd(paste0( - stringr::str_sub(dst_date, start = 1L, end = 4L), - "-", - stringr::str_sub(dst_date, start = -2L), - "-", - "-01" - ), - tz = tz) + dst_date <- lubridate::ymd( + paste0( + stringr::str_sub(dst_date, start = 1L, end = 4L), + "-", + stringr::str_sub(dst_date, start = -2L), + "-", + "-01" + ), + tz = tz + ) } else if ( # nolint start all(stringr::str_detect(dst_date, "^[0-9]{4}+([Q]{1}|[K]{1})+[1-4]{1}")) && - all(stringr::str_length(string = dst_date) == 6)) { + all(stringr::str_length(string = dst_date) == 6)) { # nolint end # Quarterly - dst_date <- lubridate::ymd(paste0( - stringr::str_sub(dst_date, start = 1L, end = 4L), - "-", - as.numeric(stringr::str_sub(dst_date, start = -1L)) * 3 - 2, - "-", - "-01" - ), - tz = tz) + dst_date <- lubridate::ymd( + paste0( + stringr::str_sub(dst_date, start = 1L, end = 4L), + "-", + as.numeric(stringr::str_sub(dst_date, start = -1L)) * 3 - 2, + "-", + "-01" + ), + tz = tz + ) } else if (all(stringr::str_detect(dst_date, "^[0-9]{4}")) && - all(stringr::str_length(dst_date) == 4)) { + all(stringr::str_length(dst_date) == 4)) { # nolint # Yearly dst_date <- lubridate::ymd(paste0(stringr::str_sub( - dst_date, start = 1L, end = 4L + dst_date, + start = 1L, end = 4L ), "-01-01"), tz = tz) } else if ( # nolint start all(stringr::str_detect(dst_date, "^[0-9]{4}+[H]{1}+[1-2]{1}")) & - all(stringr::str_length(dst_date) == 6)) { + all(stringr::str_length(dst_date) == 6)) { # nolint end # Half yearly dst_date[stringr::str_sub(dst_date, start = -1L) == 1] <- paste0(stringr::str_sub(dst_date[stringr::str_sub(dst_date, start = -1L) == 1], start = 1L, end = 4L), "-01-01") # nolint dst_date[stringr::str_sub(dst_date, start = -1L) == 2] <- paste0(stringr::str_sub(dst_date[stringr::str_sub(dst_date, start = -1L) == 2], start = 1L, end = 4L), "-07-01") # nolint dst_date <- lubridate::ymd(dst_date, tz = tz) - } else { stop("None of the regular expressions were matched. Please inspect the dates.") # nolint } diff --git a/R/dst_find_val_id.R b/R/dst_find_val_id.R index 09021ae..ce7b79d 100644 --- a/R/dst_find_val_id.R +++ b/R/dst_find_val_id.R @@ -9,7 +9,7 @@ dst_find_val_id <- function(meta_data, variable, values_text = NULL) { names(meta_data$values) <- toupper(names(meta_data$values)) if (!is.null(values_text)) { - ids <- list(meta_data$values[[variable]]$id[meta_data$values[[variable]]$text %in% values_text]) #nolint + ids <- list(meta_data$values[[variable]]$id[meta_data$values[[variable]]$text %in% values_text]) # nolint names(ids) <- variable # Test that all the values_text can be matched. If not, stop. @@ -26,7 +26,6 @@ dst_find_val_id <- function(meta_data, variable, values_text = NULL) { ) } # nolint end - } else { ids <- list("*") names(ids) <- variable @@ -34,7 +33,7 @@ dst_find_val_id <- function(meta_data, variable, values_text = NULL) { if (length(ids) > 1) { warning( - "Results couldn't be transformed to a vector as results are in a list bigger than length 1." #nolint + "Results couldn't be transformed to a vector as results are in a list bigger than length 1." # nolint ) } else { ids <- ids[[variable]] diff --git a/R/dst_get_all_data.R b/R/dst_get_all_data.R index 7fc197a..9db3892 100644 --- a/R/dst_get_all_data.R +++ b/R/dst_get_all_data.R @@ -13,7 +13,6 @@ #' @family Data retrival functions #' @author Aleksander Bang-Larsen dst_get_all_data <- function(table, lang = "da", parse_dst_tid = TRUE) { - # Get metadata for table metadata <- dst_meta(table) diff --git a/R/dst_get_data.R b/R/dst_get_data.R index 5b3a2ce..9681877 100644 --- a/R/dst_get_data.R +++ b/R/dst_get_data.R @@ -42,8 +42,9 @@ dst_get_data <- function(table, ## If query is NULL, then use ... as query if (is.null(query)) { query <- list(...) - if (length(query) == 0) + if (length(query) == 0) { stop("You need to build a query in ... or supply one to 'query'") + } } # Force the names to be uppercase to match requirements from API @@ -65,9 +66,11 @@ dst_get_data <- function(table, ## Insert request into url dst_url$query <- query - dst_url$query <- lapply(X = dst_url$query, - FUN = paste, - collapse = ",") + dst_url$query <- lapply( + X = dst_url$query, + FUN = paste, + collapse = "," + ) dst_url <- httr::build_url(dst_url) dst_url <- dst_correct_url(dst_url) diff --git a/R/dst_get_tables.R b/R/dst_get_tables.R index 9e51b85..8cb406a 100644 --- a/R/dst_get_tables.R +++ b/R/dst_get_tables.R @@ -9,9 +9,11 @@ dst_get_tables <- function(lang = "da") { } # prepare the url - dst_url <- paste0("http://api.statbank.dk/v1/tables?lang=", - lang, - "&format=JSON") + dst_url <- paste0( + "http://api.statbank.dk/v1/tables?lang=", + lang, + "&format=JSON" + ) # get data dst_tables <- httr::GET(url = dst_url) diff --git a/R/dst_meta.R b/R/dst_meta.R index d3b7061..6a8a192 100644 --- a/R/dst_meta.R +++ b/R/dst_meta.R @@ -21,8 +21,10 @@ dst_meta <- function(table, ..., lang = "da") { meta <- httr::GET(url = dkstat_url) ## Parse from JSON - meta <- jsonlite::fromJSON(txt = httr::content(meta, as = "text"), - simplifyDataFrame = TRUE) + meta <- jsonlite::fromJSON( + txt = httr::content(meta, as = "text"), + simplifyDataFrame = TRUE + ) ## Structure results meta <- dst_meta_parse(meta, lang) diff --git a/R/dst_meta_parse.R b/R/dst_meta_parse.R index e41201f..58e7c5c 100644 --- a/R/dst_meta_parse.R +++ b/R/dst_meta_parse.R @@ -31,9 +31,11 @@ dst_meta_parse <- function(meta, lang) { if (lang == "en") { test <- grepl(pattern = "Tid", names(values)) if (sum(test) > 0) { - values$Tid$id <- sub(pattern = "Q", - replacement = "K", - x = values$Tid$id) + values$Tid$id <- sub( + pattern = "Q", + replacement = "K", + x = values$Tid$id + ) } } diff --git a/R/dst_query_match.R b/R/dst_query_match.R index 7428631..0268f94 100644 --- a/R/dst_query_match.R +++ b/R/dst_query_match.R @@ -22,7 +22,7 @@ dst_query_match <- function(table, lang, meta_data, query, format) { # names in the meta data if (!(all(names(query) %in% toupper(names(meta_data$values))))) { stop(paste0( - "All the variable names in your query couldn't be matched to the meta data.\n" #nolint + "All the variable names in your query couldn't be matched to the meta data.\n" # nolint )) } diff --git a/README.Rmd b/README.Rmd index 0a2cd3d..f89bd24 100644 --- a/README.Rmd +++ b/README.Rmd @@ -124,8 +124,10 @@ You need to build your query based on the text column that each variable contain ```{r} -aulaar <- dst_get_data(table = "AULAAR", KØN = "Total", PERPCT = "Per cent of the labour force", Tid = 2013, - lang = "en") +aulaar <- dst_get_data( + table = "AULAAR", KØN = "Total", PERPCT = "Per cent of the labour force", Tid = 2013, + lang = "en" +) str(aulaar) ``` @@ -138,16 +140,17 @@ request the meta data for the table and this will be very ineffecient. Let's query the statbank using more than one value for each variable. ```{r} - folk1a_meta <- dst_meta("folk1a", lang = "da") -str(dst_get_data(table = "folk1a", - Tid = "*", - CIVILSTAND = "*", - ALDER = "*", - OMRÅDE = c("Hele landet", "København", "Dragør", "Albertslund"), - lang = "da", - meta_data = folk1a_meta)) +str(dst_get_data( + table = "folk1a", + Tid = "*", + CIVILSTAND = "*", + ALDER = "*", + OMRÅDE = c("Hele landet", "København", "Dragør", "Albertslund"), + lang = "da", + meta_data = folk1a_meta +)) ``` @@ -158,16 +161,16 @@ You might have noticed that I use the * as a value in the TID variable. You can to writing all the text values for the variable. ```{r} - -my_query <- list(OMRÅDE = c("Hele landet", "København", "Frederiksberg", "Odense"), - CIVILSTAND = "Ugift", - TID = "*") +my_query <- list( + OMRÅDE = c("Hele landet", "København", "Frederiksberg", "Odense"), + CIVILSTAND = "Ugift", + TID = "*" +) str(dst_get_data(table = "folk1a", query = my_query, lang = "da")) str(dst_get_data(table = "AUP01", OMRÅDE = c("Hele landet"), TID = "*", lang = "da")) - ``` If you run into problems, then try to set the parse_dst_tid parameter to FALSE as there diff --git a/README.md b/README.md index 374b1f0..69d22a3 100644 --- a/README.md +++ b/README.md @@ -227,8 +227,10 @@ You need to build your query based on the text column that each variable contains in the meta_data\$values list. ``` r -aulaar <- dst_get_data(table = "AULAAR", KØN = "Total", PERPCT = "Per cent of the labour force", Tid = 2013, - lang = "en") +aulaar <- dst_get_data( + table = "AULAAR", KØN = "Total", PERPCT = "Per cent of the labour force", Tid = 2013, + lang = "en" +) str(aulaar) ``` @@ -250,13 +252,15 @@ Let’s query the statbank using more than one value for each variable. ``` r folk1a_meta <- dst_meta("folk1a", lang = "da") -str(dst_get_data(table = "folk1a", - Tid = "*", - CIVILSTAND = "*", - ALDER = "*", - OMRÅDE = c("Hele landet", "København", "Dragør", "Albertslund"), - lang = "da", - meta_data = folk1a_meta)) +str(dst_get_data( + table = "folk1a", + Tid = "*", + CIVILSTAND = "*", + ALDER = "*", + OMRÅDE = c("Hele landet", "København", "Dragør", "Albertslund"), + lang = "da", + meta_data = folk1a_meta +)) ``` ## 'data.frame': 172720 obs. of 5 variables: @@ -275,9 +279,11 @@ You can use the star as a alternative to writing all the text values for the variable. ``` r -my_query <- list(OMRÅDE = c("Hele landet", "København", "Frederiksberg", "Odense"), - CIVILSTAND = "Ugift", - TID = "*") +my_query <- list( + OMRÅDE = c("Hele landet", "København", "Frederiksberg", "Odense"), + CIVILSTAND = "Ugift", + TID = "*" +) str(dst_get_data(table = "folk1a", query = my_query, lang = "da")) ```