Merge pull request #40 from rOpenGov/24-FV22TOTA

Fix #24
rOpenGov · Nov 28, 2024 · 7570d7d · 7570d7d
2 parents c52b30f + f52d86f
commit 7570d7d
Show file tree

Hide file tree

Showing 13 changed files with 97 additions and 49 deletions.
diff --git a/.lintr b/.lintr
@@ -1,4 +1,5 @@
-linters: linters_with_defaults() # see vignette("lintr")
+linters: linters_with_defaults(
+  cyclocomp_linter(complexity_limit = 17L))
 encoding: "UTF-8"
 exclusions: list(
   "tests")
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -25,6 +25,8 @@ Suggests:
     testthat
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
+Config/testthat/edition: 3
+Config/testthat/parallel: true
 RoxygenNote: 7.3.2
 X-schema.org-isPartOf: http://ropengov.org/
 X-schema.org-keywords: ropengov

diff --git a/R/dst_determine_overlaps.R b/R/dst_determine_overlaps.R
@@ -0,0 +1,26 @@
+#' Helper function to determine wether or not to include the id in a variable
+#' option
+#'
+#' @param meta_data Meta data object for the table of inquiry
+#' @noRd
+dst_determine_overlaps <- function(meta_data) {
+  # Get variable names
+  var_names <- get_vars(meta_data)
+
+  # Get options for all variable names
+  options <- get_var_options(meta_data, var_names)
+
+  # Index over all vars to determine if there is duplicates
+  dup <- list()
+
+  for (i in seq_along(var_names)) {
+    dup[i] <- length(
+      options[[var_names[i]]]
+    ) == length(
+      unique(options[[var_names[i]]])
+    )
+  }
+
+  # If any of the option/vars include duplicates, we should include the id
+  return(any(unlist(dup)))
+}
diff --git a/R/dst_get_data.R b/R/dst_get_data.R
@@ -13,7 +13,9 @@
 #'   requested.
 #' @param format character value. "CSV" or "BULK". If you choose BULK then you
 #'   need to select a value for each of the parameters.
-#' @param value_presentation For now, "value" or "default"
+#' @param value_presentation For now, "value" or "default". When a table with
+#'   observations that have the same name, this is automatically changed to
+#'   CodeAndValue.
 #' @export
 #' @family Data retrival functions
 #' @importFrom utils read.csv read.csv2
@@ -47,6 +49,11 @@ dst_get_data <- function(table,
     }
   }
 
+  # If meta_data is NULL then get it automatically
+  if (is.null(meta_data)) {
+    meta_data <- dst_meta(table, lang = lang)
+  }
+
   # Force the names to be uppercase to match requirements from API
   names(query) <- toupper(names(query))
   dst_names <- names(query)
@@ -60,6 +67,11 @@ dst_get_data <- function(table,
     format = format
   )
 
+  # If overlaps in values are detected use CodeAndValue as presentation
+  if (dst_determine_overlaps(meta_data)) {
+    value_presentation <- "CodeAndValue"
+  }
+
   query$valuePresentation <- value_presentation
   query$lang <- lang
 
@@ -108,8 +120,14 @@ dst_get_data <- function(table,
   } else {
     stop("You haven't selected an appropiate language ('da' or 'en'")
   }
+
   names(dst_data) <- c(dst_names, "value")
 
+  # Remove the code
+  if (dst_determine_overlaps(meta_data)) {
+    dst_data$TID <- sapply(stringr::str_split(dst_data$TID, "\\s+"), `[`, 2)
+  }
+
   # Parse the dates if param is TRUE
   if (parse_dst_tid) {
     dst_data$TID <- dst_date_parse(dst_date = dst_data$TID)

diff --git a/data-raw/dst_tables.R b/data-raw/dst_tables.R
@@ -1,7 +1,3 @@
 tables <- dst_get_tables(lang = "en")
 
 usethis::use_data(tables, overwrite = TRUE)
-
-# tables_da <- dst_get_tables(lang = "da")
-#
-# usethis::use_data(tables_da, overwrite = TRUE)
diff --git a/man/dst_get_data.Rd b/man/dst_get_data.Rd
diff --git a/tests/testthat/test-dst_date_parse.R b/tests/testthat/test-dst_date_parse.R
@@ -1,51 +1,49 @@
-context("dst_date_parse")
-
 test_that("dst_date_parse gives the correct class.", {
-  
+
   exp_dates <- c("POSIXct", "POSIXt")
-  
+
   # Daily
   expect_equal(class(dst_date_parse(dst_date = "2000M01D01")), exp_dates)
   expect_equal(class(dst_date_parse(dst_date = c("2000M01D20", "2000M02D21", "2000M03D23", "2000M04D24"))), exp_dates)
-  
+
   # Monthly
   expect_equal(class(dst_date_parse(dst_date = "2000M01")), exp_dates)
   expect_equal(class(dst_date_parse(dst_date = c("2000M01", "2000M02", "2000M03", "2000M04", "2000M10", "2000M11"))), exp_dates)
-  
+
   # Quarterly
   expect_equal(class(dst_date_parse(dst_date = "2000Q1")), exp_dates)
   expect_equal(class(dst_date_parse(dst_date = "2000Q2")), exp_dates)
   expect_equal(class(dst_date_parse(dst_date = "2000Q3")), exp_dates)
   expect_equal(class(dst_date_parse(dst_date = "2000Q4")), exp_dates)
   expect_equal(class(dst_date_parse(dst_date = c("2000Q1", "2000Q2", "2000Q3", "2000Q4"))), exp_dates)
-  
+
   # Yearly
   expect_equal(class(dst_date_parse(dst_date = "2000")), exp_dates)
   expect_equal(class(dst_date_parse(dst_date = c("2000", "2000", "2000", "2000"))), exp_dates)
-  
+
   expect_equal(class(dst_date_parse(dst_date = c("2015H1", "2015H2"))), exp_dates)
   expect_equal(as.character(dst_date_parse(dst_date = c("2015H1", "2015H2"))), c("2015-01-01", "2015-07-01"))
-  
+
 })
 
 
 test_that("Test that dst_date_parse stops when the input is bad.", {
-  
+
   # Daily
   expect_error(dst_date_parse(dst_date = "2000M01D35"))
   expect_error(dst_date_parse(dst_date = "2000M10D40"))
-  
+
   # Monthly
   expect_error(dst_date_parse(dst_date = "20000M01"))
   expect_error(dst_date_parse(dst_date = "2000M101"))
   expect_error(dst_date_parse(dst_date = "2000M13"))
-  
+
   # Quarterly
   expect_error(dst_date_parse(dst_date = "2000Q0"))
   expect_error(dst_date_parse(dst_date = "2000Q5"))
-  
+
   # Yearly
   expect_error(dst_date_parse(dst_date = "20000"))
   expect_error(dst_date_parse(dst_date = "200"))
-  
+
 })
diff --git a/tests/testthat/test-dst_determine_overlaps.R b/tests/testthat/test-dst_determine_overlaps.R
@@ -0,0 +1,3 @@
+test_that("Overlap detection works", {
+  expect_true(dst_determine_overlaps(dst_meta("FV22TOTA")))
+})
diff --git a/tests/testthat/test-dst_get_data.R b/tests/testthat/test-dst_get_data.R
@@ -1,28 +1,25 @@
-
-context("dst_get_data")
-
 test_that("dst_get_data returns an error when the limit is reached.", {
-  
+
   expect_error(dst_get_data(table = "folk1",
-                            query = list(CIVILSTAND = "*", 
-                                            STATSB = "*", 
+                            query = list(CIVILSTAND = "*",
+                                            STATSB = "*",
                                             HERKOMST = c("Personer med dansk oprindelse", "Efterkommere"),
                                             TID = "*",
                                             ALDER = "*"), lang = "da"))
-  
+
 })
 
 
 test_that("dst_get_data is parsing the data correctly when 'en' and 'da' are selected as language. The API returns decimal numbers with both , and .", {
-  
+
   expect_equal(class(dst_get_data("AUP01", ALDER = "*", TID = "*", lang = "da", format = "CSV")$value), "numeric")
-  
+
 })
 
 test_that("dst_get_data fails with the BULK format when not all parameters have values.", {
-  
+
   expect_error(dst_get_data("AUP01", ALDER = "*", TID = "*", lang = "da", format = "BULK"))
-  
+
 })
 
 
diff --git a/tests/testthat/test-dst_get_tables.R b/tests/testthat/test-dst_get_tables.R
@@ -1,15 +1,12 @@
-
-context("dst_get_tables")
-
 test_that("dst_get_tables return a data.frame",{
-  
+
   expect_equal(class(dst_get_tables(lang = "da")), "data.frame")
   expect_equal(class(dst_get_tables(lang = "en")), "data.frame")
-  
+
 })
 
 test_that("dst_get_tables failes with wrong language input",{
-  
+
   expect_error(dst_get_tables(lang = "uk"))
   expect_error(dst_get_tables(lang = "no"))
 })

diff --git a/tests/testthat/test-dst_query_match.R b/tests/testthat/test-dst_query_match.R
@@ -1,8 +1,4 @@
-
-
-context("dst_query_match")
-
 test_that("The function fails when mandatory values are not supplied.",{
-  
+
   expect_error(dst_query_match(table = "NRHP", meta_data = NULL, lang = "en", query = list(TRANSAKT = "P.1 Output", Tid = "1993")))
-})
+})
diff --git a/tests/testthat/test-dst_value_limit.R b/tests/testthat/test-dst_value_limit.R
@@ -1,8 +1,3 @@
-
-
-
-context("dst_value_limit")
-
 test_that("dst_value_limit returns the correct value.", {
 
   expect_equal(dst_value_limit(query = list(CIVILSTAND = "*",

diff --git a/tests/testthat/test-fv22tota.R b/tests/testthat/test-fv22tota.R
@@ -0,0 +1,17 @@
+test_that("Tables with identifyers with the same name but different ids are parsed properly", {
+  # This tests that the issue in https://github.com/rOpenGov/dkstat/issues/24
+  # does not occur
+
+  table <- dst_get_data(
+    table = "FV22TOTA",
+    VALRES = "*",
+    OMRÅDE = "*",
+    Tid = "*",
+    lang = "da",
+    format = "BULK"
+  )
+
+  expect_equal(nrow(unique(table)), nrow(table))
+  expect_equal(nrow(table) - nrow(unique(table)), 0)
+
+})