Skip to content

Commit

Permalink
Fix #154 (#183)
Browse files Browse the repository at this point in the history
* Fix #154

* Check params

* Add test for check

* Add tests

* Update NEWS [no ci]
  • Loading branch information
chainsawriot authored Dec 31, 2023
1 parent 2a703c9 commit 43b1821
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 48 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# readODS 2.2.0

* Fix #151 - Now `read_ods()` and `list_ods_sheets()` can also be used to process flat ods files. `read_fods()` and `list_fods_sheets()` are still available, but not as the so-called "common interface."
* Fix #154 - `read_ods()` and `read_fods()` have two arguments - `trim_ws` and `n_max` which are the same as the arguments of `readxl::read_excel()`.

# readODS 2.1.1

Expand Down
114 changes: 70 additions & 44 deletions R/read_ods.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,26 +32,30 @@
return(g)
}



## Based on readxl, although the implementation is different.
## If max row is -1, read to end of row.
## Row and column-numbers are 1-based
.standardise_limits <- function(range, skip) {
.standardise_limits <- function(range, skip, n_max) {
if(is.null(range)) {
skip <- check_nonnegative_integer(x = skip, argument = "skip")
n_max <- check_nonnegative_integer(x = n_max, argument = "n_max")
if (n_max == Inf) {
max_row <- -1
} else {
max_row <- n_max + 1
}
limits <- c(
min_row = skip + 1,
max_row = -1,
max_row = max_row,
min_col = 1,
max_col = -1
)
} else {
if(skip != 0) {
warning("Range and non-zero value for skip given. Defaulting to range.", call. = FALSE)
if(skip != 0 || n_max != Inf) {
warning("Range and non-default value for skip or n_max given. Defaulting to range.", call. = FALSE)
}
tryCatch({
limits <- cellranger::as.cell_limits(range)
limits <- cellranger::as.cell_limits(range)
}, error = function(e) {
stop("Invalid `range`")
})
Expand Down Expand Up @@ -82,7 +86,9 @@
row_names = FALSE,
strings_as_factors = FALSE,
verbose = FALSE,
as_tibble = TRUE) {
as_tibble = TRUE,
trim_ws = TRUE,
n_max = Inf) {
if (!file.exists(path)) {
stop("file does not exist", call. = FALSE)
}
Expand All @@ -104,6 +110,9 @@
if (!is.logical(as_tibble)) {
stop("as_tibble must be of type `boolean", call. = FALSE)
}
if (!is.logical(trim_ws)) {
stop("trim_ws must be of type `boolean", call. = FALSE)
}
if (row_names && as_tibble) {
stop("Tibbles do not support row names. To use row names, set as_tibble to false", call. = FALSE)
}
Expand All @@ -115,6 +124,9 @@
stop("Unknown col_types. Can either be a class col_spec, list, character, NULL or NA.",
call. = FALSE)
}
if (!is.numeric(n_max)) {
stop("n_max must be numeric.", call. = FALSE)
}
}

.return_empty <- function(as_tibble = FALSE) {
Expand All @@ -127,22 +139,22 @@
return(data.frame())
}

.type_convert <- function(df, col_types = NULL, verbose = TRUE, na = c("", "NA")) {
.type_convert <- function(df, col_types = NULL, verbose = TRUE, na = c("", "NA"), trim_ws = TRUE) {
if (verbose) {
res <- readr::type_convert(df = df, col_types, na = na)
} else {
suppressMessages({
res <- readr::type_convert(df = df, col_types, na = na)
res <- readr::type_convert(df = df, col_types, na = na, trim_ws = trim_ws)
})
}
return(res)
}

.handle_col_types <- function(res, col_types, verbose, na) {
.handle_col_types <- function(res, col_types, verbose, na, trim_ws) {
if (isTRUE(is.na(col_types)) || nrow(res) == 0) {
return(res)
}
.type_convert(df = res, col_types = col_types, verbose = verbose, na = na)
.type_convert(df = res, col_types = col_types, verbose = verbose, na = na, trim_ws = trim_ws)
}

## standardise `sheet` parameter as a number, i.e. sheet_index
Expand Down Expand Up @@ -173,19 +185,21 @@
}

.read_ods <- function(path,
sheet = 1,
col_names = TRUE,
col_types = NULL,
na = "",
skip = 0,
formula_as_formula = FALSE,
range = NULL,
row_names = FALSE,
strings_as_factors = FALSE,
verbose = FALSE,
as_tibble = TRUE,
.name_repair = "unique",
flat = FALSE) {
sheet = 1,
col_names = TRUE,
col_types = NULL,
na = "",
skip = 0,
formula_as_formula = FALSE,
range = NULL,
row_names = FALSE,
strings_as_factors = FALSE,
verbose = FALSE,
as_tibble = TRUE,
.name_repair = "unique",
flat = FALSE,
trim_ws = TRUE,
n_max = Inf) {
.check_read_args(path,
sheet,
col_names,
Expand All @@ -197,7 +211,9 @@
row_names,
strings_as_factors,
verbose,
as_tibble)
as_tibble,
trim_ws,
n_max)
path <- normalizePath(path)
if (flat) {
.get_sheet_names_func <- get_flat_sheet_names_
Expand All @@ -207,7 +223,7 @@
.read_ods_func <- read_ods_
}
## Get cell range info
limits <- .standardise_limits(range, skip)
limits <- .standardise_limits(range, skip, n_max)
sheet_index <- .standardise_sheet(sheet = sheet, sheet_names = .get_sheet_names_func(file = path, include_external_data = TRUE),
range = range)
strings <- .read_ods_func(file = path,
Expand All @@ -233,7 +249,7 @@
byrow = TRUE),
stringsAsFactors = FALSE)
res <- .change_df_with_col_row_header(x = res, col_header = col_names, row_header = row_names, .name_repair = .name_repair)
res <- .handle_col_types(res, col_types = col_types, verbose = verbose, na = na)
res <- .handle_col_types(res, col_types = col_types, verbose = verbose, na = na, trim_ws = trim_ws)
if (strings_as_factors) {
res <- .convert_strings_to_factors(df = res)
}
Expand Down Expand Up @@ -300,9 +316,11 @@
#' Default is `"unique"`.
#'
#' @param ods_format character, must be "auto", "ods" or "fods". The default "auto" is to determine the format automatically. By default, the format is determined by file extension, unless `guess` is `FALSE`.
#' @param guess logical. If the file extension is absent or not recognized, this
#' @param guess logical, If the file extension is absent or not recognized, this
#' controls whether we attempt to guess format based on the file signature or
#' "magic number".
#' @param trim_ws logical, should leading and trailing whitespace be trimmed?
#' @param n_max numeric, Maximum number of data rows to read. Ignored if `range` is given.
#' @return A tibble (\code{tibble}) or data frame (\code{data.frame}) containing a representation of data in the (f)ods file.
#' @author Peter Brohan <peter.brohan+cran@@gmail.com>, Chung-hong Chan <chainsawtiney@@gmail.com>, Gerrit-Jan Schutten <phonixor@@gmail.com>
#' @examples
Expand Down Expand Up @@ -343,7 +361,9 @@ read_ods <- function(path,
as_tibble = TRUE,
.name_repair = "unique",
ods_format = c("auto", "ods", "fods"),
guess = FALSE) {
guess = FALSE,
trim_ws = TRUE,
n_max = Inf) {
ods_format <- .determine_ods_format(path = path, guess = guess, ods_format = match.arg(ods_format))
## Should use match.call but there's a weird bug if one of the variable names is 'file'
.read_ods(path = path,
Expand All @@ -359,24 +379,28 @@ read_ods <- function(path,
verbose = verbose,
as_tibble = as_tibble,
.name_repair = .name_repair,
flat = ods_format == "fods")
flat = ods_format == "fods",
trim_ws = trim_ws,
n_max = n_max)
}

#' @rdname read_ods
#' @export
read_fods <- function(path,
sheet = 1,
col_names = TRUE,
col_types = NULL,
na = "",
skip = 0,
formula_as_formula = FALSE,
range = NULL,
row_names = FALSE,
strings_as_factors = FALSE,
verbose = FALSE,
as_tibble = TRUE,
.name_repair = "unique") {
sheet = 1,
col_names = TRUE,
col_types = NULL,
na = "",
skip = 0,
formula_as_formula = FALSE,
range = NULL,
row_names = FALSE,
strings_as_factors = FALSE,
verbose = FALSE,
as_tibble = TRUE,
.name_repair = "unique",
trim_ws = TRUE,
n_max = Inf) {
## Should use match.call but there's a weird bug if one of the variable names is 'file'
.read_ods(path = normalizePath(path, mustWork = FALSE),
sheet = sheet,
Expand All @@ -391,5 +415,7 @@ read_fods <- function(path,
verbose = verbose,
as_tibble = as_tibble,
.name_repair = .name_repair,
flat = TRUE)
flat = TRUE,
trim_ws = trim_ws,
n_max = n_max)
}
2 changes: 1 addition & 1 deletion man/list_ods_sheets.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 11 additions & 3 deletions man/read_ods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added tests/testdata/leadingspaces.ods
Binary file not shown.
19 changes: 19 additions & 0 deletions tests/testthat/test_read_ods.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ test_that("Incorrect Argument", {
expect_error(read_ods(path = "../testdata/sum.ods", strings_as_factors = "a"), "strings_as_factors must be of type `boolean`")
expect_error(read_ods(path = "../testdata/sum.ods", verbose = "a"), "verbose must be of type `boolean`")
expect_error(read_ods(path = "../testdata/sum.ods", row_names = TRUE), "Tibbles do not support")
expect_error(read_ods(path = "../testdata/sum.ods", n_max = "abc"), "n_max must be numeric")
expect_error(read_ods(path = "../testdata/sum.ods", n_max = -1), "n_max must be a positive integer")
expect_error(read_ods(path = "../testdata/sum.ods", trim_ws = "a"), "trim_ws must be")
})

test_that("exceptions in C++ (hard to test)", {
Expand Down Expand Up @@ -58,6 +61,8 @@ test_that("Parses range inputs correctly", {
expect_equal(x[[2,2]], 2)
expect_message(x <- read_ods("../testdata/multisheet.ods", range = "Sheet3!D2:E4"))
expect_equal(x[[1,1]], 3)
expect_warning(x <- read_ods("../testdata/multisheet.ods", n_max = 10, range = "Sheet2!B4:D9"), "Range and non-default")
expect_equal(x[[2,2]], 2)
})

test_that("Deals with repeated spaces correctly when fetching only part of sheet",{
Expand Down Expand Up @@ -105,6 +110,20 @@ test_that("No Warning of empty sheet", {
expect_silent(read_fods("../testdata/empty.fods"))
})

test_that("n_max", {
expect_silent(x <- read_ods("../testdata/starwars.ods", n_max = Inf))
expect_equal(nrow(x), 10)
expect_silent(x <- read_ods("../testdata/starwars.ods", n_max = 5))
expect_equal(nrow(x), 5)
expect_silent(x <- read_ods("../testdata/starwars.ods", n_max = 100))
expect_equal(nrow(x), 10)
})

test_that("trim_ws", {
expect_equal(read_ods("../testdata/leadingspaces.ods", trim_ws = FALSE)[1,1, drop = TRUE], " abc")
expect_equal(read_ods("../testdata/leadingspaces.ods", trim_ws = TRUE)[1,1, drop = TRUE], "abc")
})

## V2.0.0 behavior: backward compatibility

test_that("Single column ODS v2.0.0", {
Expand Down

0 comments on commit 43b1821

Please sign in to comment.