Skip to content

Commit

Permalink
Merge pull request #153 from ropensci/coltypes2
Browse files Browse the repository at this point in the history
  • Loading branch information
chainsawriot authored Aug 22, 2023
2 parents 5c93d0b + b73cd96 commit 38cefb8
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 97 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: readODS
Type: Package
Title: Read and Write ODS Files
Version: 2.0.3
Version: 2.0.4
Authors@R:
c(person("Gerrit-Jan", "Schutten", role = c("aut"), email = "[email protected]"),
person("Chung-hong", "Chan", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0002-6232-7530")),
Expand Down
13 changes: 13 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
# readODS 2.04

## `col_types` can be character ("shorthand") or list

fix #135 and the review by Dr Ruedni

```r
# Specifying col_types as shorthand, the third column as factor; other by guessing
read_ods("starwars.ods", col_types = "??f")
# Specifying col_types as list
read_ods("starwars.ods", col_types = list(species = "f"))
```

# readODS 2.0.3

## Add support for writing flat ODS
Expand Down
174 changes: 88 additions & 86 deletions R/read_ods.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
}

## Based on readxl, although the implementation is different.
## If max row is -1, read to end of row.
## If max row is -1, read to end of row.
## Row and column-numbers are 1-based
.standardise_limits <- function(range, skip) {
if(is.null(range)) {
Expand Down Expand Up @@ -50,17 +50,6 @@
return(limits)
}

.silent_type_convert <- function(x, verbose = TRUE, na = c("", "NA")) {
if (verbose) {
res <- readr::type_convert(df = x, na = na)
} else {
suppressMessages({
res <- readr::type_convert(df = x, na = na)
})
}
return(res)
}

.convert_strings_to_factors <- function(df) {
i <- vapply(df, is.character, logical(1))
df[i] <- lapply(df[i], as.factor)
Expand Down Expand Up @@ -106,6 +95,65 @@
if (row_names && as_tibble) {
stop("Tibbles do not support row names. To use row names, set as_tibble to false", call. = FALSE)
}
if (!inherits(col_types, "col_spec") &&
isFALSE(is.na(col_types)) &&
isFALSE(is.null(col_types)) &&
isFALSE(is.character(col_types)) &&
isFALSE(is.list(col_types))) {
stop("Unknown col_types. Can either be a class col_spec, list, character, NULL or NA.",
call. = FALSE)
}
}

.return_empty <- function(as_tibble = FALSE) {
warning("empty sheet, return empty data frame.", call. = FALSE)
if(as_tibble) {
return(tibble::tibble())
}
return(data.frame())
}

.type_convert <- function(df, col_types = NULL, verbose = TRUE, na = c("", "NA")) {
if (verbose) {
res <- readr::type_convert(df = df, col_types, na = na)
} else {
suppressMessages({
res <- readr::type_convert(df = df, col_types, na = na)
})
}
return(res)
}

.handle_col_types <- function(res, col_types, verbose, na) {
if (isTRUE(is.na(col_types))) {
return(res)
}
.type_convert(df = res, col_types = col_types, verbose = verbose, na = na)
}

## standardise `sheet` parameter as a number
.standardise_sheet <- function(sheet, sheets, limits, range) {
sheet_from_range <- cellranger::as.cell_limits(range)[["sheet"]]
if (!is.null(range) && !is.na(sheet_from_range)) {
if (sheet != 1) {
warning("Sheet suggested in range and using sheet argument. Defaulting to range",
call. = FALSE)
}
sheet <- sheet_from_range ## override
}
is_in_sheet_names <- stringi::stri_cmp(e1 = sheet, e2 = sheets) == 0
if (!is.numeric(sheet) && !any(is_in_sheet_names)) {
stop(paste0("No sheet found with name '", sheet, "'", sep = ""),
call. = FALSE)
}
if (is.numeric(sheet) && sheet > length(sheets)) {
stop(paste0("File contains only ", length(sheets), " sheets. Sheet index out of range.",
call. = FALSE))
}
if (!is.numeric(sheet)) {
sheet <- which(is_in_sheet_names)
}
return(sheet)
}

.read_ods <- function(path,
Expand Down Expand Up @@ -134,65 +182,26 @@
strings_as_factors,
verbose,
as_tibble)
# Get cell range info
limits <- .standardise_limits(range, skip)
# Get sheet number.
if (flat) {
sheets <- get_flat_sheet_names_(file = path, include_external_data = TRUE)
.get_sheet_names_func <- get_flat_sheet_names_
.read_ods_func <- read_flat_ods_
} else {
sheets <- get_sheet_names_(file = path, include_external_data = TRUE)
}
sheet_name <- cellranger::as.cell_limits(range)[["sheet"]]
if(!is.null(range) && !is.na(sheet_name)) {
if(sheet != 1) {
warning("Sheet suggested in range and using sheet argument. Defaulting to range",
call. = FALSE)
}
is_in_sheet_names <- stringi::stri_cmp(e1 = sheet_name, e2 = sheets) == 0
if(any(is_in_sheet_names)) {
sheet <- which(is_in_sheet_names)
} else {
stop(paste0("No sheet found with name '", sheet_name, "'", sep = ""),
call. = FALSE)
}
} else {
is_in_sheet_names <- stringi::stri_cmp(e1 = sheet, e2 = sheets) == 0
if (!is.numeric(sheet) && any(is_in_sheet_names)) {
sheet <- which(is_in_sheet_names)
} else if (!is.numeric(sheet)) {
stop(paste0("No sheet found with name '", sheet, "'", sep = ""),
call. = FALSE)
}
if (sheet > length(sheets)) {
stop(paste0("File contains only ", length(sheets), " sheets. Sheet index out of range.",
call. = FALSE))
}
.get_sheet_names_func <- get_sheet_names_
.read_ods_func <- read_ods_
}

if(flat) {
strings <- read_flat_ods_(file = path,
start_row = limits["min_row"],
stop_row = limits["max_row"],
start_col = limits["min_col"],
stop_col = limits["max_col"],
sheet = sheet,
formula_as_formula = formula_as_formula)
} else {
strings <- read_ods_(file = path,
start_row = limits["min_row"],
stop_row = limits["max_row"],
start_col = limits["min_col"],
stop_col = limits["max_col"],
sheet = sheet,
formula_as_formula = formula_as_formula)
}
if(strings[1] == 0 || strings[2] == 0) {
warning("empty sheet, return empty data frame.", call. = FALSE)
if(as_tibble) {
return(tibble::tibble())
} else {
return(data.frame())
}
## Get cell range info
limits <- .standardise_limits(range, skip)
sheet <- .standardise_sheet(sheet = sheet, sheets = .get_sheet_names_func(file = path, include_external_data = TRUE),
limits = limits, range = range)
strings <- .read_ods_func(file = path,
start_row = limits["min_row"],
stop_row = limits["max_row"],
start_col = limits["min_col"],
stop_col = limits["max_col"],
sheet = sheet,
formula_as_formula = formula_as_formula)
if (strings[1] == 0 || strings[2] == 0) {
return(.return_empty(as_tibble = as_tibble))
}
res <- as.data.frame(
matrix(
Expand All @@ -201,27 +210,14 @@
byrow = TRUE),
stringsAsFactors = FALSE)
res <- .change_df_with_col_row_header(x = res, col_header = col_names, row_header = row_names, .name_repair = .name_repair)
res <- data.frame(res)
if (inherits(col_types, 'col_spec')) {
res <- readr::type_convert(df = res, col_types = col_types, na = na)
} else if (length(col_types) == 0 && is.null(col_types)) {
res <- .silent_type_convert(x = res, verbose = verbose, na = na)
} else if (length(col_types) == 1 && is.na(col_types[1])) {
{} #Pass
} else {
stop("Unknown col_types. Can either be a class col_spec, NULL or NA.",
call. = FALSE)
}

res <- .handle_col_types(data.frame(res), col_types = col_types, verbose = verbose, na = na)
if (strings_as_factors) {
res <- .convert_strings_to_factors(df = res)
}

if (as_tibble) {
res <- tibble::as_tibble(x = res, .name_repair = .name_repair)
}
return(res)

}

#' Read Data From (F)ODS File
Expand All @@ -232,12 +228,12 @@
#' @param path path to the (f)ods file.
#' @param sheet sheet to read. Either a string (the sheet name), or an integer sheet number. The default is 1.
#' @param col_names logical, indicating whether the file contains the names of the variables as its first line. Default is TRUE.
#' @param col_types Either NULL to guess from the spreadsheet or refer to [readr::type_convert()] to specify cols specification. NA will return a data frame with all columns being "characters".
#' @param col_types Either NULL to guess from the spreadsheet or refer to [readr::type_convert()] to specify cols specification. It can also be a shorthand such as "ccf" ("character", "character", "factor"), a list, or an object created by [readr::cols()]. NA will return a data frame with all columns being "characters". Please note that it will not speed up the reading by a lot by specifying this parameter explicitly. It is more for accuracy.
#' @param na Character vector of strings to use for missing values. By default read_ods converts blank cells to missing data. It can also be set to
#' NULL, so that empty cells are treated as NA.
#' @param skip the number of lines of the data file to skip before beginning to read data. If this parameter is larger than the total number of lines in the ods file, an empty data frame is returned.
#' @param formula_as_formula logical, a switch to display formulas as formulas "SUM(A1:A3)" or as the resulting value "3"... or "8".. . Default is FALSE.
#' @param range selection of rectangle using Excel-like cell range, such as \code{range = "D12:F15"} or \code{range = "R1C12:R6C15"}. Cell range processing is handled by the \code{\link[=cellranger]{cellranger}} package.
#' @param range selection of rectangle using Excel-like cell range, such as \code{range = "D12:F15"} or \code{range = "R1C12:R6C15"}. Cell range processing is handled by the \code{\link[=cellranger]{cellranger}} package. If sheet name is in the range, such as \code{range = "Sheet2!A2:B7"}, this sheet name is used instead of the provided `sheet`. If `sheet` is not the default value (1), a warning is given.
#' @param row_names logical, indicating whether the file contains the names of the rows as its first column. Default is FALSE.
#' @param strings_as_factors logical, if character columns to be converted to factors. Default is FALSE.
#' @param verbose logical, if messages should be displayed. Default is FALSE.
Expand All @@ -248,9 +244,9 @@
#' - `"check_unique"`: Check names are unique, but do not repair
#' - `"universal"` : Checks names are unique and valid R variables names in scope
#' - A function to apply custom name repair.
#'
#'
#' Default is `"unique"`.
#'
#'
#' @return A tibble (\code{tibble}) or data frame (\code{data.frame}) containing a representation of data in the (f)ods file.
#' @author Peter Brohan <peter.brohan+cran@@gmail.com>, Chung-hong Chan <chainsawtiney@@gmail.com>, Gerrit-Jan Schutten <phonixor@@gmail.com>
#' @examples
Expand All @@ -267,6 +263,12 @@
#' read_fods("starwars.fods", sheet = 2)
#' # Read a specific range, e.g. A1:C11
#' read_fods("starwars.fods", sheet = 2, range = "A1:C11")
#' # Give a warning and read from Sheet1 (not 2)
#' read_fods("starwars.fods", sheet = 2, range = "Sheet1!A1:C11")
#' # Specifying col_types as shorthand, the third column as factor; other by guessing
#' read_ods("starwars.ods", col_types = "??f")
#' # Specifying col_types as list
#' read_ods("starwars.ods", col_types = list(species = "f"))
#' }
#' @export
read_ods <- function(path,
Expand Down
10 changes: 8 additions & 2 deletions man/read_ods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 57 additions & 6 deletions tests/testthat/test_col_types.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
### throw an error if col_types is not col_spec, single value NA or single value NULL

test_that("col_types throw error", {
expect_error(read_ods('../testdata/col_types.ods', col_types = 123))
expect_error(read_ods('../testdata/col_types.ods', col_types = c(NA, NA)))
expect_error(read_ods('../testdata/col_types.ods', col_types = c(NA, 123)))
expect_error(x <- read_ods("../testdata/col_types.ods", col_types = TRUE))
expect_error(x <- read_ods("../testdata/col_types.ods", col_types = 123))

})

test_that("col_types ODS", {
x <- read_ods('../testdata/col_types.ods', col_types = NA, as_tibble = TRUE)
expect_equal(class(x[[2]]), "character")
Expand All @@ -21,10 +32,50 @@ test_that("multi col_types ODS", {
expect_equal(class(x[[2]]), "character")
})

### throw an error if col_types is not col_spec, single value NA or single value NULL
test_that("col_types shorthand", {
x <- read_ods("../testdata/col_types.ods") # col_types = NULL
expect_equal(class(x[[1]]), "character")
expect_equal(class(x[[2]]), "numeric")
expect_equal(class(x[[3]]), "numeric")
x <- read_ods("../testdata/col_types.ods", col_types = NA)
expect_equal(class(x[[1]]), "character")
expect_equal(class(x[[2]]), "character")
expect_equal(class(x[[3]]), "character")
x <- read_ods("../testdata/col_types.ods", col_types = "fii")
expect_equal(class(x[[1]]), "factor")
expect_equal(class(x[[2]]), "integer")
expect_equal(class(x[[3]]), "integer")
x <- read_ods("../testdata/col_types.ods", col_types = "cii")
expect_equal(class(x[[1]]), "character")
expect_equal(class(x[[2]]), "integer")
expect_equal(class(x[[3]]), "integer")
## https://github.com/tidyverse/readr/issues/1509
## readr::type_convert(df, col_types = "f_")
## x <- read_ods("../testdata/col_types.ods", col_types = "fi_")
## expect_equal(class(x[,1]]), "factor")
## expect_equal(class(x[,2]]), "integer")
## expect_equal(ncol(x), 3)
})

test_that("col_types throw error", {
expect_error(read_ods('../testdata/col_types.ods', col_types = 123))
expect_error(read_ods('../testdata/col_types.ods', col_types = c(NA, NA)))
expect_error(read_ods('../testdata/col_types.ods', col_types = c(NA, 123)))
})
test_that("col_types orthodox", {
x <- read_ods('../testdata/col_types.ods', col_types = readr::cols(cola = readr::col_factor(), colb = readr::col_integer(), colc = readr::col_integer()), as_tibble = TRUE)
expect_equal(class(x[[1]]), "factor")
expect_equal(class(x[[2]]), "integer")
expect_equal(class(x[[2]]), "integer")
x <- read_ods('../testdata/col_types.ods', col_types = readr::cols(cola = readr::col_character(), colb = readr::col_integer(), colc = readr::col_integer()), as_tibble = TRUE)
expect_equal(class(x[[1]]), "character")
expect_equal(class(x[[2]]), "integer")
expect_equal(class(x[[2]]), "integer")
})


test_that("col_types list", {
x <- read_ods("../testdata/col_types.ods", col_types = list(cola = "f", colb = "i", colc = "i"))
expect_equal(class(x[[1]]), "factor")
expect_equal(class(x[[2]]), "integer")
expect_equal(class(x[[3]]), "integer")
x <- read_ods("../testdata/col_types.ods", col_types = list(cola = "c", colb = "i", colc = "i"))
expect_equal(class(x[[1]]), "character")
expect_equal(class(x[[2]]), "integer")
expect_equal(class(x[[3]]), "integer")
})
3 changes: 1 addition & 2 deletions tests/testthat/test_read_ods.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ test_that("Incorrect Argument", {
expect_error(read_ods(), "No file path was")
expect_error(read_ods(path = "not/real/file.ods"), "file does not exist")
expect_error(read_ods(path = "../testdata/sum.ods", col_names = "a"), "col_names must be of type `boolean`")
expect_error(read_ods(path = '../testdata/sum.ods', col_types = "a"), "Unknown col_types. Can either be a class col_spec, NULL or NA.")
expect_error(read_ods(path = "../testdata/sum.ods", skip = -1), "skip must be a positive integer")
expect_error(read_ods(path = "../testdata/sum.ods", formula_as_formula = "a"), "formula_as_formula must be of type `boolean`")
expect_error(read_ods(path = "../testdata/sum.ods", row_names = "a"), "row_names must be of type `boolean`")
Expand Down Expand Up @@ -97,4 +96,4 @@ test_that("read with column headers", {
expect_silent(x <- read_ods("../testdata/starwars.ods", row_names = TRUE, as_tibble = FALSE))
expect_equal(ncol(x), 2)
expect_equal(colnames(x), c("homeworld", "species"))
})
})

0 comments on commit 38cefb8

Please sign in to comment.