From c09c50beaca686420ee9ac54a6f15cfae5145c07 Mon Sep 17 00:00:00 2001
From: chainsawriot <chainsawtiney@gmail.com>
Date: Fri, 29 Dec 2023 18:15:47 +0100
Subject: [PATCH] Fix #154

---
 R/read_ods.R           | 69 ++++++++++++++++++++++++------------------
 man/list_ods_sheets.Rd |  2 +-
 man/read_ods.Rd        |  8 +++--
 3 files changed, 47 insertions(+), 32 deletions(-)

diff --git a/R/read_ods.R b/R/read_ods.R
index ec05966..a1c821f 100644
--- a/R/read_ods.R
+++ b/R/read_ods.R
@@ -32,26 +32,30 @@
     return(g)
 }
 
-
-
 ## Based on readxl, although the implementation is different.
 ## If max row is -1, read to end of row.
 ## Row and column-numbers are 1-based
-.standardise_limits <- function(range, skip) {
+.standardise_limits <- function(range, skip, n_max) {
     if(is.null(range)) {
         skip <- check_nonnegative_integer(x = skip, argument = "skip")
+        n_max <- check_nonnegative_integer(x = n_max, argument = "n_max")
+        if (n_max == Inf) {
+            max_row <- -1
+        } else {
+            max_row <- n_max + 1
+        }
         limits <- c(
             min_row = skip + 1,
-            max_row = -1,
+            max_row = max_row,
             min_col = 1,
             max_col = -1
         )
     } else {
-        if(skip != 0) {
-            warning("Range and non-zero value for skip given. Defaulting to range.", call. = FALSE)
+        if(skip != 0 || n_max != Inf) {
+            warning("Range and non-default value for skip or n_max given. Defaulting to range.", call. = FALSE)
         }
         tryCatch({
-        limits <- cellranger::as.cell_limits(range)
+            limits <- cellranger::as.cell_limits(range)
         }, error = function(e) {
             stop("Invalid `range`")
         })
@@ -127,22 +131,22 @@
     return(data.frame())
 }
 
-.type_convert <- function(df, col_types = NULL, verbose = TRUE, na = c("", "NA")) {
+.type_convert <- function(df, col_types = NULL, verbose = TRUE, na = c("", "NA"), trim_ws = TRUE) {
     if (verbose) {
         res <- readr::type_convert(df = df, col_types, na = na)
     } else {
         suppressMessages({
-            res <- readr::type_convert(df = df, col_types, na = na)
+            res <- readr::type_convert(df = df, col_types, na = na, trim_ws = trim_ws)
         })
     }
     return(res)
 }
 
-.handle_col_types <- function(res, col_types, verbose, na) {
+.handle_col_types <- function(res, col_types, verbose, na, trim_ws) {
     if (isTRUE(is.na(col_types)) || nrow(res) == 0) {
         return(res)
     }
-    .type_convert(df = res, col_types = col_types, verbose = verbose, na = na)
+    .type_convert(df = res, col_types = col_types, verbose = verbose, na = na, trim_ws = trim_ws)
 }
 
 ## standardise `sheet` parameter as a number, i.e. sheet_index
@@ -173,19 +177,21 @@
 }
 
 .read_ods <- function(path,
-                        sheet = 1,
-                        col_names = TRUE,
-                        col_types = NULL,
-                        na = "",
-                        skip = 0,
-                        formula_as_formula = FALSE,
-                        range = NULL,
-                        row_names = FALSE,
-                        strings_as_factors = FALSE,
-                        verbose = FALSE,
-                        as_tibble = TRUE,
-                        .name_repair = "unique",
-                        flat = FALSE) {
+                      sheet = 1,
+                      col_names = TRUE,
+                      col_types = NULL,
+                      na = "",
+                      skip = 0,
+                      formula_as_formula = FALSE,
+                      range = NULL,
+                      row_names = FALSE,
+                      strings_as_factors = FALSE,
+                      verbose = FALSE,
+                      as_tibble = TRUE,
+                      .name_repair = "unique",
+                      flat = FALSE,
+                      trim_ws = TRUE,
+                      n_max = Inf) {
     .check_read_args(path,
         sheet,
         col_names,
@@ -207,7 +213,7 @@
         .read_ods_func <- read_ods_
     }
     ## Get cell range info
-    limits <- .standardise_limits(range, skip)
+    limits <- .standardise_limits(range, skip, n_max)
     sheet_index <- .standardise_sheet(sheet = sheet, sheet_names = .get_sheet_names_func(file = path, include_external_data = TRUE),
                                       range = range)
     strings <- .read_ods_func(file = path,
@@ -233,7 +239,7 @@
             byrow = TRUE),
         stringsAsFactors = FALSE)
     res <- .change_df_with_col_row_header(x = res, col_header = col_names, row_header = row_names, .name_repair = .name_repair)
-    res <- .handle_col_types(res, col_types = col_types, verbose = verbose, na = na)
+    res <- .handle_col_types(res, col_types = col_types, verbose = verbose, na = na, trim_ws = trim_ws)
     if (strings_as_factors) {
         res <- .convert_strings_to_factors(df = res)
     }
@@ -300,9 +306,10 @@
 #'  Default is `"unique"`.
 #'
 #' @param ods_format character, must be "auto", "ods" or "fods". The default "auto" is to determine the format automatically. By default, the format is determined by file extension, unless `guess` is `FALSE`.
-#' @param guess logical. If the file extension is absent or not recognized, this
+#' @param guess logical, If the file extension is absent or not recognized, this
 #'   controls whether we attempt to guess format based on the file signature or
 #'   "magic number".
+#' @param trim_ws logical, should leading and trailing whitespace be trimmed?
 #' @return A tibble (\code{tibble}) or data frame (\code{data.frame}) containing a representation of data in the (f)ods file.
 #' @author Peter Brohan <peter.brohan+cran@@gmail.com>, Chung-hong Chan <chainsawtiney@@gmail.com>, Gerrit-Jan Schutten <phonixor@@gmail.com>
 #' @examples
@@ -343,7 +350,9 @@ read_ods <- function(path,
                      as_tibble = TRUE,
                      .name_repair = "unique",
                      ods_format = c("auto", "ods", "fods"),
-                     guess = FALSE) {
+                     guess = FALSE,
+                     trim_ws = TRUE,
+                     n_max = Inf) {
     ods_format <- .determine_ods_format(path = path, guess = guess, ods_format = match.arg(ods_format))
     ## Should use match.call but there's a weird bug if one of the variable names is 'file'
     .read_ods(path = path,
@@ -359,7 +368,9 @@ read_ods <- function(path,
         verbose = verbose,
         as_tibble = as_tibble,
         .name_repair = .name_repair,
-        flat = ods_format == "fods")
+        flat = ods_format == "fods",
+        trim_ws = trim_ws,
+        n_max = n_max)
 }
 
 #' @rdname read_ods
diff --git a/man/list_ods_sheets.Rd b/man/list_ods_sheets.Rd
index 96a8ea6..41134f9 100644
--- a/man/list_ods_sheets.Rd
+++ b/man/list_ods_sheets.Rd
@@ -24,7 +24,7 @@ ods_sheets(path)
 
 \item{ods_format}{character, must be "auto", "ods" or "fods". The default "auto" is to determine the format automatically. By default, the format is determined by file extension, unless \code{guess} is \code{FALSE}.}
 
-\item{guess}{logical. If the file extension is absent or not recognized, this
+\item{guess}{logical, If the file extension is absent or not recognized, this
 controls whether we attempt to guess format based on the file signature or
 "magic number".}
 }
diff --git a/man/read_ods.Rd b/man/read_ods.Rd
index 83f5df0..21e46a5 100644
--- a/man/read_ods.Rd
+++ b/man/read_ods.Rd
@@ -20,7 +20,9 @@ read_ods(
   as_tibble = TRUE,
   .name_repair = "unique",
   ods_format = c("auto", "ods", "fods"),
-  guess = FALSE
+  guess = FALSE,
+  trim_ws = TRUE,
+  n_max = Inf
 )
 
 read_fods(
@@ -78,9 +80,11 @@ Default is \code{"unique"}.}
 
 \item{ods_format}{character, must be "auto", "ods" or "fods". The default "auto" is to determine the format automatically. By default, the format is determined by file extension, unless \code{guess} is \code{FALSE}.}
 
-\item{guess}{logical. If the file extension is absent or not recognized, this
+\item{guess}{logical, If the file extension is absent or not recognized, this
 controls whether we attempt to guess format based on the file signature or
 "magic number".}
+
+\item{trim_ws}{logical, should leading and trailing whitespace be trimmed?}
 }
 \value{
 A tibble (\code{tibble}) or data frame (\code{data.frame}) containing a representation of data in the (f)ods file.