Skip to content

Commit

Permalink
Fix #37
Browse files Browse the repository at this point in the history
The string trimming function didn't check for missing. Now it's
checked.

Also, to emulate the behavior of `readxl`, all space columns should be
detected as logical.

benchmark pending
  • Loading branch information
chainsawriot committed Jan 5, 2025
1 parent 5497a77 commit c958b86
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 5 deletions.
12 changes: 9 additions & 3 deletions src/CollectorGuess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,17 @@ bool canParse(const cpp11::strings& x, const canParseFun& canParseF, LocaleInfo*
return true;
}

bool allMissing(const cpp11::strings& x) {
bool allMissing(const cpp11::strings& x, bool trim_ws) {
for (const auto & i : x) {
if (i != NA_STRING && i.size() > 0) {
if (!trim_ws && i != NA_STRING && i.size() > 0) {
return false;
}
if (trim_ws) {
auto istr = trimString(std::string(i));
if (i != NA_STRING && istr != "") {
return false;
}
}
}
return true;
}
Expand Down Expand Up @@ -136,7 +142,7 @@ static bool isDateTime(const std::string& x, LocaleInfo* pLocale) {
return "character";
}

if (allMissing(input)) {
if (allMissing(input, trim_ws)) {
return "logical";
}

Expand Down
7 changes: 5 additions & 2 deletions src/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,11 @@ inline bool starts_with_comment(
inline std::string trimString(std::string const &str, std::string const &whitespace=" \r\n\t\v\f") {
auto start = str.find_first_not_of(whitespace);
auto end = str.find_last_not_of(whitespace);

return str.substr(start, end - start + 1);
if (start != std::string::npos) {
return str.substr(start, end - start + 1);
} else {
return "";
}
}

#endif
10 changes: 10 additions & 0 deletions tests/testthat/test-parsing.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,13 @@ test_that("parse_guess() trim_ws #32 or tidyverse/readr#1536", {
expect_equal(parse_guess(c(" 1", " 2", " 3"), trim_ws = FALSE), c(" 1", " 2", " 3"))
expect_equal(parse_guess(c(" TRUE", "FALSE ", " T "), trim_ws = TRUE), c(TRUE, FALSE, TRUE))
})

test_that("all empty with trim_ws won't crash, #37 and ropensci/readODS#211", {
expect_error(minty:::guess_parser(c(" ", " "), trim_ws = TRUE), NA)
expect_error(minty:::guess_parser(c(" "), trim_ws = TRUE), NA)
expect_equal(minty:::guess_parser(c(" "), trim_ws = TRUE), "logical")
expect_equal(minty:::guess_parser(c(" ", " "), trim_ws = TRUE), "logical")
expect_error(minty:::guess_parser(c(" "), trim_ws = FALSE), NA)
expect_error(minty:::guess_parser(c(" ", "a"), trim_ws = FALSE), NA)
expect_equal(minty:::guess_parser(c(" ", "a")), "character")
})
12 changes: 12 additions & 0 deletions tests/testthat/test-type-convert.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,15 @@ test_that("r_is_string_cpp11", {
expect_false(r_is_string_cpp11(NA_character_))
expect_false(r_is_string_cpp11(NULL))
})

test_that("integration tests for #37, also ropensci/readODS#211", {
input <- structure(list(picture_archive_url = c("", "", "", "", ""),
video_url = c(" ", " ", " ", " ", " ")),
row.names = c(NA, -5L), class = "data.frame")

expect_error(type_convert(input, trim_ws = TRUE), NA)
output <- type_convert(input, trim_ws = TRUE)
expect_true(is.logical(output$video_url))
output <- type_convert(input, trim_ws = FALSE)
expect_false(is.logical(output$video_url))
})

0 comments on commit c958b86

Please sign in to comment.