Skip to content

Commit

Permalink
added cpp implementation and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
schochastics committed Sep 24, 2023
1 parent 625ef06 commit 8548c66
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 2 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ export(ada_has_port)
export(ada_has_search)
export(ada_url_parse)
export(public_suffix)
export(url_decode)
importFrom(Rcpp,sourceCpp)
useDynLib(adaR, .registration = TRUE)
12 changes: 12 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,15 @@ Rcpp_ada_get_protocol <- function(input_vec, length_vec) {
.Call(`_adaR_Rcpp_ada_get_protocol`, input_vec, length_vec)
}

#' Percent-decode the characters of URLs
#'
#' Works along the lines of [utils::URLdecode].
#'
#' @param url a character vector
#' @export
#' @examples
#' url_decode("Hello%20World")
url_decode <- function(url) {
  # Hand the vector to the registered native routine and return its result.
  decoded <- .Call(`_adaR_url_decode`, url)
  decoded
}

7 changes: 5 additions & 2 deletions R/parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,13 @@ ada_url_parse <- function(url, decode = TRUE) {
df
}

## NA/NULL-aware percent-decoding built on url_decode(), hopefully without
## great performance impact.
##
## @param URL character vector of (possibly percent-encoded) strings, or NULL.
## @return character(0) for NULL input; otherwise `URL` with every non-missing
##   element percent-decoded exactly once and every missing element set to
##   NA_character_.
.URLdecode <- function(URL) {
  if (is.null(URL)) {
    return(character(0))
  }
  # Use a logical mask: negating the integer positions returned by which()
  # yields an all-FALSE vector, so the NA assignment would select nothing.
  is_missing <- is.na(URL)
  # Decode exactly once. Running utils::URLdecode() and then url_decode()
  # would turn an escaped percent sign such as "%2520" into " " rather
  # than "%20".
  URL[!is_missing] <- url_decode(URL[!is_missing])
  # Also coerces an all-NA logical input to character.
  URL[is_missing] <- NA_character_
  URL
}
17 changes: 17 additions & 0 deletions man/url_decode.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,17 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// url_decode
// Forward declaration of the C++ implementation called by the wrapper below.
CharacterVector url_decode(CharacterVector url);
// .Call entry point for url_decode: takes the R argument as a SEXP, converts
// it to a CharacterVector, calls url_decode() and returns the wrapped result.
RcppExport SEXP _adaR_url_decode(SEXP urlSEXP) {
// BEGIN_RCPP / END_RCPP are Rcpp macros that bracket the body; presumably they
// turn C++ exceptions into R errors (see the Rcpp documentation).
BEGIN_RCPP
// Holds the value handed back to R; declared before the RNG scope below.
Rcpp::RObject rcpp_result_gen;
// NOTE(review): url_decode does not appear to draw random numbers, so
// `[[Rcpp::export(rng = false)]]` on the implementation would presumably let
// this scope be dropped -- confirm before changing.
Rcpp::RNGScope rcpp_rngScope_gen;
// Convert the incoming SEXP into the parameter type the implementation expects.
Rcpp::traits::input_parameter< CharacterVector >::type url(urlSEXP);
rcpp_result_gen = Rcpp::wrap(url_decode(url));
return rcpp_result_gen;
END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_adaR_Rcpp_ada_parse", (DL_FUNC) &_adaR_Rcpp_ada_parse, 2},
Expand All @@ -259,6 +270,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_adaR_Rcpp_ada_get_pathname", (DL_FUNC) &_adaR_Rcpp_ada_get_pathname, 2},
{"_adaR_Rcpp_ada_get_search", (DL_FUNC) &_adaR_Rcpp_ada_get_search, 2},
{"_adaR_Rcpp_ada_get_protocol", (DL_FUNC) &_adaR_Rcpp_ada_get_protocol, 2},
{"_adaR_url_decode", (DL_FUNC) &_adaR_url_decode, 1},
{NULL, NULL, 0}
};

Expand Down
34 changes: 34 additions & 0 deletions src/urldecode.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include <Rcpp.h>

using namespace Rcpp;

//' Function to percent-decode characters in URLs
//'
//' Similar to [utils::URLdecode]. Every `%XX` escape, where `XX` are two
//' hexadecimal digits, is replaced by the byte it encodes. A `%` that is not
//' followed by two hexadecimal digits is copied through unchanged, and `NA`
//' elements stay `NA`.
//'
//' @param url a character vector
//' @return a character vector of the same length as `url`
//' @export
//' @examples
//' url_decode("Hello%20World")
// [[Rcpp::export]]
CharacterVector url_decode(CharacterVector url) {
  // Value of a single hexadecimal digit, or -1 when `c` is not one.
  auto hex_value = [](char c) -> int {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
  };

  const R_xlen_t n = url.size();
  CharacterVector decoded(n);

  for (R_xlen_t k = 0; k < n; ++k) {
    // Keep missing values missing instead of decoding the text "NA".
    if (STRING_ELT(url, k) == NA_STRING) {
      decoded[k] = NA_STRING;
      continue;
    }

    const std::string input(CHAR(STRING_ELT(url, k)));
    const std::size_t len = input.size();
    std::string output;
    output.reserve(len);

    std::size_t i = 0;
    while (i < len) {
      // An escape needs two more characters after the '%'.
      if (input[i] == '%' && i + 2 < len) {
        const int hi = hex_value(input[i + 1]);
        const int lo = hex_value(input[i + 2]);
        if (hi >= 0 && lo >= 0) {
          output += static_cast<char>((hi << 4) | lo);
          i += 3;
          continue;
        }
      }
      // Ordinary character, or a '%' that does not start a valid escape:
      // copy it through literally rather than emitting an undefined byte.
      output += input[i];
      ++i;
    }

    decoded[k] = output;
  }

  return decoded;
}
6 changes: 6 additions & 0 deletions tests/testthat/test-urldecode.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,9 @@ test_that("Integration #21", {
res <- adaR::ada_url_parse(c("https://www.google.co.jp/search?q=\u30c9\u30a4\u30c4", NA, "https://www.google.co.jp/search?q=\u30c9\u30a4\u30c4"))
expect_equal(res$search, c("?q=\u30c9\u30a4\u30c4", NA_character_, "?q=\u30c9\u30a4\u30c4"))
})

test_that("cpp implementation is correct", {
  # A fully percent-encoded Google Maps URL and the plain text it should
  # decode to.
  expected <- "https://www.google.de/maps/@47.6647302,9.1389738,11z?entry=ttu"
  encoded <- "https%3A%2F%2Fwww.google.de%2Fmaps%2F%4047.6647302%2C9.1389738%2C11z%3Fentry%3Dttu"
  decoded <- url_decode(encoded)
  expect_equal(decoded, expected)
})

0 comments on commit 8548c66

Please sign in to comment.