From 80cdfc907751bff8be3328703e42a31b0c111611 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 16 Oct 2023 15:28:11 +0200 Subject: [PATCH 1/4] added set functions --- NAMESPACE | 10 +++ R/RcppExports.R | 40 ++++++++++ R/set.R | 93 +++++++++++++++++++++++ man/ada_set_href.Rd | 62 ++++++++++++++++ src/RcppExports.cpp | 140 +++++++++++++++++++++++++++++++++++ src/adaR.cpp | 151 ++++++++++++++++++++++++++++++++++---- src/adaR.h | 48 ++++++++++-- tests/testthat/test-set.R | 13 ++++ 8 files changed, 537 insertions(+), 20 deletions(-) create mode 100644 R/set.R create mode 100644 man/ada_set_href.Rd create mode 100644 tests/testthat/test-set.R diff --git a/NAMESPACE b/NAMESPACE index f91099f..4c1829f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,16 @@ export(ada_has_non_empty_password) export(ada_has_non_empty_username) export(ada_has_port) export(ada_has_search) +export(ada_set_hash) +export(ada_set_host) +export(ada_set_hostname) +export(ada_set_href) +export(ada_set_password) +export(ada_set_pathname) +export(ada_set_port) +export(ada_set_protocol) +export(ada_set_search) +export(ada_set_username) export(ada_url_parse) export(public_suffix) export(url_decode2) diff --git a/R/RcppExports.R b/R/RcppExports.R index 2243b14..74ee4c3 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -77,6 +77,46 @@ Rcpp_ada_get_protocol <- function(url_vec, decode) { .Call(`_adaR_Rcpp_ada_get_protocol`, url_vec, decode) } +Rcpp_ada_set_href <- function(url_vec, subst, decode) { + .Call(`_adaR_Rcpp_ada_set_href`, url_vec, subst, decode) +} + +Rcpp_ada_set_username <- function(url_vec, subst, decode) { + .Call(`_adaR_Rcpp_ada_set_username`, url_vec, subst, decode) +} + +Rcpp_ada_set_password <- function(url_vec, subst, decode) { + .Call(`_adaR_Rcpp_ada_set_password`, url_vec, subst, decode) +} + +Rcpp_ada_set_port <- function(url_vec, subst, decode) { + .Call(`_adaR_Rcpp_ada_set_port`, url_vec, subst, decode) +} + +Rcpp_ada_set_host <- function(url_vec, subst, decode) { + 
.Call(`_adaR_Rcpp_ada_set_host`, url_vec, subst, decode) +} + +Rcpp_ada_set_hostname <- function(url_vec, subst, decode) { + .Call(`_adaR_Rcpp_ada_set_hostname`, url_vec, subst, decode) +} + +Rcpp_ada_set_pathname <- function(url_vec, subst, decode) { + .Call(`_adaR_Rcpp_ada_set_pathname`, url_vec, subst, decode) +} + +Rcpp_ada_set_protocol <- function(url_vec, subst, decode) { + .Call(`_adaR_Rcpp_ada_set_protocol`, url_vec, subst, decode) +} + +Rcpp_ada_set_search <- function(url_vec, subst, decode) { + .Call(`_adaR_Rcpp_ada_set_search`, url_vec, subst, decode) +} + +Rcpp_ada_set_hash <- function(url_vec, subst, decode) { + .Call(`_adaR_Rcpp_ada_set_hash`, url_vec, subst, decode) +} + Rcpp_url_decode2 <- function(url) { .Call(`_adaR_Rcpp_url_decode2`, url) } diff --git a/R/set.R b/R/set.R new file mode 100644 index 0000000..dcc2f3f --- /dev/null +++ b/R/set.R @@ -0,0 +1,93 @@ +.set <- function(url, decode, input, func) { + if (is.null(url)) { + return(character(0)) + } + if (is.null(input)) { + return(url) + } + if (length(input) == 1) { + input <- rep(input, length(url)) + } + if (length(input) != length(url)) { + stop("input must have length one or the same length as url", call. = FALSE) + } + func(url, input, decode) +} + +#' Set a specific component of URL +#' +#' These functions set a specific component of URL. +#' @inheritParams ada_url_parse +#' @param input character. containing new component for URL. Vector of length 1 +#' or same length as url. 
+#' @return character, `NA` if not a valid URL +#' @examples +#' url <- "https://user_1:password_1@example.org:8080/dir/../api?q=1#frag" +#' ada_set_href(url, "https://google.de") +#' ada_set_username(url, "user_2") +#' ada_set_password(url, "hunter2") +#' ada_set_port(url, "1234") +#' ada_set_hash(url, "#section1") +#' ada_set_host(url, "example.de") +#' ada_set_hostname(url, "example.de") +#' ada_set_pathname(url, "path/") +#' ada_set_search(url, "q=2") +#' ada_set_protocol(url, "ws:") +#' @export +ada_set_href <- function(url, input, decode = TRUE) { + .set(url, decode, input, Rcpp_ada_set_href) +} + +#' @rdname ada_set_href +#' @export +ada_set_username <- function(url, input, decode = TRUE) { + .set(url, decode, input, Rcpp_ada_set_username) +} + +#' @rdname ada_set_href +#' @export +ada_set_password <- function(url, input, decode = TRUE) { + .set(url, decode, input, Rcpp_ada_set_password) +} + +#' @rdname ada_set_href +#' @export +ada_set_port <- function(url, input, decode = TRUE) { + .set(url, decode, input, Rcpp_ada_set_port) +} + +#' @rdname ada_set_href +#' @export +ada_set_host <- function(url, input, decode = TRUE) { + .set(url, decode, input, Rcpp_ada_set_host) +} + +#' @rdname ada_set_href +#' @export +ada_set_hostname <- function(url, input, decode = TRUE) { + .set(url, decode, input, Rcpp_ada_set_hostname) +} + +#' @rdname ada_set_href +#' @export +ada_set_pathname <- function(url, input, decode = TRUE) { + .set(url, decode, input, Rcpp_ada_set_pathname) +} + +#' @rdname ada_set_href +#' @export +ada_set_protocol <- function(url, input, decode = TRUE) { + .set(url, decode, input, Rcpp_ada_set_protocol) +} + +#' @rdname ada_set_href +#' @export +ada_set_search <- function(url, input, decode = TRUE) { + .set(url, decode, input, Rcpp_ada_set_search) +} + +#' @rdname ada_set_href +#' @export +ada_set_hash <- function(url, input, decode = TRUE) { + .set(url, decode, input, Rcpp_ada_set_hash) +} diff --git a/man/ada_set_href.Rd b/man/ada_set_href.Rd new 
file mode 100644 index 0000000..56d8b99 --- /dev/null +++ b/man/ada_set_href.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/set.R +\name{ada_set_href} +\alias{ada_set_href} +\alias{ada_set_username} +\alias{ada_set_password} +\alias{ada_set_port} +\alias{ada_set_host} +\alias{ada_set_hostname} +\alias{ada_set_pathname} +\alias{ada_set_protocol} +\alias{ada_set_search} +\alias{ada_set_hash} +\title{Set a specific component of URL} +\usage{ +ada_set_href(url, input, decode = TRUE) + +ada_set_username(url, input, decode = TRUE) + +ada_set_password(url, input, decode = TRUE) + +ada_set_port(url, input, decode = TRUE) + +ada_set_host(url, input, decode = TRUE) + +ada_set_hostname(url, input, decode = TRUE) + +ada_set_pathname(url, input, decode = TRUE) + +ada_set_protocol(url, input, decode = TRUE) + +ada_set_search(url, input, decode = TRUE) + +ada_set_hash(url, input, decode = TRUE) +} +\arguments{ +\item{url}{character. one or more URL to be parsed} + +\item{input}{character. containing new component for URL. Vector of length 1 +or same length as url.} + +\item{decode}{logical. Whether to decode the output (see \code{\link[utils:URLencode]{utils::URLdecode()}}), default to \code{TRUE}} +} +\value{ +character, \code{NA} if not a valid URL +} +\description{ +These functions set a specific component of URL. 
+} +\examples{ +url <- "https://user_1:password_1@example.org:8080/dir/../api?q=1#frag" +ada_set_href(url, "https://google.de") +ada_set_username(url, "user_2") +ada_set_password(url, "hunter2") +ada_set_port(url, "1234") +ada_set_hash(url, "#section1") +ada_set_host(url, "example.de") +ada_set_hostname(url, "example.de") +ada_set_pathname(url, "path/") +ada_set_search(url, "q=2") +ada_set_protocol(url, "ws:") +} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 3133266..f296c08 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -230,6 +230,136 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// Rcpp_ada_set_href +CharacterVector Rcpp_ada_set_href(const CharacterVector& url_vec, const CharacterVector& subst, bool decode); +RcppExport SEXP _adaR_Rcpp_ada_set_href(SEXP url_vecSEXP, SEXP substSEXP, SEXP decodeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const CharacterVector& >::type url_vec(url_vecSEXP); + Rcpp::traits::input_parameter< const CharacterVector& >::type subst(substSEXP); + Rcpp::traits::input_parameter< bool >::type decode(decodeSEXP); + rcpp_result_gen = Rcpp::wrap(Rcpp_ada_set_href(url_vec, subst, decode)); + return rcpp_result_gen; +END_RCPP +} +// Rcpp_ada_set_username +CharacterVector Rcpp_ada_set_username(const CharacterVector& url_vec, const CharacterVector& subst, bool decode); +RcppExport SEXP _adaR_Rcpp_ada_set_username(SEXP url_vecSEXP, SEXP substSEXP, SEXP decodeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const CharacterVector& >::type url_vec(url_vecSEXP); + Rcpp::traits::input_parameter< const CharacterVector& >::type subst(substSEXP); + Rcpp::traits::input_parameter< bool >::type decode(decodeSEXP); + rcpp_result_gen = Rcpp::wrap(Rcpp_ada_set_username(url_vec, subst, decode)); + return rcpp_result_gen; +END_RCPP +} +// Rcpp_ada_set_password +CharacterVector 
Rcpp_ada_set_password(const CharacterVector& url_vec, const CharacterVector& subst, bool decode); +RcppExport SEXP _adaR_Rcpp_ada_set_password(SEXP url_vecSEXP, SEXP substSEXP, SEXP decodeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const CharacterVector& >::type url_vec(url_vecSEXP); + Rcpp::traits::input_parameter< const CharacterVector& >::type subst(substSEXP); + Rcpp::traits::input_parameter< bool >::type decode(decodeSEXP); + rcpp_result_gen = Rcpp::wrap(Rcpp_ada_set_password(url_vec, subst, decode)); + return rcpp_result_gen; +END_RCPP +} +// Rcpp_ada_set_port +CharacterVector Rcpp_ada_set_port(const CharacterVector& url_vec, const CharacterVector& subst, bool decode); +RcppExport SEXP _adaR_Rcpp_ada_set_port(SEXP url_vecSEXP, SEXP substSEXP, SEXP decodeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const CharacterVector& >::type url_vec(url_vecSEXP); + Rcpp::traits::input_parameter< const CharacterVector& >::type subst(substSEXP); + Rcpp::traits::input_parameter< bool >::type decode(decodeSEXP); + rcpp_result_gen = Rcpp::wrap(Rcpp_ada_set_port(url_vec, subst, decode)); + return rcpp_result_gen; +END_RCPP +} +// Rcpp_ada_set_host +CharacterVector Rcpp_ada_set_host(const CharacterVector& url_vec, const CharacterVector& subst, bool decode); +RcppExport SEXP _adaR_Rcpp_ada_set_host(SEXP url_vecSEXP, SEXP substSEXP, SEXP decodeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const CharacterVector& >::type url_vec(url_vecSEXP); + Rcpp::traits::input_parameter< const CharacterVector& >::type subst(substSEXP); + Rcpp::traits::input_parameter< bool >::type decode(decodeSEXP); + rcpp_result_gen = Rcpp::wrap(Rcpp_ada_set_host(url_vec, subst, decode)); + return rcpp_result_gen; +END_RCPP +} +// Rcpp_ada_set_hostname +CharacterVector 
Rcpp_ada_set_hostname(const CharacterVector& url_vec, const CharacterVector& subst, bool decode); +RcppExport SEXP _adaR_Rcpp_ada_set_hostname(SEXP url_vecSEXP, SEXP substSEXP, SEXP decodeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const CharacterVector& >::type url_vec(url_vecSEXP); + Rcpp::traits::input_parameter< const CharacterVector& >::type subst(substSEXP); + Rcpp::traits::input_parameter< bool >::type decode(decodeSEXP); + rcpp_result_gen = Rcpp::wrap(Rcpp_ada_set_hostname(url_vec, subst, decode)); + return rcpp_result_gen; +END_RCPP +} +// Rcpp_ada_set_pathname +CharacterVector Rcpp_ada_set_pathname(const CharacterVector& url_vec, const CharacterVector& subst, bool decode); +RcppExport SEXP _adaR_Rcpp_ada_set_pathname(SEXP url_vecSEXP, SEXP substSEXP, SEXP decodeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const CharacterVector& >::type url_vec(url_vecSEXP); + Rcpp::traits::input_parameter< const CharacterVector& >::type subst(substSEXP); + Rcpp::traits::input_parameter< bool >::type decode(decodeSEXP); + rcpp_result_gen = Rcpp::wrap(Rcpp_ada_set_pathname(url_vec, subst, decode)); + return rcpp_result_gen; +END_RCPP +} +// Rcpp_ada_set_protocol +CharacterVector Rcpp_ada_set_protocol(const CharacterVector& url_vec, const CharacterVector& subst, bool decode); +RcppExport SEXP _adaR_Rcpp_ada_set_protocol(SEXP url_vecSEXP, SEXP substSEXP, SEXP decodeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const CharacterVector& >::type url_vec(url_vecSEXP); + Rcpp::traits::input_parameter< const CharacterVector& >::type subst(substSEXP); + Rcpp::traits::input_parameter< bool >::type decode(decodeSEXP); + rcpp_result_gen = Rcpp::wrap(Rcpp_ada_set_protocol(url_vec, subst, decode)); + return rcpp_result_gen; +END_RCPP +} +// Rcpp_ada_set_search 
+CharacterVector Rcpp_ada_set_search(const CharacterVector& url_vec, const CharacterVector& subst, bool decode); +RcppExport SEXP _adaR_Rcpp_ada_set_search(SEXP url_vecSEXP, SEXP substSEXP, SEXP decodeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const CharacterVector& >::type url_vec(url_vecSEXP); + Rcpp::traits::input_parameter< const CharacterVector& >::type subst(substSEXP); + Rcpp::traits::input_parameter< bool >::type decode(decodeSEXP); + rcpp_result_gen = Rcpp::wrap(Rcpp_ada_set_search(url_vec, subst, decode)); + return rcpp_result_gen; +END_RCPP +} +// Rcpp_ada_set_hash +CharacterVector Rcpp_ada_set_hash(const CharacterVector& url_vec, const CharacterVector& subst, bool decode); +RcppExport SEXP _adaR_Rcpp_ada_set_hash(SEXP url_vecSEXP, SEXP substSEXP, SEXP decodeSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const CharacterVector& >::type url_vec(url_vecSEXP); + Rcpp::traits::input_parameter< const CharacterVector& >::type subst(substSEXP); + Rcpp::traits::input_parameter< bool >::type decode(decodeSEXP); + rcpp_result_gen = Rcpp::wrap(Rcpp_ada_set_hash(url_vec, subst, decode)); + return rcpp_result_gen; +END_RCPP +} // Rcpp_url_decode2 CharacterVector Rcpp_url_decode2(CharacterVector& url); RcppExport SEXP _adaR_Rcpp_url_decode2(SEXP urlSEXP) { @@ -273,6 +403,16 @@ static const R_CallMethodDef CallEntries[] = { {"_adaR_Rcpp_ada_get_pathname", (DL_FUNC) &_adaR_Rcpp_ada_get_pathname, 2}, {"_adaR_Rcpp_ada_get_search", (DL_FUNC) &_adaR_Rcpp_ada_get_search, 2}, {"_adaR_Rcpp_ada_get_protocol", (DL_FUNC) &_adaR_Rcpp_ada_get_protocol, 2}, + {"_adaR_Rcpp_ada_set_href", (DL_FUNC) &_adaR_Rcpp_ada_set_href, 3}, + {"_adaR_Rcpp_ada_set_username", (DL_FUNC) &_adaR_Rcpp_ada_set_username, 3}, + {"_adaR_Rcpp_ada_set_password", (DL_FUNC) &_adaR_Rcpp_ada_set_password, 3}, + {"_adaR_Rcpp_ada_set_port", (DL_FUNC) 
&_adaR_Rcpp_ada_set_port, 3}, + {"_adaR_Rcpp_ada_set_host", (DL_FUNC) &_adaR_Rcpp_ada_set_host, 3}, + {"_adaR_Rcpp_ada_set_hostname", (DL_FUNC) &_adaR_Rcpp_ada_set_hostname, 3}, + {"_adaR_Rcpp_ada_set_pathname", (DL_FUNC) &_adaR_Rcpp_ada_set_pathname, 3}, + {"_adaR_Rcpp_ada_set_protocol", (DL_FUNC) &_adaR_Rcpp_ada_set_protocol, 3}, + {"_adaR_Rcpp_ada_set_search", (DL_FUNC) &_adaR_Rcpp_ada_set_search, 3}, + {"_adaR_Rcpp_ada_set_hash", (DL_FUNC) &_adaR_Rcpp_ada_set_hash, 3}, {"_adaR_Rcpp_url_decode2", (DL_FUNC) &_adaR_Rcpp_url_decode2, 1}, {"_adaR_url_reverse", (DL_FUNC) &_adaR_url_reverse, 1}, {NULL, NULL, 0} diff --git a/src/adaR.cpp b/src/adaR.cpp index 1740431..067fe0a 100644 --- a/src/adaR.cpp +++ b/src/adaR.cpp @@ -1,4 +1,5 @@ #include "adaR.h" + #include "urldecode.h" std::string charsub(const ada_string stringi) { @@ -72,8 +73,9 @@ DataFrame Rcpp_ada_parse(const CharacterVector& input_vec, bool decode) { _["search"] = search, _["hash"] = hash)); } -//higher-order function for all Rcpp_ada_has_* -LogicalVector Rcpp_ada_has(const CharacterVector& url_vec, std::function<bool(ada_url)> func) { +// higher-order function for all Rcpp_ada_has_* +LogicalVector Rcpp_ada_has(const CharacterVector& url_vec, + std::function<bool(ada_url)> func) { unsigned int n = url_vec.length(); LogicalVector out(n); for (unsigned int i = 0; i < n; i++) { @@ -117,21 +119,23 @@ LogicalVector Rcpp_ada_has_non_empty_password(const CharacterVector& url_vec) { // [[Rcpp::export]] LogicalVector Rcpp_ada_has_port(const CharacterVector& url_vec) { - return Rcpp_ada_has(url_vec, &ada_has_port); + return Rcpp_ada_has(url_vec, &ada_has_port); } // [[Rcpp::export]] LogicalVector Rcpp_ada_has_hash(const CharacterVector& url_vec) { - return Rcpp_ada_has(url_vec, &ada_has_hash); + return Rcpp_ada_has(url_vec, &ada_has_hash); } // [[Rcpp::export]] LogicalVector Rcpp_ada_has_search(const CharacterVector& url_vec) { - return Rcpp_ada_has(url_vec, &ada_has_search); + return Rcpp_ada_has(url_vec, &ada_has_search); } -//higher-order 
function for all Rcpp_ada_get_* -CharacterVector Rcpp_ada_get(const CharacterVector& url_vec, std::function<ada_string(ada_url)> func, bool decode) { +// higher-order function for all Rcpp_ada_get_* +CharacterVector Rcpp_ada_get(const CharacterVector& url_vec, + std::function<ada_string(ada_url)> func, + bool decode) { unsigned int n = url_vec.length(); CharacterVector out(n); for (int i = 0; i < url_vec.length(); i++) { @@ -157,12 +161,14 @@ CharacterVector Rcpp_ada_get_href(const CharacterVector& url_vec, bool decode) { } // [[Rcpp::export]] -CharacterVector Rcpp_ada_get_username(const CharacterVector& url_vec, bool decode) { +CharacterVector Rcpp_ada_get_username(const CharacterVector& url_vec, + bool decode) { return Rcpp_ada_get(url_vec, &ada_get_username, decode); } // [[Rcpp::export]] -CharacterVector Rcpp_ada_get_password(const CharacterVector& url_vec, bool decode) { +CharacterVector Rcpp_ada_get_password(const CharacterVector& url_vec, + bool decode) { return Rcpp_ada_get(url_vec, &ada_get_password, decode); } @@ -182,21 +188,140 @@ CharacterVector Rcpp_ada_get_host(const CharacterVector& url_vec, bool decode) { } // [[Rcpp::export]] -CharacterVector Rcpp_ada_get_hostname(const CharacterVector& url_vec, bool decode) { +CharacterVector Rcpp_ada_get_hostname(const CharacterVector& url_vec, + bool decode) { return Rcpp_ada_get(url_vec, &ada_get_hostname, decode); } // [[Rcpp::export]] -CharacterVector Rcpp_ada_get_pathname(const CharacterVector& url_vec, bool decode) { +CharacterVector Rcpp_ada_get_pathname(const CharacterVector& url_vec, + bool decode) { return Rcpp_ada_get(url_vec, &ada_get_pathname, decode); } // [[Rcpp::export]] -CharacterVector Rcpp_ada_get_search(const CharacterVector& url_vec, bool decode) { +CharacterVector Rcpp_ada_get_search(const CharacterVector& url_vec, + bool decode) { return Rcpp_ada_get(url_vec, &ada_get_search, decode); } // [[Rcpp::export]] -CharacterVector Rcpp_ada_get_protocol(const CharacterVector& url_vec, bool decode) { +CharacterVector 
Rcpp_ada_get_protocol(const CharacterVector& url_vec, + bool decode) { return Rcpp_ada_get(url_vec, &ada_get_protocol, decode); } + +// higher-order function for bool Rcpp_ada_set_* +CharacterVector Rcpp_ada_set_bool( + const CharacterVector& url_vec, + std::function<bool(ada_url, const char*, size_t)> func, + const CharacterVector& subst, bool decode) { + unsigned int n = url_vec.length(); + CharacterVector out(n); + for (int i = 0; i < url_vec.length(); i++) { + String s = url_vec[i]; + std::string_view input(s.get_cstring()); + ada_url url = ada_parse(input.data(), input.length()); + if (!ada_is_valid(url)) { + out[i] = NA_STRING; + } else { + func(url, subst[i], input.length()); + out[i] = charsub(ada_get_href(url)); + } + ada_free(url); + } + if (decode) { + out = Rcpp_url_decode2(out); + } + return (out); +} + +// higher-order function for void Rcpp_ada_set_* +CharacterVector Rcpp_ada_set_void( + const CharacterVector& url_vec, + std::function<void(ada_url, const char*, size_t)> func, + const CharacterVector& subst, bool decode) { + unsigned int n = url_vec.length(); + CharacterVector out(n); + for (int i = 0; i < url_vec.length(); i++) { + String s = url_vec[i]; + std::string_view input(s.get_cstring()); + ada_url url = ada_parse(input.data(), input.length()); + if (!ada_is_valid(url)) { + out[i] = NA_STRING; + } else { + func(url, subst[i], input.length()); + out[i] = charsub(ada_get_href(url)); + } + ada_free(url); + } + if (decode) { + out = Rcpp_url_decode2(out); + } + return (out); +} + +// [[Rcpp::export]] +CharacterVector Rcpp_ada_set_href(const CharacterVector& url_vec, + const CharacterVector& subst, bool decode) { + return Rcpp_ada_set_bool(url_vec, &ada_set_href, subst, decode); +} + +// [[Rcpp::export]] +CharacterVector Rcpp_ada_set_username(const CharacterVector& url_vec, + const CharacterVector& subst, + bool decode) { + return Rcpp_ada_set_bool(url_vec, &ada_set_username, subst, decode); +} + +// [[Rcpp::export]] +CharacterVector Rcpp_ada_set_password(const CharacterVector& url_vec, + const CharacterVector& subst, 
+ bool decode) { + return Rcpp_ada_set_bool(url_vec, &ada_set_password, subst, decode); +} + +// [[Rcpp::export]] +CharacterVector Rcpp_ada_set_port(const CharacterVector& url_vec, + const CharacterVector& subst, bool decode) { + return Rcpp_ada_set_bool(url_vec, &ada_set_port, subst, decode); +} + +// [[Rcpp::export]] +CharacterVector Rcpp_ada_set_host(const CharacterVector& url_vec, + const CharacterVector& subst, bool decode) { + return Rcpp_ada_set_bool(url_vec, &ada_set_host, subst, decode); +} + +// [[Rcpp::export]] +CharacterVector Rcpp_ada_set_hostname(const CharacterVector& url_vec, + const CharacterVector& subst, + bool decode) { + return Rcpp_ada_set_bool(url_vec, &ada_set_hostname, subst, decode); +} + +// [[Rcpp::export]] +CharacterVector Rcpp_ada_set_pathname(const CharacterVector& url_vec, + const CharacterVector& subst, + bool decode) { + return Rcpp_ada_set_bool(url_vec, &ada_set_pathname, subst, decode); +} + +// [[Rcpp::export]] +CharacterVector Rcpp_ada_set_protocol(const CharacterVector& url_vec, + const CharacterVector& subst, + bool decode) { + return Rcpp_ada_set_bool(url_vec, &ada_set_protocol, subst, decode); +} + +// [[Rcpp::export]] +CharacterVector Rcpp_ada_set_search(const CharacterVector& url_vec, + const CharacterVector& subst, bool decode) { + return Rcpp_ada_set_void(url_vec, &ada_set_search, subst, decode); +} + +// [[Rcpp::export]] +CharacterVector Rcpp_ada_set_hash(const CharacterVector& url_vec, + const CharacterVector& subst, bool decode) { + return Rcpp_ada_set_void(url_vec, &ada_set_hash, subst, decode); +} \ No newline at end of file diff --git a/src/adaR.h b/src/adaR.h index 23b9f0e..c743603 100644 --- a/src/adaR.h +++ b/src/adaR.h @@ -1,7 +1,8 @@ #pragma once #include <Rcpp.h> -#include "ada/ada.cpp" // unforunately not header only + +#include "ada/ada.cpp" // unfortunately not header only using namespace Rcpp; @@ -22,12 +23,45 @@ LogicalVector Rcpp_ada_has_search(const CharacterVector& url_vec); // get_* CharacterVector 
Rcpp_ada_get_href(const CharacterVector& url_vec, bool decode); -CharacterVector Rcpp_ada_get_username(const CharacterVector& url_vec, bool decode); -CharacterVector Rcpp_ada_get_password(const CharacterVector& url_vec, bool decode); +CharacterVector Rcpp_ada_get_username(const CharacterVector& url_vec, + bool decode); +CharacterVector Rcpp_ada_get_password(const CharacterVector& url_vec, + bool decode); CharacterVector Rcpp_ada_get_port(const CharacterVector& url_vec, bool decode); CharacterVector Rcpp_ada_get_hash(const CharacterVector& url_vec, bool decode); CharacterVector Rcpp_ada_get_host(const CharacterVector& url_vec, bool decode); -CharacterVector Rcpp_ada_get_hostname(const CharacterVector& url_vec, bool decode); -CharacterVector Rcpp_ada_get_pathname(const CharacterVector& url_vec, bool decode); -CharacterVector Rcpp_ada_get_search(const CharacterVector& url_vec, bool decode); -CharacterVector Rcpp_ada_get_protocol(const CharacterVector& url_vec, bool decode); +CharacterVector Rcpp_ada_get_hostname(const CharacterVector& url_vec, + bool decode); +CharacterVector Rcpp_ada_get_pathname(const CharacterVector& url_vec, + bool decode); +CharacterVector Rcpp_ada_get_search(const CharacterVector& url_vec, + bool decode); +CharacterVector Rcpp_ada_get_protocol(const CharacterVector& url_vec, + bool decode); + +// set_* +CharacterVector Rcpp_ada_set_href(const CharacterVector& url_vec, + const CharacterVector& subst, bool decode); +CharacterVector Rcpp_ada_set_username(const CharacterVector& url_vec, + const CharacterVector& subst, + bool decode); +CharacterVector Rcpp_ada_set_password(const CharacterVector& url_vec, + const CharacterVector& subst, + bool decode); +CharacterVector Rcpp_ada_set_port(const CharacterVector& url_vec, + const CharacterVector& subst, bool decode); +CharacterVector Rcpp_ada_set_host(const CharacterVector& url_vec, + const CharacterVector& subst, bool decode); +CharacterVector Rcpp_ada_set_hostname(const CharacterVector& url_vec, + const 
CharacterVector& subst, + bool decode); +CharacterVector Rcpp_ada_set_pathname(const CharacterVector& url_vec, + const CharacterVector& subst, + bool decode); +CharacterVector Rcpp_ada_set_protocol(const CharacterVector& url_vec, + const CharacterVector& subst, + bool decode); +CharacterVector Rcpp_ada_set_search(const CharacterVector& url_vec, + const CharacterVector& subst, bool decode); +CharacterVector Rcpp_ada_set_hash(const CharacterVector& url_vec, + const CharacterVector& subst, bool decode); diff --git a/tests/testthat/test-set.R b/tests/testthat/test-set.R new file mode 100644 index 0000000..daf1260 --- /dev/null +++ b/tests/testthat/test-set.R @@ -0,0 +1,13 @@ +test_that("all set functions work", { + url <- "https://user_1:password_1@example.org:8080/dir/../api?q=1#frag" + expect_equal(ada_get_href(ada_set_href(url, "https://example.org:8000/api?q=2#das")), "https://example.org:8000/api?q=2#das") + expect_equal(ada_get_username(ada_set_username(url, "user_2")), "user_2") + expect_equal(ada_get_password(ada_set_password(url, "hunter2")), "hunter2") + expect_equal(ada_get_host(ada_set_host(url, "example.de:1234")), "example.de:1234") + expect_equal(ada_get_hostname(ada_set_hostname(url, "example.net/")), "example.net") + expect_equal(ada_get_port(ada_set_port(url, "1234")), "1234") + expect_equal(ada_get_pathname(ada_set_pathname(url, "/dat")), "/dat") + expect_equal(ada_get_search(ada_set_search(url, "q=2")), "?q=2") + expect_equal(ada_get_hash(ada_set_hash(url, "section1")), "#section1") + expect_equal(ada_get_protocol(ada_set_protocol(url, "ws:")), "ws:") +}) From 9e2423ca5fab8a07d59e948ba452fdc7f5616651 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 16 Oct 2023 15:45:56 +0200 Subject: [PATCH 2/4] added more tests --- tests/testthat/test-set.R | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/testthat/test-set.R b/tests/testthat/test-set.R index daf1260..f969bde 100644 --- a/tests/testthat/test-set.R +++ 
b/tests/testthat/test-set.R @@ -11,3 +11,25 @@ test_that("all set functions work", { expect_equal(ada_get_hash(ada_set_hash(url, "section1")), "#section1") expect_equal(ada_get_protocol(ada_set_protocol(url, "ws:")), "ws:") }) + +set_functions <- c( + ada_set_href, ada_set_username, ada_set_password, ada_set_host, ada_set_hostname, ada_set_port, ada_set_pathname, + ada_set_search, ada_set_hash, ada_set_protocol +) + +test_that("invalid urls should return NA", { + url <- "thisisnoturl" + for (func in set_functions) { + expect_equal(func(url, "invalid"), NA_character_) + } +}) + +test_that("invalid component handling", { + url <- "https://user_1:password_1@example.org:8080/dir/../api?q=1#frag" + # expect_equal(ada_get_username(ada_set_username(url, "user_2")), "user_2") + # expect_equal(ada_get_password(ada_set_password(url, "hunter2")), "hunter2") + expect_equal(ada_get_host(ada_set_host(url, "example.de1234")), "example.org:8080") + expect_equal(ada_get_hostname(ada_set_hostname(url, "example.net")), "example.org") + expect_equal(ada_get_port(ada_set_port(url, "blabla")), "") + expect_equal(ada_get_protocol(ada_set_protocol(url, "abc:")), "https:") +}) From 003c2efb29a19dcd52e8f535446b8a9e438d7d6a Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 16 Oct 2023 16:01:59 +0200 Subject: [PATCH 3/4] fixed username and password issue --- src/adaR.cpp | 8 ++++++-- tests/testthat/test-set.R | 6 +----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/adaR.cpp b/src/adaR.cpp index 067fe0a..fb32064 100644 --- a/src/adaR.cpp +++ b/src/adaR.cpp @@ -220,12 +220,14 @@ CharacterVector Rcpp_ada_set_bool( CharacterVector out(n); for (int i = 0; i < url_vec.length(); i++) { String s = url_vec[i]; + String s2 = subst[i]; std::string_view input(s.get_cstring()); + std::string_view replace(s2.get_cstring()); ada_url url = ada_parse(input.data(), input.length()); if (!ada_is_valid(url)) { out[i] = NA_STRING; } else { - func(url, subst[i], input.length()); + func(url, 
replace.data(), replace.length()); out[i] = charsub(ada_get_href(url)); } ada_free(url); @@ -245,12 +247,14 @@ CharacterVector Rcpp_ada_set_void( CharacterVector out(n); for (int i = 0; i < url_vec.length(); i++) { String s = url_vec[i]; + String s2 = subst[i]; std::string_view input(s.get_cstring()); + std::string_view replace(s2.get_cstring()); ada_url url = ada_parse(input.data(), input.length()); if (!ada_is_valid(url)) { out[i] = NA_STRING; } else { - func(url, subst[i], input.length()); + func(url, replace.data(), replace.length()); out[i] = charsub(ada_get_href(url)); } ada_free(url); diff --git a/tests/testthat/test-set.R b/tests/testthat/test-set.R index f969bde..5dc8944 100644 --- a/tests/testthat/test-set.R +++ b/tests/testthat/test-set.R @@ -26,10 +26,6 @@ test_that("invalid urls should return NA", { test_that("invalid component handling", { url <- "https://user_1:password_1@example.org:8080/dir/../api?q=1#frag" - # expect_equal(ada_get_username(ada_set_username(url, "user_2")), "user_2") - # expect_equal(ada_get_password(ada_set_password(url, "hunter2")), "hunter2") - expect_equal(ada_get_host(ada_set_host(url, "example.de1234")), "example.org:8080") - expect_equal(ada_get_hostname(ada_set_hostname(url, "example.net")), "example.org") - expect_equal(ada_get_port(ada_set_port(url, "blabla")), "") + expect_equal(ada_get_port(ada_set_port(url, "blabla")), "8080") expect_equal(ada_get_protocol(ada_set_protocol(url, "abc:")), "https:") }) From a3e32ebc11777de83c7b3387f60424d99296e644 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 16 Oct 2023 19:18:20 +0200 Subject: [PATCH 4/4] template function --- src/adaR.cpp | 55 +++++++++++++--------------------------------------- 1 file changed, 14 insertions(+), 41 deletions(-) diff --git a/src/adaR.cpp b/src/adaR.cpp index fb32064..80a6c80 100644 --- a/src/adaR.cpp +++ b/src/adaR.cpp @@ -211,10 +211,11 @@ CharacterVector Rcpp_ada_get_protocol(const CharacterVector& url_vec, return Rcpp_ada_get(url_vec, 
&ada_get_protocol, decode); } -// higher-order function for bool Rcpp_ada_set_* -CharacterVector Rcpp_ada_set_bool( +// higher-order function for Rcpp_ada_set_* +template <typename T> +CharacterVector Rcpp_ada_set( const CharacterVector& url_vec, - std::function<bool(ada_url, const char*, size_t)> func, + std::function<T(ada_url, const char*, size_t)> func, const CharacterVector& subst, bool decode) { unsigned int n = url_vec.length(); CharacterVector out(n); @@ -237,95 +238,67 @@ CharacterVector Rcpp_ada_set_bool( } return (out); } - -// higher-order function for void Rcpp_ada_set_* -CharacterVector Rcpp_ada_set_void( - const CharacterVector& url_vec, - std::function<void(ada_url, const char*, size_t)> func, - const CharacterVector& subst, bool decode) { - unsigned int n = url_vec.length(); - CharacterVector out(n); - for (int i = 0; i < url_vec.length(); i++) { - String s = url_vec[i]; - String s2 = subst[i]; - std::string_view input(s.get_cstring()); - std::string_view replace(s2.get_cstring()); - ada_url url = ada_parse(input.data(), input.length()); - if (!ada_is_valid(url)) { - out[i] = NA_STRING; - } else { - func(url, replace.data(), replace.length()); - out[i] = charsub(ada_get_href(url)); - } - ada_free(url); - } - if (decode) { - out = Rcpp_url_decode2(out); - } - return (out); -} - // [[Rcpp::export]] CharacterVector Rcpp_ada_set_href(const CharacterVector& url_vec, const CharacterVector& subst, bool decode) { - return Rcpp_ada_set_bool(url_vec, &ada_set_href, subst, decode); + return Rcpp_ada_set<bool>(url_vec, &ada_set_href, subst, decode); } // [[Rcpp::export]] CharacterVector Rcpp_ada_set_username(const CharacterVector& url_vec, const CharacterVector& subst, bool decode) { - return Rcpp_ada_set_bool(url_vec, &ada_set_username, subst, decode); + return Rcpp_ada_set<bool>(url_vec, &ada_set_username, subst, decode); } // [[Rcpp::export]] CharacterVector Rcpp_ada_set_password(const CharacterVector& url_vec, const CharacterVector& subst, bool decode) { - return Rcpp_ada_set_bool(url_vec, &ada_set_password, subst, decode); + return Rcpp_ada_set<bool>(url_vec, &ada_set_password, subst, 
decode); } // [[Rcpp::export]] CharacterVector Rcpp_ada_set_port(const CharacterVector& url_vec, const CharacterVector& subst, bool decode) { - return Rcpp_ada_set_bool(url_vec, &ada_set_port, subst, decode); + return Rcpp_ada_set<bool>(url_vec, &ada_set_port, subst, decode); } // [[Rcpp::export]] CharacterVector Rcpp_ada_set_host(const CharacterVector& url_vec, const CharacterVector& subst, bool decode) { - return Rcpp_ada_set_bool(url_vec, &ada_set_host, subst, decode); + return Rcpp_ada_set<bool>(url_vec, &ada_set_host, subst, decode); } // [[Rcpp::export]] CharacterVector Rcpp_ada_set_hostname(const CharacterVector& url_vec, const CharacterVector& subst, bool decode) { - return Rcpp_ada_set_bool(url_vec, &ada_set_hostname, subst, decode); + return Rcpp_ada_set<bool>(url_vec, &ada_set_hostname, subst, decode); } // [[Rcpp::export]] CharacterVector Rcpp_ada_set_pathname(const CharacterVector& url_vec, const CharacterVector& subst, bool decode) { - return Rcpp_ada_set_bool(url_vec, &ada_set_pathname, subst, decode); + return Rcpp_ada_set<bool>(url_vec, &ada_set_pathname, subst, decode); } // [[Rcpp::export]] CharacterVector Rcpp_ada_set_protocol(const CharacterVector& url_vec, const CharacterVector& subst, bool decode) { - return Rcpp_ada_set_bool(url_vec, &ada_set_protocol, subst, decode); + return Rcpp_ada_set<bool>(url_vec, &ada_set_protocol, subst, decode); } // [[Rcpp::export]] CharacterVector Rcpp_ada_set_search(const CharacterVector& url_vec, const CharacterVector& subst, bool decode) { - return Rcpp_ada_set_void(url_vec, &ada_set_search, subst, decode); + return Rcpp_ada_set<void>(url_vec, &ada_set_search, subst, decode); } // [[Rcpp::export]] CharacterVector Rcpp_ada_set_hash(const CharacterVector& url_vec, const CharacterVector& subst, bool decode) { - return Rcpp_ada_set_void(url_vec, &ada_set_hash, subst, decode); + return Rcpp_ada_set<void>(url_vec, &ada_set_hash, subst, decode); } \ No newline at end of file