ropensci · crew102 · Dec 10, 2024 · Dec 10, 2024 · Dec 15, 2024 · Dec 20, 2024
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -23,10 +23,10 @@ jobs:
     steps:
       - name: Confirm crew102 triggered the build
         run: |
-          if [ "${{ github.event.sender.login }}" == "crew102" ]; then
+          if [ "$GITHUB_ACTOR" == "crew102" ]; then
             echo "Actor is crew102"
           else
-            echo "Actor is ${{ github.actor }}, failing build."
+            echo "Actor is $GITHUB_ACTOR, failing build."
             exit 1
           fi
 
@@ -119,4 +119,4 @@ jobs:
         uses: actions/upload-artifact@main
         with:
           name: ${{ runner.os }}-r${{ matrix.config.r }}-results
-          path: check
+          path: check
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -19,7 +19,7 @@ LazyData: TRUE
 Depends:
     R (>= 3.1)
 Imports:
-    httr,
+    httr2,
     lifecycle,
     jsonlite,
     utils

diff --git a/NAMESPACE b/NAMESPACE
@@ -10,6 +10,7 @@ export(cast_pv_data)
 export(get_endpoints)
 export(get_fields)
 export(get_ok_pk)
+export(pad_patent_id)
 export(qry_funs)
 export(retrieve_linked_data)
 export(search_pv)

diff --git a/R/cast-pv-data.R b/R/cast-pv-data.R
@@ -5,21 +5,23 @@ as_is <- function(x) x
 get_cast_fun <- function(data_type) {
   # Some fields aren't documented, so we don't know what their data type is. Use
   # string type for these.
+  # new version of the API: state of string vs fulltext is in flux. Latter currently unused
   if (length(data_type) != 1) data_type <- "string"
-  switch(
-    data_type,
+  switch(data_type,
     "string" = as_is,
     "date" = as.Date,
-    "float" = as.numeric,
-    "integer" = as.integer,
+    "number" = as_is,
+    "integer" = as_is,
     "int" = as.integer,
-    "fulltext" = as_is
+    "fulltext" = as_is,
+    "boolean" = as_is,
+    "bool" = as.logical
   )
 }
 
 #' @noRd
 lookup_cast_fun <- function(name, typesdf) {
-  data_type <- typesdf[typesdf$field == name, "data_type"]
+  data_type <- typesdf[typesdf$common_name == name, "data_type"]
   get_cast_fun(data_type = data_type)
 }
 
@@ -29,6 +31,18 @@ cast_one.character <- function(one, name, typesdf) {
   cast_fun(one)
 }
 
+#' @noRd
+cast_one.double <- function(one, name, typesdf) {
+  cast_fun <- lookup_cast_fun(name, typesdf)
+  cast_fun(one)
+}
+
+#' @noRd
+cast_one.integer <- function(one, name, typesdf) {
+  cast_fun <- lookup_cast_fun(name, typesdf)
+  cast_fun(one)
+}
+
 #' @noRd
 cast_one.default <- function(one, name, typesdf) NA
 
@@ -69,17 +83,33 @@ cast_one <- function(one, name, typesdf) UseMethod("cast_one")
 #' \dontrun{
 #'
 #' fields <- c("patent_date", "patent_title", "patent_year")
-#' res <- search_pv(query = "{\"patent_number\":\"5116621\"}", fields = fields)
+#' res <- search_pv(query = "{\"patent_id\":\"5116621\"}", fields = fields)
 #' cast_pv_data(data = res$data)
 #' }
 #'
 #' @export
 cast_pv_data <- function(data) {
   validate_pv_data(data)
 
-  endpoint <- names(data)
+  entity_name <- names(data)
+
+  if (entity_name == "rel_app_texts") {
+    # blend the fields from both rel_app_texts entities
+    typesdf <- unique(fieldsdf[fieldsdf$group == entity_name, c("common_name", "data_type")])
+  } else {
+    # need to get the endpoint from entity_name
+    endpoint_df <- fieldsdf[fieldsdf$group == entity_name, ]
+    endpoint <- unique(endpoint_df$endpoint)
+
+    # watch out here- several endpoints return entities that are groups returned
+    # by the patent and publication endpoints (attorneys, inventors, assignees)
+    if(length(endpoint) > 1) {
+      endpoint <- endpoint[!endpoint %in% c("patent", "publication")]
+    }
+
+    typesdf <- fieldsdf[fieldsdf$endpoint == endpoint, c("common_name", "data_type")]
 
-  typesdf <- fieldsdf[fieldsdf$endpoint == endpoint, c("field", "data_type")]
+  }
 
   df <- data[[1]]
 
@@ -89,7 +119,7 @@ cast_pv_data <- function(data) {
 
   df[] <- list_out
   out_data <- list(x = df)
-  names(out_data) <- endpoint
+  names(out_data) <- entity_name
 
   structure(
     out_data,

diff --git a/R/check-query.R b/R/check-query.R
@@ -10,28 +10,32 @@ is_int <- function(x)
 
 #' @noRd
 is_date <- function(x)
-  grepl("[12][[:digit:]]{3}-[01][[:digit:]]-[0-3][[:digit:]]", x)
+  grepl("^[12][[:digit:]]{3}-[01][[:digit:]]-[0-3][[:digit:]]$", x)
 
 #' @noRd
 one_check <- function(operator, field, value, f1) {
-
   if (nrow(f1) == 0)
     stop2(field, " is not a valid field to query for your endpoint")
   if (f1$data_type == "date" && !is_date(value))
     stop2("Bad date: ", value, ". Date must be in the format of yyyy-mm-dd")
-  if (f1$data_type %in% c("string", "fulltext") && !is.character(value))
+  if (f1$data_type %in% c("bool", "int", "string", "fulltext") && !is.character(value))
     stop2(value, " must be of type character")
   if (f1$data_type == "integer" && !is_int(value))
     stop2(value, " must be an integer")
+  if (f1$data_type == "boolean" && !is.logical(value))
+    stop2(value, " must be a boolean")
+  if (f1$data_type == "number" && !is.numeric(value))
+    stop2(value, " must be a number")
 
   if (
-    (operator %in% c("_begins", "_contains") && !(f1$data_type == "string")) ||
-    (operator %in% c("_text_all", "_text_any", "_text_phrase") &&
-     !(f1$data_type == "fulltext")) ||
-    (f1$data_type %in% c("string", "fulltext") &&
-      operator %in% c("_gt", "_gte", "_lt", "_lte"))
-  )
+    # The new version of the API blurs the distinction between string/fulltext fields.
+    # It looks like the string/fulltext functions can be used interchangeably
+    (operator %in% c("_begins", "_contains", "_text_all", "_text_any", "_text_phrase") &&
+      !(f1$data_type == "fulltext" || f1$data_type == "string")) ||
+      (f1$data_type %in% c("string", "fulltext") &&
+        operator %in% c("_gt", "_gte", "_lt", "_lte"))) {
     stop2("You cannot use the operator ", operator, " with the field ", field)
+  }
 }
 
 #' @noRd
@@ -40,13 +44,16 @@ check_query <- function(query, endpoint) {
   num_opr <- c("_gt", "_gte", "_lt", "_lte")
   str_opr <- c("_begins", "_contains")
   fltxt_opr <- c("_text_all", "_text_any", "_text_phrase")
-  all_opr <- c(simp_opr, num_opr, str_opr, fltxt_opr)
+  all_opr <- c(simp_opr, num_opr, str_opr, fltxt_opr, "_in_range")
 
-  flds_flt <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$can_query == "y", ]
+  flds_flt <- fieldsdf[fieldsdf$endpoint == endpoint, ]
 
   apply_checks <- function(x, endpoint) {
     x <- swap_null_nms(x)
-    if (names(x) %in% c("_not", "_and", "_or") || is.na(names(x))) {
+
+    # the old condition (kept below) fails when x has several names: 'length(x) = 2 > 1' in coercion to 'logical(1)'
+    # if (names(x) %in% c("_not", "_and", "_or") || is.na(names(x))) {
+    if (length(names(x)) > 1 || names(x) %in% c("_not", "_and", "_or") || is.na(names(x))) {
       lapply(x, FUN = apply_checks)
     } else if (names(x) %in% all_opr) {
       f1 <- flds_flt[flds_flt$field == names(x[[1]]), ]
@@ -61,8 +68,8 @@ check_query <- function(query, endpoint) {
       )
     } else {
       stop2(
-        names(x), " is either not a valid operator or not a ",
-        "queryable field for this endpoint"
+        names(x), " is not a valid operator or not a ",
+        "valid field for this endpoint"
       )
     }
   }

diff --git a/R/data.R b/R/data.R
@@ -3,7 +3,7 @@
 #' A data frame containing the names of retrievable fields for each of the
 #' endpoints. You can find this data on the API's online documentation for each
 #' endpoint as well (e.g., the
-#' \href{https://patentsview.org/apis/api-endpoints/patents}{patents endpoint
+#' \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patent endpoint
 #' field list table}).
 #'
 #' @format A data frame with the following columns:

diff --git a/R/get-fields.R b/R/get-fields.R
@@ -1,3 +1,9 @@
+#' @noRd
+get_top_level_attributes <- function(endpoint) {
+  fieldsdf[fieldsdf$endpoint == endpoint & !grepl("\\.", fieldsdf$field), "field"]
+}
+
+
 #' Get list of retrievable fields
 #'
 #' This function returns a vector of fields that you can retrieve from a given
@@ -13,15 +19,18 @@
 #'   endpoint's fields (i.e., do not filter the field list based on group
 #'   membership). See the field tables located online to see which groups you
 #'   can specify for a given endpoint (e.g., the
-#'   \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patent
+#'   \href{https://search.patentsview.org/docs/docs/Search%20API/SearchAPIReference/#patent}{patent
 #'   endpoint table}), or use the \code{fieldsdf} table
 #'   (e.g., \code{unique(fieldsdf[fieldsdf$endpoint == "patent", "group"])}).
+#' @param include_pk Logical; whether to include the endpoint's primary key.
+#'    Defaults to FALSE. The primary key is needed if you plan on calling
+#'    \code{\link{unnest_pv_data}} on the results of \code{\link{search_pv}}.
 #'
 #' @return A character vector with field names.
 #'
 #' @examples
-#' # Get all assignee-level fields for the patent endpoint:
-#' fields <- get_fields(endpoint = "patent", groups = "assignees")
+#' # Get all top level (non-nested) fields for the patent endpoint:
+#' fields <- get_fields(endpoint = "patent", groups = c("patents"))
 #'
 #' # ...Then pass to search_pv:
 #' \dontrun{
@@ -31,7 +40,7 @@
 #'   fields = fields
 #' )
 #' }
-#' # Get all patent and assignee-level fields for the patent endpoint:
+#' # Get unnested patent and assignee-level fields for the patent endpoint:
 #' fields <- get_fields(endpoint = "patent", groups = c("assignees", "patents"))
 #'
 #' \dontrun{
@@ -41,15 +50,49 @@
 #'   fields = fields
 #' )
 #' }
+#' # Get the nested inventors fields and the primary key in order to call unnest_pv_data
+#' # on the returned data.  unnest_pv_data would throw an error if the primary key was
+#' # not present in the results.
+#' fields <- get_fields(endpoint = "patent", groups = c("inventors"), include_pk = TRUE)
+#'
+#' \dontrun{
+#' # ...Then pass to search_pv and unnest the results
+#' results <- search_pv(
+#'   query = '{"_gte":{"patent_date":"2007-01-04"}}',
+#'   fields = fields
+#' )
+#' unnest_pv_data(results$data)
+#' }
 #'
 #' @export
-get_fields <- function(endpoint, groups = NULL) {
+get_fields <- function(endpoint, groups = NULL, include_pk = FALSE) {
   validate_endpoint(endpoint)
+
+  # Using the API's shorthand notation, group names can be requested as fields instead of
+  # fully qualifying each nested field. Fully qualified, the patent endpoint's attributes
+  # total over 4K, too big to be sent on a GET even with a modest query.
+
+  pk <- get_ok_pk(endpoint)
+  plural_entity <- fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$field == pk, "group"]
+  top_level_attributes <- get_top_level_attributes(endpoint)
+
   if (is.null(groups)) {
-    fieldsdf[fieldsdf$endpoint == endpoint, "field"]
+    c(
+      top_level_attributes,
+      unique(fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group != plural_entity, "group"])
+    )
   } else {
     validate_groups(endpoint, groups = groups)
-    fieldsdf[fieldsdf$endpoint == endpoint & fieldsdf$group %in% groups, "field"]
+
+    # don't include pk if plural_entity group is requested (pk would be a member)
+    extra_field <- if (include_pk && !plural_entity %in% groups) pk else NULL
+    extra_fields <- if (plural_entity %in% groups) top_level_attributes else NULL
+
+    c(
+      extra_field,
+      extra_fields,
+      groups[!groups == plural_entity]
+    )
   }
 }
 

diff --git a/R/print.R b/R/print.R
@@ -24,7 +24,10 @@ print.pv_data_result <- function(x, ...) {
   )
 
   utils::str(
-    x, vec.len = 1, max.level = 2, give.attr = FALSE, strict.width = "cut"
+    x, vec.len = 1, max.level = 2, give.attr = FALSE, strict.width = "cut",
+    formatNum = function(x, ...) {
+      format(x, trim = TRUE, drop0trailing = TRUE, scientific = FALSE, ...)
+    }
   )
 }
 

diff --git a/R/process-error.R b/R/process-error.R