Skip to content

Commit

Permalink
✨ full text search
Browse files Browse the repository at this point in the history
  • Loading branch information
Wytamma committed Apr 25, 2023
1 parent 7ef5333 commit f496a54
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 5 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: GISAIDR
Type: Package
Title: R wrapper for GISAID "API"
Version: 0.9.5
Version: 0.9.6
Author: Wytamma Wirth
Maintainer: Wytamma Wirth <[email protected]>
Description: Programmatically interact with and query the GISAID database.
Expand Down
17 changes: 16 additions & 1 deletion R/internal_query.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' Query GISAID Database
#'
#' @param credentials GISAID credentials.
#' @param text search for entries using full text search (e.g. accession ids separated by newlines).
#' @param location search for entries based on geographic location.
#' @param lineage search for entries based on pango lineage designations.
#' @param variant search for entries based on variant designation
Expand All @@ -23,6 +24,7 @@
#' @return Dataframe.
internal_query <-
function(credentials,
text = NULL,
location = NULL,
lineage = NULL,
variant = NULL,
Expand All @@ -45,6 +47,18 @@ internal_query <-
df <- tryCatch({
# search
queue = list()
if (!is.null(text)) {
queue <-
append(
queue,
create_search_queue(
credentials,
credentials$text_ceid,
text,
'DoSimpleSearch'
)
)
}
if (!is.null(location)) {
queue <-
append(
Expand Down Expand Up @@ -320,6 +334,7 @@ internal_query <-
return(
query(
credentials = credentials,
text = text,
location = location,
lineage = lineage,
variant = variant,
Expand All @@ -328,8 +343,8 @@ internal_query <-
to = to,
to_subm = to_subm,
nrows = j$totalRecords,
load_all = FALSE,
# set load_all to false to break the recursion
load_all = FALSE,
low_coverage_excl = low_coverage_excl,
complete = complete,
high_coverage = high_coverage,
Expand Down
9 changes: 7 additions & 2 deletions R/login.R
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,10 @@ login <- function(username, password, database="EpiCoV") {
} else {
linage_ceid <- NULL
}

# Virus Name
virus_name_ceid <- extract_search_ceid('covv_virus_name', customSearch_page_text)

# From
from_ceid <- extract_search_ceid('covv_collection_date_from', customSearch_page_text)

Expand All @@ -222,6 +222,9 @@ login <- function(username, password, database="EpiCoV") {
low_coverage_excl_ceid <- NULL
}
if (database == 'EpiCoV'){
# full text search
text_ceid <-
extract_search_ceid("fts", customSearch_page_text)
# Highq
highq_ceid <-
extract_search_ceid("highq", customSearch_page_text)
Expand All @@ -237,6 +240,7 @@ login <- function(username, password, database="EpiCoV") {
# quality not used by EpiCov
quality_ceid <- NULL
} else {
text_ceid <- NULL
variant_ceid <- NULL
complete_ceid <- NULL
highq_ceid <- NULL
Expand Down Expand Up @@ -274,6 +278,7 @@ login <- function(username, password, database="EpiCoV") {
selection_panel_cid = selection_panel_cid,
selection_ceid = selection_ceid,
download_panel_cid = query_cid,
text_ceid = text_ceid,
location_ceid = location_ceid,
search_cid = search_cid,
linage_ceid = linage_ceid,
Expand Down
5 changes: 5 additions & 0 deletions R/query.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#' Query GISAID Database
#'
#' @param credentials GISAID credentials.
#' @param text search for entries using full text search (e.g. accession ids separated by newlines).
#' @param location search for entries based on geographic location.
#' @param lineage search for entries based on pango lineage designations.
#' @param variant search for entries based on variant designation
Expand All @@ -24,6 +25,7 @@
#' @return data.frame
query <-
function(credentials,
text = NULL,
location = NULL,
lineage = NULL,
variant = NULL,
Expand Down Expand Up @@ -51,6 +53,7 @@ query <-
for (i in 1:nrow(batches)) {
results <- rbind(results, internal_query(
credentials = credentials,
text = text,
location = location,
lineage = lineage,
variant = variant,
Expand All @@ -74,6 +77,7 @@ query <-
return(
internal_query(
credentials = credentials,
text = text,
location = location,
lineage = lineage,
variant = variant,
Expand All @@ -97,6 +101,7 @@ query <-
return(
internal_query(
credentials = credentials,
text = text,
location = location,
lineage = lineage,
variant = variant,
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,16 @@ df$submission_date
[1] "2020-01-10" "2020-01-10" "2020-01-11" "2020-01-11" "2020-01-11" "2020-01-12" "2020-01-14"
[8] "2020-01-14" "2020-01-14" "2020-01-14" "2020-01-16" "2020-01-17" "2020-01-17" ...

### Full text search

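Use the `text` argument for full text search. To search for several terms at once (for example a set of accession IDs), pass them as a single string separated by newlines.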

``` r
accession_ids = c("EPI_ISL_17398411", "EPI_ISL_17199001", "EPI_ISL_17409201", "EPI_ISL_17243716")
df <- query(credentials = credentials, text = paste(accession_ids, collapse = "\n"))
df$accession_id
```
[1] "EPI_ISL_17199001" "EPI_ISL_17243716" "EPI_ISL_17398411" "EPI_ISL_17409201"

### Search by location

Expand Down
8 changes: 7 additions & 1 deletion tests/testthat/test-query.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ test_that("basic query works", {
})

test_that("can change number of rows", {
df <- query(credentials = credentials, nrows = 100, location = "Africa / ...")
df <- query(credentials = credentials, nrows = 1000, location = "Africa / ...")
expect_equal(nrow(df), 100)
})

Expand Down Expand Up @@ -144,3 +144,9 @@ test_that("order_by works", {
df <- query(credentials = credentials, order_by = 'submission_date')
expect_true(df$submission_date[1] == "2020-01-10")
})

# Full text search: querying with newline-separated accession ids should
# return exactly the requested records.
# NOTE(review): relies on a `credentials` object created elsewhere in the test
# suite and presumably on a live GISAID session -- confirm before running
# offline.
test_that("text search works", {
  accession_ids <- c(
    "EPI_ISL_17398411",
    "EPI_ISL_17199001",
    "EPI_ISL_17409201",
    "EPI_ISL_17243716"
  )
  df <- query(
    credentials = credentials,
    text = paste(accession_ids, collapse = "\n")
  )
  # expect_equal() reports the actual row count when the expectation fails,
  # unlike expect_true(nrow(df) == 4).
  expect_equal(nrow(df), length(accession_ids))
  # Compare as sets: results may come back in a different order than the
  # order in which the ids were requested.
  expect_setequal(df$accession_id, accession_ids)
})

0 comments on commit f496a54

Please sign in to comment.