Skip to content

Commit

Permalink
Merge pull request #50 from panukatan/dev
Browse files Browse the repository at this point in the history
create overall function for retrieving bulletin data; fix #47; fix #48
  • Loading branch information
ernestguevarra authored Aug 19, 2024
2 parents 180061b + 7b80dfc commit 05d3999
Show file tree
Hide file tree
Showing 13 changed files with 148 additions and 66 deletions.
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Generated by roxygen2: do not edit by hand

export(eq_build_url)
export(eq_data)
export(eq_data_bulletin)
export(eq_data_summary)
export(eq_get_bulletin)
export(eq_get_bulletin_urls)
export(eq_get_bulletin_urls_)
Expand Down
4 changes: 2 additions & 2 deletions R/eq_build.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#'
#' Build URLs for specific earthquake bulletins
#' Build URLs for specific PHIVOLCS earthquake bulletin summaries
#'
#' @param .url Base URL for PHIVOLCS earthquake bulletins.
#' @param .url Base URL for PHIVOLCS earthquake bulletin summaries.
#' @param .year A vector for year (in YYYY format) for which earthquake
#' bulletins are to be retrieved. The earliest year that can be specified is
#' 2018. If set to NULL (default), all years starting from 2018 to present
Expand Down
29 changes: 21 additions & 8 deletions R/eq_data.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#'
#' Retrieve earthquake information data from PHIVOLCS bulletins
#'
#' @param .url Base URL for PHIVOLCS earthquake bulletins.
#' @param .year A vector for year (in YYYY format) for which earthquake
#' bulletins are to be retrieved. The earliest year that can be specified is
#' 2018. If set to NULL (default), all years starting from 2018 to present
Expand All @@ -14,21 +15,33 @@
#' (default), table of earthquake information for current year and current
#' month is retrieved. Otherwise, all months for all possible years are
#' retrieved.
#' @param simplify Logical. Should output be simplified into a data.frame or
#' tibble? Default is TRUE. Otherwise, a list of processed tibbles of
#' earthquake monitoring data.
#'
#' @returns A tibble of processed earthquake data.
#'
#' @examples
#' eq_data()
#' eq_data_summary()
#'
#' @rdname eq_data
#' @export
#'

eq_data <- function(.year = NULL, .month = NULL,
latest = TRUE, simplify = TRUE) {
eq_data_summary <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/",
.year = NULL, .month = NULL, latest = TRUE) {
## Retrieve data tables and process ----
eq_get_table(.year = .year, .month = .month, latest = latest) |>
eq_process_table(simplify = simplify)
eq_get_table(.url = .url, .year = .year, .month = .month, latest = latest) |>
eq_process_table(simplify = TRUE)
}

#'
#' @rdname eq_data
#' @export
#'

eq_data_bulletin <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/",
.year = NULL, .month = NULL, latest = TRUE) {
eq_get_bulletin_urls(
.url = .url, .year = .year, .month = .month, latest = latest
) |>
eq_get_bulletins() |>
eq_process_bulletins()
}
28 changes: 17 additions & 11 deletions R/eq_get_bulletin_urls.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,22 +68,28 @@ eq_get_bulletin_urls_ <- function(.url) {
httr::config(ssl_verifypeer = 0L) |>
httr::set_config()

## Initiate HTML session ----
.session <- rvest::session(.url)

## Retrieve links ----
if (.year == 2018 & .month %in% month.name[seq_len(5)]) {
rvest::session(.url) |>
rvest::html_elements(css = "tr td .auto-style49 a") |>
.session |>
rvest::html_elements(css = "tr td a") |>
rvest::html_attr(name = "href") |>
(\(x)
{
file.path(
"https:/",
stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3],
stringr::str_remove_all(string = x, pattern = "^../../")
)
}
{
x <- x[stringr::str_detect(string = x, pattern = "Earthquake_Information")]
file.path(
"https:/",
stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3],
stringr::str_remove_all(
string = x, pattern = "^../../|^../../../../"
)
)
}
)()
} else {
rvest::session(.url) |>
.session |>
rvest::html_elements(css = ".auto-style91 a") |>
rvest::html_attr(name = "href") |>
(\(x)
Expand All @@ -92,7 +98,7 @@ eq_get_bulletin_urls_ <- function(.url) {
"https:/",
stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3],
stringr::str_remove_all(
string = x, pattern = "^../../|\\\\..\\\\..\\\\"
string = x, pattern = "\\.\\./|\\\\..\\\\..\\\\"
) |>
stringr::str_replace_all(pattern = "\\\\", replacement = "/")
)
Expand Down
61 changes: 34 additions & 27 deletions R/eq_get_bulletins.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,34 +15,41 @@
#'

eq_get_bulletin <- function(.url) {
rvest::session(url = .url) |>
rvest::html_table() |>
(\(x) x[[1]])() |>
(\(x)
{
tibble::tibble(
date_time = x[2, 4],
bulletin_number = stringr::str_extract(
string = x[1, 1], pattern = "[0-9]{1,}"
),
depth = x[2, 8],
magnitude = x[2, 12],
location = x[2, 6],
origin = x[2, 10],
reported_intensity = x[9, 3],
expect_damage = x[11, 4],
expect_aftershocks = x[11, 6],
date_time_issued = x[11, 8],
prepared_by = x[11, 10]
) |>
dplyr::mutate(
dplyr::across(
.cols = dplyr::everything(),
.fns = ~simplify_vectors(.x)
## Check URL ----
url_error <- httr::http_error(.url)

if (url_error) {
NULL
} else {
rvest::session(url = .url) |>
rvest::html_table() |>
(\(x) x[[1]])() |>
(\(x)
{
tibble::tibble(
date_time = x[2, 4],
bulletin_number = stringr::str_extract(
string = x[1, 1], pattern = "[0-9]{1,}"
),
depth = x[2, 8],
magnitude = x[2, 12],
location = x[2, 6],
origin = x[2, 10],
reported_intensity = x[9, 3],
expect_damage = x[11, 4],
expect_aftershocks = x[11, 6],
date_time_issued = x[11, 8],
prepared_by = x[11, 10]
) |>
dplyr::mutate(
dplyr::across(
.cols = dplyr::everything(),
.fns = ~simplify_vectors(.x)
)
)
)
}
)()
}
)()
}
}

#'
Expand Down
9 changes: 7 additions & 2 deletions R/eq_process_bulletins.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,12 @@ eq_process_bulletins <- function(eq_df) {
longitude = get_longitude(.data$location),
latitude = get_latitude(.data$location),
depth = as.integer(.data$depth),
magnitude_type = stringr::str_extract_all(
string = .data$magnitude, pattern = "[A-Za-z]{1,}", simplify = TRUE
) |>
simplify_vectors(),
magnitude = stringr::str_remove_all(
string = .data$magnitude, pattern = "Ms "
string = .data$magnitude, pattern = "[^0-9.-]"
) |>
as.numeric(),
location = get_location(.data$location),
Expand All @@ -43,7 +47,8 @@ eq_process_bulletins <- function(eq_df) {
) |>
dplyr::select(
.data$date_time, .data$bulletin_number, .data$longitude, .data$latitude,
.data$depth, .data$magnitude, .data$reported_intensity, .data$location,
.data$depth, .data$magnitude, .data$magnitude_type,
.data$reported_intensity, .data$location,
.data$origin, .data$expect_damage, .data$expect_aftershocks,
.data$date_time_issued, .data$prepared_by
)
Expand Down
31 changes: 31 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,34 @@ get_location <- function(.location) {
simplify_vectors <- function(x) {
c(x) |> unname() |> unlist()
}


## Retrieve HTML information from different CSS tags ----

# get_links <- function(.session) {
# c(
# .session |>
# rvest::html_elements(css = "tr td .auto-style37 a") |>
# rvest::html_attr(name = "href"),
# .session |>
# rvest::html_elements(css = "tr td .auto-style62 a") |>
# rvest::html_attr(name = "href"),
# .session |>
# rvest::html_elements(css = "tr td .auto-style57 a") |>
# rvest::html_attr(name = "href")
# .session |>
# rvest::html_elements(css = "tr td .auto-style37 a") |>
# rvest::html_attr(name = "href")
# .session |>
# rvest::html_elements(css = "tr td .auto-style12 a") |>
# rvest::html_attr(name = "href")
# .session |>
# rvest::html_elements(css = "tr td a") |>
# rvest::html_attr(name = "href") |>
# (\(x) x[stringr::str_detect(x, pattern = "Earthquake_Information")])()
#
# .session |>
# rvest::html_elements(css = "tr td .auto-style49 a") |>
# rvest::html_attr(name = "href")
# )
# }
6 changes: 3 additions & 3 deletions man/eq_build_url.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 19 additions & 8 deletions man/eq_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pkgdown/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,6 @@ reference:

- title: Data
contents:
- eq_data
- eq_data_summary
- eq_data_bulletin

9 changes: 8 additions & 1 deletion tests/testthat/test-eq_data.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Tests for eq_data function ---------------------------------------------------

test_that("eq_data function output is appropriate", {
expect_s3_class(eq_data(), "tbl")
expect_s3_class(eq_data_summary(), "tbl")
})

#eq_df <- eq_data_bulletin(.year = 2018, .month = "January")
#eq_df <- eq_data_bulletin(.year = 2019, .month = "January")

# test_that("eq_data function output is appropriate", {
# expect_s3_class(eq_df, "tbl")
# })
2 changes: 1 addition & 1 deletion tests/testthat/test-eq_get_bulletin_urls.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
url_list <- eq_get_bulletin_urls()

test_that("get_links function outputs are as expected", {
expect_vector(url_list, ptype = character(), size = nrow(eq_data()))
expect_vector(url_list, ptype = character(), size = nrow(eq_data_summary()))
})

url_list <- eq_get_bulletin_urls(latest = FALSE)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-eq_process_bulletins.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ bulletin_df <- eq_get_bulletins(urls[1:10]) |>
test_that("eq_process_bulletins output is as expected", {
expect_s3_class(bulletin_df, "tbl")
expect_equal(nrow(bulletin_df), 10)
expect_equal(ncol(bulletin_df), 13)
expect_equal(ncol(bulletin_df), 14)
})

0 comments on commit 05d3999

Please sign in to comment.