Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add forecasts data targets; fix #59 #68

Merged
merged 3 commits into from
Sep 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/deploy-targets-releases.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ jobs:
targets::tar_make(climate_download_files)
targets::tar_make(cyclone_reports_download_files)
targets::tar_make(dam_level_data_raw_csv)
targets::tar_make(forecasts_download_files)
shell: Rscript {0}

- name: Create weekly data release
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ data-raw/cyclones
!data-raw/heat_index
data
!data/.gitkeep
README.html
README_files
335 changes: 335 additions & 0 deletions R/pagasa_forecasts.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,338 @@ forecasts_download <- function(url,
## Return path to downloaded file ----
destfile
}

#'
#' Process PAGASA forecasts data
#'
#' @param path_to_pdf A file path to PAGASA regional forecasts PDF.
#' @param .text A vector of text values retrieved from PAGASA regional
#' forecasts PDF.
#'
#' @returns A tibble of raw PAGASA regional forecasts data.
#'
#' @examples
#' forecasts_get_data("data-raw/2024-09-08/mindanao/regional_forecast.pdf")
#'
#' @rdname forecasts_get
#' @export
#'

forecasts_get_data <- function(path_to_pdf) {
## Get text from PDF ----
.text <- pdftools::pdf_text(pdf = path_to_pdf) |>
stringr::str_split(pattern = "\n") |>
(\(x) x[[1]])()

## Get various identifying information for current data ----
regional_group <- forecasts_get_group(.text)
date_issued <- forecasts_get_date_issued(.text)
validity <- forecasts_get_validity(.text)
regional_group_summary <- forecasts_get_regional_summary(.text)

## Concatenate identifiers to single data.frame ----
df <- data.frame(
regional_group, date_issued, validity, regional_group_summary,
geograhic_unit = which(
pagasa_forecast_regions$regional_grouping == regional_group$regional_group
) |>
(\(x) pagasa_forecast_regions$geographic_unit[x])()
)

## Add forecasts data ----
df <- df |>
data.frame(forecasts_get_weather(.text))

## Special case for National Capital Region wind data ----
if ("National Capital Region" %in% regional_group$regional_group) {
df <- df |>
data.frame(
forecasts_get_wind(.text) |>
(\(x)
{
x[10, 2] <- x[11, 1]
x[c(1:10, 12:13), ]
}
)()
)
} else {
df <- df |>
data.frame(forecasts_get_wind(.text))
}

## Special case for Northern Luzon coastal data ----
if ("Northern Luzon" %in% regional_group$regional_group) {
df <- df |>
data.frame(
forecasts_get_coastal(.text) |>
(\(x)
{
rbind(
x[1:7, ],
data.frame(
coastal_today = rep(NA_character_, 8),
coastal_tomorrow = rep(NA_character_, 8)
),
x[8, ]
)
}
)()
)
} else {
## Special case for National Capital Region coastal data ----
if ("National Capital Region" %in% regional_group$regional_group) {
df <- df |>
data.frame(
forecasts_get_coastal(.text) |>
(\(x)
{
rbind(
x[1, ],
data.frame(
coastal_today = rep(NA_character_, 2),
coastal_tomorrow = rep(NA_character_, 2)
),
x[4:5, ],
data.frame(
coastal_today = rep(NA_character_, 2),
coastal_tomorrow = rep(NA_character_, 2)
),
x[8, ],
data.frame(
coastal_today = NA_character_,
coastal_tomorrow = NA_character_
),
x[10, ],
data.frame(
coastal_today = NA_character_,
coastal_tomorrow = NA_character_
),
x[12, ]
)
}
)()
)
} else {
df <- df |>
data.frame(forecasts_get_coastal(.text))
}
}

## Add temperature data ----
df <- df |>
data.frame(forecasts_get_temperature(.text))

## Remove row names and convert to tibble ----
row.names(df) <- NULL
df <- tibble::tibble(df)

## Return df ----
df
}

#'
#' @rdname forecasts_get
#' @export
#'

forecasts_get_group <- function(.text) {
pagasa_division <- which(
stringr::str_detect(string = .text, pattern = "REGIONAL WEATHER")
) |>
(\(x) .text[x])() |>
stringr::word(-1)

regional_group <- pagasa_division |>
(\(x)
{
ifelse(
x == "MINPRSD", "Mindanao",
ifelse(
x == "VISPRSD", "Visayas",
ifelse(
x == "SLPRSD", "Southern Luzon",
ifelse(
x == "NCR-PRSD", "National Capital Region",
"Northern Luzon"
)
)
)
)
}
)()

## Concatenate into a data.frame ----
data.frame(cbind(pagasa_division, regional_group))
}

#'
#' @rdname forecasts_get
#' @export
#'

forecasts_get_date_issued <- function(.text) {
stringr::str_detect(
string = .text, pattern = "Issued At|Issued at"
) |>
(\(x) .text[x])() |>
stringr::str_remove(pattern = "Issued At: |Issued at: ") #|>
#strptime(format = "%I:%M %p, %d %B, %Y", tz = "PST")
}

#'
#' @rdname forecasts_get
#' @export
#'

forecasts_get_validity <- function(.text) {
stringr::str_detect(
string = .text, pattern = "Valid Beginning"
) |>
(\(x) .text[x])() |>
stringr::str_remove(pattern = "Valid Beginning: ")
}

#'
#' @rdname forecasts_get
#' @export
#'

forecasts_get_regional_summary <- function(.text) {
.text |>
(\(x)
{
from <- which(stringr::str_detect(string = x, pattern = "Weather:"))[1]
to <- which(stringr::str_detect(string = x, pattern = "Provinces")) - 1
x[from:to]
}
)() |>
paste(collapse = " ") |>
stringr::str_remove(pattern = "Weather: ")
}

#'
#' @rdname forecasts_get
#' @export
#'

forecasts_get_weather <- function(.text) {
which(stringr::str_detect(string = .text, pattern = "Weather: "))[-1] |>
(\(x)
{
cbind(
.text[x] |>
stringr::str_remove_all(
pattern = stringr::str_split(
string = pagasa_forecast_regions$geographic_unit,
pattern = " "
) |>
unlist() |>
unique() |>
paste(collapse = "|")
) |>
stringr::str_remove_all(pattern = "Weather: ") |>
stringr::str_split(pattern = "\\s{2,}", simplify = TRUE),
.text[x + 1] |>
stringr::str_remove_all(
pattern = stringr::str_split(
string = pagasa_forecast_regions$geographic_unit,
pattern = stringr::boundary("word")
) |>
unlist() |>
unique() |>
(\(x) paste0("\\b", x) |> paste(collapse = "|"))()
) |>
stringr::str_split(pattern = "\\s{2,}", simplify = TRUE)
)
}
)() |>
(\(x)
{
data.frame(
weather_today = paste(x[ , 2], x[ , 5]),
weather_tomorrow = paste(x[ , 3], x[ , 6])
)
}
)()
}

#'
#' @rdname forecasts_get
#' @export
#'

forecasts_get_wind <- function(.text) {
which(stringr::str_detect(string = .text, pattern = "Wind:")) |>
(\(x) .text[x])() |>
stringr::str_remove_all(
pattern = stringr::str_split(
string = pagasa_forecast_regions$geographic_unit,
pattern = stringr::boundary("word")
) |>
unlist() |>
unique() |>
(\(x) paste0("\\b", x) |> paste(collapse = "|"))()
) |>
stringr::str_remove_all(pattern = "[0-9]{2}-[0-9]{2}°C|[0-9]{2}\\s-\\s[0-9]{2}°C") |>
stringr::str_remove_all(pattern = "Wind:") |>
stringr::str_split(pattern = "\\s{2,}", simplify = TRUE) |>
(\(x)
{
data.frame(
wind_today = x[ , 2],
wind_tomorrow = x[ , 3]
)
}
)()
}

#'
#' @rdname forecasts_get
#' @export
#'

forecasts_get_coastal <- function(.text) {
which(stringr::str_detect(string = .text, pattern = "Coastal:")) |>
(\(x) .text[x])() |>
stringr::str_remove_all(pattern = "[0-9]{2}-[0-9]{2}°C|[0-9]{2}\\s-\\s[0-9]{2}°C") |>
stringr::str_remove_all(pattern = "Coastal:") |>
stringr::str_split(pattern = "\\s{2,}", simplify = TRUE) |>
(\(x)
{
data.frame(
coastal_today = x[ , 2],
coastal_tomorrow = x[ , 3]
)
}
)()
}

#'
#' @rdname forecasts_get
#' @export
#'

forecasts_get_temperature <- function(.text) {
stringr::str_extract_all(
string = .text, pattern = "[0-9]{2}-[0-9]{2}°C|[0-9]{2}\\s-\\s[0-9]{2}°C"
) |>
(\(x) x[lapply(X = x, FUN = length) != 0])() |>
(\(x)
{
lapply(
X = x,
FUN = function(x) {
data.frame(rbind(x)) |>
(\(x)
{
names(x) <- c("temperature_today", "temperature_tomorrow")
x
}
)()
}
)
}
)() |>
dplyr::bind_rows() |>
(\(x) { row.names(x) <- NULL; x })()
}
41 changes: 41 additions & 0 deletions R/pagasa_forecasts_utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#
# PAGASA forecast regions
#

pagasa_forecast_regions <- list(
Mindanao = data.frame(
geographic_unit = c(
"Laguindingan", "Cagayan de Oro City", "Malaybalay City",
"Metro Davao", "Zamboanga City", "Surigao City", "Butuan City",
"Cotabato City", "General Santos City", "Dipolog City", "Tagum City",
"Marawi City", "Iligan City", "Pagadian City"
)
),
`National Capital Region` = data.frame(
geographic_unit = c(
"Metro Manila", "Tarlac", "Nueva Ecija", "Zambales", "Bataan", "Pampanga",
"Bulacan", "Cavite", "Laguna", "Batangas", "Rizal", "Quezon"
)
),
`Northern Luzon` = data.frame(
geographic_unit = c(
"Ilocos Norte", "Ilocos Sur", "La Union", "Pangasinan", "Batanes",
"Cagayan", "Isabela", "Quirino", "Nueva Vizcaya", "Abra", "Benguet",
"Mountain Province", "Ifugao", "Kalinga", "Apayao", "Aurora"
)
),
`Southern Luzon` = data.frame(
geographic_unit = c(
"Albay", "Camarines Norte", "Camarines Sur", "Catanduanes", "Sorsogon",
"Masbate", "Northern Samar", "Oriental Mindoro", "Marinduque", "Romblon"
)
),
Visayas = data.frame(
geographic_unit = c(
"Cebu", "Bohol", "Negros Occidental", "Negros Oriental", "Siquijor",
"Leyte", "Southern Leyte", "Biliran", "Samar", "Eastern Samar", "Iloilo",
"Guimaras", "Capiz", "Aklan", "Antique", "Palawan", "Occidental Mindoro"
)
)
) |>
dplyr::bind_rows(.id = "regional_grouping")
Loading