Merge pull request #68 from panukatan/dev

add forecasts data targets; fix #59
panukatan · Sep 8, 2024 · 0885f52 · 0885f52
2 parents 87b4d3b + 19b6c14
commit 0885f52
Show file tree

Hide file tree

Showing 8 changed files with 430 additions and 27 deletions.
diff --git a/.github/workflows/deploy-targets-releases.yml b/.github/workflows/deploy-targets-releases.yml
@@ -43,6 +43,7 @@ jobs:
           targets::tar_make(climate_download_files)
           targets::tar_make(cyclone_reports_download_files)
           targets::tar_make(dam_level_data_raw_csv)
+          targets::tar_make(forecasts_download_files)
         shell: Rscript {0}
 
       - name: Create weekly data release

diff --git a/.gitignore b/.gitignore
@@ -11,3 +11,5 @@ data-raw/cyclones
 !data-raw/heat_index
 data
 !data/.gitkeep
+README.html
+README_files
diff --git a/R/pagasa_forecasts.R b/R/pagasa_forecasts.R
@@ -88,3 +88,338 @@ forecasts_download <- function(url,
   ## Return path to downloaded file ----
   destfile
 }
+
+#'
+#' Process PAGASA forecasts data
+#' 
+#' @param path_to_pdf A file path to PAGASA regional forecasts PDF.
+#' @param .text A vector of text values retrieved from PAGASA regional
+#'   forecasts PDF.
+#' 
+#' @returns A tibble of raw PAGASA regional forecasts data.
+#' 
+#' @examples
+#' forecasts_get_data("data-raw/2024-09-08/mindanao/regional_forecast.pdf")
+#' 
+#' @rdname forecasts_get
+#' @export
+#' 
+
+forecasts_get_data <- function(path_to_pdf) {
+  ## Get text from PDF ----
+  .text <- pdftools::pdf_text(pdf = path_to_pdf) |>
+    stringr::str_split(pattern = "\n") |>
+    (\(x) x[[1]])()
+
+  ## Get various identifying information for current data ----
+  regional_group <- forecasts_get_group(.text)
+  date_issued <- forecasts_get_date_issued(.text)
+  validity <- forecasts_get_validity(.text)
+  regional_group_summary <- forecasts_get_regional_summary(.text)
+
+  ## Concatenate identifiers to single data.frame ----
+  df <- data.frame(
+    regional_group, date_issued, validity, regional_group_summary,
+    geograhic_unit = which(
+      pagasa_forecast_regions$regional_grouping == regional_group$regional_group
+    ) |>
+      (\(x) pagasa_forecast_regions$geographic_unit[x])()
+  )
+
+  ## Add forecasts data ----
+  df <- df |>
+    data.frame(forecasts_get_weather(.text))
+
+  ## Special case for National Capital Region wind data ----
+  if ("National Capital Region" %in% regional_group$regional_group) {
+    df <- df |>
+      data.frame(
+        forecasts_get_wind(.text) |>
+          (\(x)
+            {
+              x[10, 2] <- x[11, 1]
+              x[c(1:10, 12:13), ]
+          }
+          )()
+      )
+  } else {
+    df <- df |>
+      data.frame(forecasts_get_wind(.text))
+  }
+
+  ## Special case for Northern Luzon coastal data ----
+  if ("Northern Luzon" %in% regional_group$regional_group) {
+    df <- df |>
+      data.frame(
+        forecasts_get_coastal(.text) |>
+          (\(x)
+            {
+              rbind(
+                x[1:7, ],
+                data.frame(
+                  coastal_today = rep(NA_character_, 8),
+                  coastal_tomorrow = rep(NA_character_, 8)
+                ),
+                x[8, ]
+              )
+            }
+          )()
+      )
+  } else {
+    ## Special case for National Capital Region coastal data ----
+    if ("National Capital Region" %in% regional_group$regional_group) {
+      df <- df |>
+        data.frame(
+          forecasts_get_coastal(.text) |>
+            (\(x)
+              {
+                rbind(
+                  x[1, ],
+                  data.frame(
+                    coastal_today = rep(NA_character_, 2),
+                    coastal_tomorrow = rep(NA_character_, 2)
+                  ),
+                  x[4:5, ],
+                  data.frame(
+                    coastal_today = rep(NA_character_, 2),
+                    coastal_tomorrow = rep(NA_character_, 2)
+                  ),
+                  x[8, ],
+                  data.frame(
+                    coastal_today = NA_character_,
+                    coastal_tomorrow = NA_character_
+                  ),
+                  x[10, ],
+                  data.frame(
+                    coastal_today = NA_character_,
+                    coastal_tomorrow = NA_character_
+                  ),
+                  x[12, ]
+                )
+              }
+            )()
+        )
+    } else {
+      df <- df |>
+        data.frame(forecasts_get_coastal(.text))
+    }
+  }
+
+  ## Add temperature data ----
+  df <- df |>
+    data.frame(forecasts_get_temperature(.text))
+
+  ## Remove row names and convert to tibble ----
+  row.names(df) <- NULL
+  df <- tibble::tibble(df)
+
+  ## Return df ----
+  df
+}
+
+#'
+#' @rdname forecasts_get
+#' @export
+#' 
+
+forecasts_get_group <- function(.text) {
+  pagasa_division <- which(
+    stringr::str_detect(string = .text, pattern = "REGIONAL WEATHER")
+  ) |>
+    (\(x) .text[x])() |>
+    stringr::word(-1)
+
+  regional_group <- pagasa_division |>
+    (\(x)
+      {
+        ifelse(
+          x == "MINPRSD", "Mindanao",
+          ifelse(
+            x == "VISPRSD", "Visayas",
+            ifelse(
+              x == "SLPRSD", "Southern Luzon",
+              ifelse(
+                x == "NCR-PRSD", "National Capital Region",
+                "Northern Luzon"
+              )
+            )
+          )
+        )
+      }
+    )()
+
+  ## Concatenate into a data.frame ----
+  data.frame(cbind(pagasa_division, regional_group))
+}
+
+#'
+#' @rdname forecasts_get
+#' @export
+#' 
+
+forecasts_get_date_issued <- function(.text) {
+  stringr::str_detect(
+    string = .text, pattern = "Issued At|Issued at"
+  ) |>
+    (\(x) .text[x])() |>
+    stringr::str_remove(pattern = "Issued At: |Issued at: ") #|>
+    #strptime(format = "%I:%M %p, %d %B, %Y", tz = "PST")
+}
+
+#'
+#' @rdname forecasts_get
+#' @export
+#' 
+
+forecasts_get_validity <- function(.text) {
+  stringr::str_detect(
+    string = .text, pattern = "Valid Beginning"
+  ) |>
+    (\(x) .text[x])() |>
+    stringr::str_remove(pattern = "Valid Beginning: ")
+}
+
+#'
+#' @rdname forecasts_get
+#' @export
+#' 
+
+forecasts_get_regional_summary <- function(.text) {
+  .text |>
+    (\(x)
+      {
+        from <- which(stringr::str_detect(string = x, pattern = "Weather:"))[1]
+        to <- which(stringr::str_detect(string = x, pattern = "Provinces")) - 1
+        x[from:to]
+    }
+    )() |>
+    paste(collapse = " ") |>
+    stringr::str_remove(pattern = "Weather: ")
+}
+
+#'
+#' @rdname forecasts_get
+#' @export
+#' 
+
+forecasts_get_weather <- function(.text) {
+  which(stringr::str_detect(string = .text, pattern = "Weather: "))[-1] |>
+    (\(x)
+      {
+        cbind(
+          .text[x] |>
+            stringr::str_remove_all(
+              pattern = stringr::str_split(
+                string = pagasa_forecast_regions$geographic_unit,
+                pattern = " "
+              ) |>
+                unlist() |>
+                unique() |>
+                paste(collapse = "|")
+            ) |>
+            stringr::str_remove_all(pattern = "Weather: ") |>
+            stringr::str_split(pattern = "\\s{2,}", simplify = TRUE), 
+          .text[x + 1] |>
+            stringr::str_remove_all(
+              pattern = stringr::str_split(
+                string = pagasa_forecast_regions$geographic_unit,
+                pattern = stringr::boundary("word")
+              ) |>
+                unlist() |>
+                unique() |>
+                (\(x) paste0("\\b", x) |> paste(collapse = "|"))()
+            ) |>
+            stringr::str_split(pattern = "\\s{2,}", simplify = TRUE)
+        )
+      }
+    )() |>
+    (\(x)
+      {
+        data.frame(
+          weather_today = paste(x[ , 2], x[ , 5]),
+          weather_tomorrow = paste(x[ , 3], x[ , 6])
+        )
+      }
+    )()
+}
+
+#'
+#' @rdname forecasts_get
+#' @export
+#' 
+
+forecasts_get_wind <- function(.text) {
+  which(stringr::str_detect(string = .text, pattern = "Wind:")) |>
+    (\(x) .text[x])() |>
+    stringr::str_remove_all(
+      pattern = stringr::str_split(
+        string = pagasa_forecast_regions$geographic_unit,
+        pattern = stringr::boundary("word")
+      ) |>
+        unlist() |>
+        unique() |>
+        (\(x) paste0("\\b", x) |> paste(collapse = "|"))()
+    ) |>
+    stringr::str_remove_all(pattern = "[0-9]{2}-[0-9]{2}°C|[0-9]{2}\\s-\\s[0-9]{2}°C") |>
+    stringr::str_remove_all(pattern = "Wind:") |>
+    stringr::str_split(pattern = "\\s{2,}", simplify = TRUE) |>
+    (\(x)
+      {
+        data.frame(
+          wind_today = x[ , 2],
+          wind_tomorrow = x[ , 3]
+        )
+      }
+    )()
+}
+
+#'
+#' @rdname forecasts_get
+#' @export
+#' 
+
+forecasts_get_coastal <- function(.text) {
+  which(stringr::str_detect(string = .text, pattern = "Coastal:")) |>
+    (\(x) .text[x])() |>
+    stringr::str_remove_all(pattern = "[0-9]{2}-[0-9]{2}°C|[0-9]{2}\\s-\\s[0-9]{2}°C") |>
+    stringr::str_remove_all(pattern = "Coastal:") |>
+    stringr::str_split(pattern = "\\s{2,}", simplify = TRUE) |>
+    (\(x)
+      {
+        data.frame(
+          coastal_today = x[ , 2],
+          coastal_tomorrow = x[ , 3]
+        )
+      }
+    )()
+}
+
+#'
+#' @rdname forecasts_get
+#' @export
+#' 
+
+forecasts_get_temperature <- function(.text) {
+  stringr::str_extract_all(
+    string = .text, pattern = "[0-9]{2}-[0-9]{2}°C|[0-9]{2}\\s-\\s[0-9]{2}°C"
+  ) |>
+    (\(x) x[lapply(X = x, FUN = length) != 0])() |>
+    (\(x)
+      {
+        lapply(
+          X = x,
+          FUN = function(x) {
+            data.frame(rbind(x)) |>
+              (\(x) 
+                { 
+                  names(x) <- c("temperature_today", "temperature_tomorrow")
+                  x
+                }
+              )()
+          }
+        )
+      }  
+    )() |>
+    dplyr::bind_rows() |>
+    (\(x) { row.names(x) <- NULL; x })()
+}
diff --git a/R/pagasa_forecasts_utils.R b/R/pagasa_forecasts_utils.R
@@ -0,0 +1,41 @@
+#
+# PAGASA forecast regions
+#
+
+pagasa_forecast_regions <- list(
+  Mindanao = data.frame(
+    geographic_unit = c(
+      "Laguindingan", "Cagayan de Oro City", "Malaybalay City",
+      "Metro Davao", "Zamboanga City", "Surigao City", "Butuan City",
+      "Cotabato City", "General Santos City", "Dipolog City", "Tagum City",
+      "Marawi City", "Iligan City", "Pagadian City"
+    )
+  ),
+  `National Capital Region` = data.frame(
+    geographic_unit = c(
+      "Metro Manila", "Tarlac", "Nueva Ecija", "Zambales", "Bataan", "Pampanga",
+      "Bulacan", "Cavite", "Laguna", "Batangas", "Rizal", "Quezon"
+    )
+  ),
+  `Northern Luzon` = data.frame(
+    geographic_unit = c(
+      "Ilocos Norte", "Ilocos Sur", "La Union", "Pangasinan", "Batanes", 
+      "Cagayan", "Isabela", "Quirino", "Nueva Vizcaya", "Abra", "Benguet",
+      "Mountain Province", "Ifugao", "Kalinga", "Apayao", "Aurora"
+    )
+  ),
+  `Southern Luzon` = data.frame(
+    geographic_unit = c(
+      "Albay", "Camarines Norte", "Camarines Sur", "Catanduanes", "Sorsogon", 
+      "Masbate", "Northern Samar", "Oriental Mindoro", "Marinduque", "Romblon"
+    )
+  ),
+  Visayas = data.frame(
+    geographic_unit = c(
+      "Cebu", "Bohol", "Negros Occidental", "Negros Oriental", "Siquijor",
+      "Leyte", "Southern Leyte", "Biliran", "Samar", "Eastern Samar", "Iloilo",
+      "Guimaras", "Capiz", "Aklan", "Antique", "Palawan", "Occidental Mindoro"
+    )
+  )
+) |>
+  dplyr::bind_rows(.id = "regional_grouping")