From 6dd4b28df38f5fb2914b6dbc3d84a876a5dfef51 Mon Sep 17 00:00:00 2001
From: Mauro Lepore <maurolepore@gmail.com>
Date: Sun, 12 Nov 2023 11:33:04 -0300
Subject: [PATCH] Improve docs and tests (#604)

---
 ...ons_profile_any_add_values_to_categorize.R | 11 ++++-
 ...ns_profile_any_add_values_to_categorize.Rd |  4 +-
 ...ons_profile_any_add_values_to_categorize.R | 47 +++++++++++++++++++
 3 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/R/emissions_profile_any_add_values_to_categorize.R b/R/emissions_profile_any_add_values_to_categorize.R
index 28a0c6012..e78685349 100644
--- a/R/emissions_profile_any_add_values_to_categorize.R
+++ b/R/emissions_profile_any_add_values_to_categorize.R
@@ -5,8 +5,8 @@
 #'
 #' @family pre-processing helpers
 #'
-#' @return The input data frame with the additional column
-#'   `values_to_categorize`.
+#' @return The input data frame with the additional columns `grouped_by` and
+#'   `values_to_categorize` and one row per benchmark per company.
 #'
 #' @export
 #'
@@ -23,10 +23,17 @@
 #' inputs <- read_csv(toy_emissions_profile_upstream_products())
 #' inputs |> emissions_profile_any_add_values_to_categorize()
 emissions_profile_any_add_values_to_categorize <- function(data) {
+  check_emissions_profile_any_add_values_to_categorize(data)
+  # Apply `add_rank()` to `data` once per benchmark; names fill `grouped_by`.
   benchmarks <- set_names(epa_benchmarks(data), flat_benchmarks(data))
   map_df(benchmarks, ~ add_rank(data, .x), .id = "grouped_by")
 }
 
+check_emissions_profile_any_add_values_to_categorize <- function(data) {
+  needed <- c(aka("tsector"), aka("xunit"), aka("isic"), aka("co2footprint"))
+  walk(needed, \(nm) check_matches_name(data, nm)) # one check per name
+}
+
 rank_proportion <- function(x) {
   rank(x) / length(x)
 }
diff --git a/man/emissions_profile_any_add_values_to_categorize.Rd b/man/emissions_profile_any_add_values_to_categorize.Rd
index 2886218c7..fe05c766d 100644
--- a/man/emissions_profile_any_add_values_to_categorize.Rd
+++ b/man/emissions_profile_any_add_values_to_categorize.Rd
@@ -12,8 +12,8 @@ emissions_profile_any_add_values_to_categorize(data)
 upstream-products (a.k.a. inputs).}
 }
 \value{
-The input data frame with the additional column
-\code{values_to_categorize}.
+The input data frame with the additional columns \code{grouped_by} and
+\code{values_to_categorize} and one row per benchmark per company.
 }
 \description{
 Add values to categorize
diff --git a/tests/testthat/test-emissions_profile_any_add_values_to_categorize.R b/tests/testthat/test-emissions_profile_any_add_values_to_categorize.R
index 40fd40a48..86f3dc5f7 100644
--- a/tests/testthat/test-emissions_profile_any_add_values_to_categorize.R
+++ b/tests/testthat/test-emissions_profile_any_add_values_to_categorize.R
@@ -5,3 +5,50 @@ test_that("works with any 'co2-like' dataset", {
   co2 <- example_inputs()
   expect_no_error(emissions_profile_any_add_values_to_categorize(co2))
 })
+
+test_that("adds columns `grouped_by` and `values_to_categorize`", {
+  input <- example_products()
+
+  result <- emissions_profile_any_add_values_to_categorize(input)
+
+  added <- setdiff(names(result), names(input))
+  expect_equal(added, c("grouped_by", "values_to_categorize"))
+})
+
+test_that("with one company, adds one row per benchmark per company", {
+  one_company <- example_products()
+
+  result <- emissions_profile_any_add_values_to_categorize(one_company)
+
+  expected_rows <- length(flat_benchmarks(one_company))
+  expect_equal(nrow(result), expected_rows)
+})
+
+test_that("with two companies, adds one row per benchmark per company", {
+  two_companies <- example_products(!!aka("id") := c("a", "b"))
+
+  result <- emissions_profile_any_add_values_to_categorize(two_companies)
+
+  benchmarks <- flat_benchmarks(two_companies)
+  expect_equal(nrow(result), 2 * length(benchmarks))
+})
+
+test_that("without crucial columns errors gracefully", {
+  products <- example_products()
+
+  # Drop a single column by name, then expect an error whose message matches
+  # that same name.
+  expect_error_without <- function(column) {
+    incomplete <- select(products, -all_of(column))
+    expect_error(
+      emissions_profile_any_add_values_to_categorize(incomplete),
+      column
+    )
+  }
+
+  # One call per column the function requires.
+  expect_error_without(aka("tsector"))
+  expect_error_without(aka("xunit"))
+  expect_error_without(aka("isic"))
+  expect_error_without(aka("co2footprint"))
+})