Skip to content

Commit

Permalink
* The emissions*() functions now preserve unmatched products and mi…
Browse files Browse the repository at this point in the history
…ssing benchmarks (#639)

* 'emissions*()' now preserve unmatched products and missing benchmarks (#718)
  • Loading branch information
maurolepore authored Feb 27, 2024
1 parent 5ae8183 commit 657d275
Show file tree
Hide file tree
Showing 11 changed files with 1,264 additions and 219 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tiltIndicator
Title: Indicators for the 'TILT' Project
Version: 0.0.0.9206
Version: 0.0.0.9208
Authors@R: c(
person("Mauro", "Lepore", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "https://orcid.org/0000-0002-1986-7988")),
Expand Down
17 changes: 16 additions & 1 deletion R/emissions_profile.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,25 @@ emissions_profile <- function(companies,
low_threshold = 1 / 3,
high_threshold = 2 / 3) {
product <- emissions_profile_any_at_product_level(companies, co2, low_threshold, high_threshold)
company <- any_at_company_level(product)
company <- epa_at_company_level(product) |>
insert_row_with_na_in_risk_category()

nest_levels(product, company)
}

#' @export
#' @rdname emissions_profile_upstream
emissions_profile_upstream <- emissions_profile

insert_row_with_na_in_risk_category <- function(data) {
levels <- c(risk_category_levels(), NA)
data |>
mutate(has_na = anyNA(.data$risk_category), .by = "grouped_by") |>
filter(!.data$has_na) |>
distinct(.data$companies_id, .data$grouped_by) |>
dplyr::bind_cols(tibble::tibble(risk_category = NA_character_, value = 0)) |>
bind_rows(data) |>
mutate(risk_category = factor(.data$risk_category, levels)) |>
arrange(.data$companies_id, .data$grouped_by, .data$risk_category) |>
mutate(risk_category = as.character(.data$risk_category))
}
7 changes: 6 additions & 1 deletion R/emissions_profile_any_at_product_level.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ emissions_profile_any_at_product_level <- function(companies,
add_risk_category(low_threshold, high_threshold) |>
join_companies(.companies) |>
epa_select_cols_at_product_level() |>
polish_output(cols_na_at_product_level())
filter(if_all_na_is_first_else_TRUE(.data[["grouped_by"]]), .by = all_of("companies_id")) |>
distinct()
}

epa_check <- function(x) {
Expand Down Expand Up @@ -60,3 +61,7 @@ epa_select_cols_at_product_level <- function(data) {
find_co2_footprint(data)
)
}

if_all_na_is_first_else_TRUE <- function(x) {
if (all(is.na(x))) is_first(x) else rep(TRUE, length(x))
}
67 changes: 67 additions & 0 deletions R/epa_at_company_level.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
epa_at_company_level <- function(data) {
count_of_na <- data |>
select(all_of(cols_at_all_levels())) |>
mutate(unmatched_products = sum(is.na(.data$grouped_by)), .by = aka("id")) |>
mutate(missing_benchmarks = sum(is.na(.data$risk_category)), .by = cols_by()) |>
mutate(na = .data$unmatched_products + .data$missing_benchmarks) |>
select(aka("id"), "grouped_by", "na") |>
filter(!is.na(.data$grouped_by)) |>
distinct() |>
mutate(risk_category = NA_character_)

if (all_na(count_of_na, "grouped_by", "risk_category")) {
return(empty_company_output_from(data[[aka("id")]]))
}

with_value <- data |>
select(all_of(cols_at_all_levels())) |>
filter(!is.na(.data$grouped_by), !is.na(.data$risk_category)) |>
bind_rows(tidyr::uncount(count_of_na, .data$na)) |>
dplyr::arrange(.data[[aka("id")]], .data$grouped_by) |>
add_count(.data[[aka("id")]], .data$grouped_by) |>
mutate(value = .data$n / sum(.data$n), .by = cols_by()) |>
select(-"n")

levels <- c(risk_category_levels(), NA)
with_value |>
pick_companies_with_some_match() |>
group_by(.data$companies_id, .data$grouped_by) |>
mutate(risk_category = factor(.data$risk_category, levels = levels)) |>
expand(.data$risk_category) |>
left_join(with_value, by = cols_at_all_levels()) |>
ungroup() |>
mutate(
value = replace_na(.data$value, 0),
.by = cols_by()
) |>
# Hack #285. FIXME: Explore why this happens
summarize(
value = sum(.data$value),
.by = cols_at_all_levels()
) |>
bind_rows(pick_companies_with_no_match(data))
}

all_na <- function(data, ...) {
data |>
select(...) |>
lapply(is.na) |>
unlist() |>
all()
}

pick_companies_with_no_match <- function(data) {
data |>
mutate(all_na_grouped_by = all(is.na(.data$grouped_by)), .by = "companies_id") |>
filter(.data$all_na_grouped_by) |>
select(-"all_na_grouped_by") |>
pull(.data$companies_id) |>
empty_company_output_from()
}

pick_companies_with_some_match <- function(data) {
data |>
mutate(all_na_grouped_by = all(is.na(.data$grouped_by)), .by = "companies_id") |>
filter(!.data$all_na_grouped_by, !is.na(.data$grouped_by)) |>
select(-"all_na_grouped_by")
}
4 changes: 3 additions & 1 deletion R/epa_compute_profile_ranking.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ epa_compute_profile_ranking <- function(data) {

exclude <- short_isic(data) |
is.na(get_column(data, aka("isic"))) |
is.na(get_column(data, aka("tsector")))
is.na(get_column(data, aka("tsector"))) |
is.na(get_column(data, aka("xunit")))

list(!exclude, exclude) |>
map(\(x) filter(data, x)) |>
Expand Down Expand Up @@ -97,6 +98,7 @@ assign_na_to_profile_ranking_in_special_cases <- function(data) {
data |> should_be_na_when_isic_has_2_or_3_digits() ~ NA,
data |> should_be_na_when_missing(aka("isic")) ~ NA,
data |> should_be_na_when_missing(aka("tsector")) ~ NA,
data |> should_be_na_when_missing(aka("xunit")) ~ NA,
.default = .data$profile_ranking
))
}
Expand Down
128 changes: 97 additions & 31 deletions tests/testthat/_snaps/emissions_profile.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,68 +92,134 @@
16 antimonarchy_canine
17 antimonarchy_canine
18 antimonarchy_canine
19 antimonarchy_canine
20 antimonarchy_canine
21 antimonarchy_canine
22 antimonarchy_canine
23 antimonarchy_canine
24 antimonarchy_canine
[[2]]
grouped_by
1 all
2 all
3 all
4 isic_4digit
4 all
5 isic_4digit
6 isic_4digit
7 tilt_sector
8 tilt_sector
7 isic_4digit
8 isic_4digit
9 tilt_sector
10 unit
11 unit
12 unit
13 unit_isic_4digit
14 unit_isic_4digit
15 unit_isic_4digit
16 unit_tilt_sector
17 unit_tilt_sector
18 unit_tilt_sector
10 tilt_sector
11 tilt_sector
12 tilt_sector
13 unit
14 unit
15 unit
16 unit
17 unit_isic_4digit
18 unit_isic_4digit
19 unit_isic_4digit
20 unit_isic_4digit
21 unit_tilt_sector
22 unit_tilt_sector
23 unit_tilt_sector
24 unit_tilt_sector
[[3]]
risk_category
1 high
2 medium
3 low
4 high
5 medium
6 low
7 high
8 medium
9 low
10 high
11 medium
12 low
4 <NA>
5 high
6 medium
7 low
8 <NA>
9 high
10 medium
11 low
12 <NA>
13 high
14 medium
15 low
16 high
17 medium
18 low
16 <NA>
17 high
18 medium
19 low
20 <NA>
21 high
22 medium
23 low
24 <NA>
[[4]]
value
1 1
2 0
3 0
4 1
5 0
4 0
5 1
6 0
7 1
7 0
8 0
9 0
10 1
9 1
10 0
11 0
12 0
13 1
14 0
15 0
16 1
17 0
16 0
17 1
18 0
19 0
20 0
21 1
22 0
23 0
24 0

# At company level, three matched products with different `co2_footprint`, one missing benchmark, and one unmatched product yield the expected output

Code
missing_benchmark
Output
# A tibble: 8 x 4
companies_id grouped_by risk_category value
<chr> <chr> <chr> <dbl>
1 a isic_4digit high 0.2
2 a isic_4digit medium 0.2
3 a isic_4digit low 0.2
4 a isic_4digit <NA> 0.4
5 a unit_isic_4digit high 0.2
6 a unit_isic_4digit medium 0.2
7 a unit_isic_4digit low 0.2
8 a unit_isic_4digit <NA> 0.4

---

Code
no_missing_benchmark
Output
# A tibble: 16 x 4
companies_id grouped_by risk_category value
<chr> <chr> <chr> <dbl>
1 a all high 0.4
2 a all medium 0.2
3 a all low 0.2
4 a all <NA> 0.2
5 a tilt_sector high 0.4
6 a tilt_sector medium 0.2
7 a tilt_sector low 0.2
8 a tilt_sector <NA> 0.2
9 a unit high 0.4
10 a unit medium 0.2
11 a unit low 0.2
12 a unit <NA> 0.2
13 a unit_tilt_sector high 0.4
14 a unit_tilt_sector medium 0.2
15 a unit_tilt_sector low 0.2
16 a unit_tilt_sector <NA> 0.2

Loading

0 comments on commit 657d275

Please sign in to comment.