Index with new COINr methodology.Rmd

---
title: "Modelling comparisons with new COINr methodology"
author: "Jonas Rekdal Mathisen"
date: "`r Sys.Date()`"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r Packages, include=FALSE}
#libraries:
pacman::p_load(COINr, countrycode, reactable, tidyverse, scales, RColorBrewer, readxl, ggcorrplot)
```


## Input data
```{r load, warning=FALSE}

# Load the benchmarking results. Only one of the two imports below should be
# active at a time; the NOURISHING import is currently commented out.

# NOURISHING:
#Benchmark_raw <- read_excel("C:\\Users\\JRMA\\OneDrive - Folkehelseinstituttet\\WP2 & 3 collaboration\\Benchmarking tool - 5CC countries scoring\\NOURISHING\\Primary NOURISHING benchmarking sheets - rescaled.xlsx", sheet = "All_countries_format")

# MOVING:
Benchmark_raw <- read_excel(
  path = "C:\\Users\\JRMA\\OneDrive - Folkehelseinstituttet\\WP2 & 3 collaboration\\Benchmarking tool - 5CC countries scoring\\MOVING\\Rescaled MOVING.xlsx",
  sheet = "all_countries_format"
)

# Score columns of the countries that take part in the analysis
numericals <- c(
  "NO_final_numeric",
  "PL_final_numeric",
  "UK_final_numeric",
  "PT_final_numeric",
  "NL_final_numeric"
)

# Preview the first five rows of the imported sheet
reactable(head(Benchmark_raw, 5), compact = TRUE)

```

Make lists of survey items to include in or exclude from further analysis.
```{r}
# UniqueIDs of the rows whose Indicator_type is "M&E"
list_ME <- Benchmark_raw %>%
  filter(Indicator_type == "M&E") %>%
  select(UniqueID) %>%
  as_vector()

# UniqueIDs of the rows whose Indicator_type is "Implementation"
list_implementation <- Benchmark_raw %>%
  filter(Indicator_type == "Implementation") %>%
  select(UniqueID) %>%
  as_vector()

# UniqueIDs of the rows that have no Indicator_type at all
list_blankindicators <- Benchmark_raw %>%
  filter(is.na(Indicator_type)) %>%
  select(UniqueID) %>%
  as_vector()

# Combined exclusion lists, passed to new_coin(exclude = ...) further down
list_blankandME <- c(list_blankindicators, list_ME)

list_MEandImpl <- c(list_ME, list_implementation)
```


# Rescaled versions of the index
```{r}
# Rescale every country score column (listed in `numericals`) from the 0-100
# scale of the input sheet: rows whose UniqueID contains the literal text
# ".0.0" are mapped onto 0-80, all other rows onto 0-20.
#
# NOTE(review): the pattern used to be the unescaped regular expression
# ".0.0", in which "." matches ANY character, so IDs such as "1.10.0" matched
# as well. It is now matched literally with fixed() - confirm this agrees with
# the UniqueID naming scheme.
#
# all_of() is used because selecting with a bare external character vector
# inside across() is deprecated in tidyselect.
Benchmark_rescaled <- Benchmark_raw %>%
  mutate(across(
    all_of(numericals),
    ~ case_when(
      str_detect(UniqueID, fixed(".0.0")) ~ rescale(.x, to = c(0, 80), from = c(0, 100)),
      TRUE ~ rescale(.x, to = c(0, 20), from = c(0, 100))
    )
  ))

# Round to whole numbers BEFORE converting to integer. as.integer() truncates
# towards zero, so converting first (as was done previously) turned values such
# as 29.6 - or 27.999999999999996 produced by floating-point arithmetic - into
# 29 and 27, and made the subsequent round_df() call a no-op.
Benchmark_rescaled <- Benchmark_rescaled %>%
  round_df(0) %>%
  mutate(across(where(is.double), as.integer))

```

## Transform data to COINr
### Indicator Data (IndData == iData)
```{r}
# Keep only the indicator ID and one score column per benchmarked country,
# giving each score column the full country name.
# Note: use either Benchmark_rescaled or Benchmark_raw as the source here.
iData <- Benchmark_rescaled %>%
  select(
    UniqueID,
    Norway = NO_final_numeric,
    Poland = PL_final_numeric,
    `United Kingdom` = UK_final_numeric,
    Portugal = PT_final_numeric,
    Netherlands = NL_final_numeric
  )

# Indicator IDs, used as column names once the table has been transposed
Indicators <- as.vector(t(iData$UniqueID))

# Transpose: one row per country, one column per indicator
iData <- setNames(data.frame(t(iData[-1])), Indicators)

# Move the country names from the row names into a regular column
iData$uName <- rownames(iData)
row.names(iData) <- NULL


isCoCREATE <- c("Norway", "Netherlands", "Portugal", "Poland", "United Kingdom")

iData

# Add the unit code and the grouping columns derived from the country name
iData <- iData %>%
  mutate(
    Group_Continent = countrycode(
      sourcevar = uName,
      origin = "country.name",
      destination = "continent"
    ),
    uCode = countryname(sourcevar = uName, destination = "iso3c"),
    #Group_EU = countrycode(sourcevar = uName, origin="country.name", destination="eu28"),
    Group_Region = countrycode(
      sourcevar = uName,
      origin = "country.name",
      destination = "un.regionsub.name"
    ),
    Group_COCREATE = uName %in% isCoCREATE
  )

reactable(iData)

# Store any remaining double columns as integers
iData <- iData %>% mutate(across(where(is.double), as.integer))

check_iData(iData)
```

### iMeta 
```{r}
## Groups: one metadata row per "Group_*" column of iData
Groups <- data.frame(
  iCode = iData %>% select(starts_with("Group_")) %>% colnames(),
  iName = "Group info",
  Parent = NA,
  Level = NA,
  Type = "Group"
)


# Columns of the benchmark sheet that describe the index hierarchy
BenchmarkMeta <- Benchmark_raw %>%
  select(
    `Policy letter`,
    `Policy area`,
    `Benchmark ID`,
    `Sub-policy area`,
    UniqueID,
    `Indicator name`
  )

### Level 1 - indicators (parent: benchmark ID)
iMeta_Indicators <- BenchmarkMeta %>%
  select(iCode = UniqueID, iName = `Indicator name`, Parent = `Benchmark ID`) %>%
  mutate(Level = 1, Type = "Indicator")


### Level 2 - sub-policy area (parent: policy letter)
iMeta_SubPolicyArea <- BenchmarkMeta %>%
  select(iCode = `Benchmark ID`, iName = `Sub-policy area`, Parent = `Policy letter`) %>%
  distinct() %>%
  mutate(Level = 2, Type = "Aggregate")

iMeta_SubPolicyArea

### Level 3 - policy area (parent: the index)
iMeta_PolicyArea <- BenchmarkMeta %>%
  select(iCode = `Policy letter`, iName = `Policy area`) %>%
  distinct() %>%
  mutate(Parent = "Index", Level = 3, Type = "Aggregate")

iMeta_PolicyArea

### Level 4 - index (single top-level row)
## For NOURISHING, set iName = "NOURISHING" instead of "MOVING"
iMeta_full <- data.frame(
  iCode = "Index",
  iName = "MOVING",
  Parent = NA,
  Level = 4,
  Type = "Aggregate"
)

# Stack all metadata pieces into one table
list_iMeta <- list(Groups, iMeta_Indicators, iMeta_SubPolicyArea, iMeta_PolicyArea, iMeta_full)

iMeta <- do.call("rbind", list_iMeta)

## Applies to every row
iMeta$Direction <- 1 # every row gets a positive direction
iMeta$Weight <- 1 # every row gets the same weight

iMeta <- as.data.frame(iMeta)

iMeta

check_iMeta(iMeta)
```

### Build full (new_coin)
```{r}
# Build the coin with the M&E and Implementation indicators excluded
# (list_MEandImpl). The object is called NOURISHING regardless of which
# benchmark sheet was loaded.
NOURISHING <- new_coin(
  iData = iData,
  iMeta = iMeta,
  level_names = c("Indicators", "Benchmark", "Policy area", "Index"),
  exclude = list_MEandImpl
)

NOURISHING
```


## Aggregation
The following table shows normal aggregation, with non-weighted presence, w/o M&E.
```{r Aggregation}
# Aggregate the "Raw" data set and pull out the resulting "Aggregated" one
NOURISHING <- Aggregate(NOURISHING, dset = "Raw")

dset_aggregated <- get_dset(NOURISHING, dset = "Aggregated")

# Show uCode plus the last columns of the aggregated data set, rounded to
# whole numbers.
# CHANGE MOVING/NOURISHING: use 11 trailing columns for NOURISHING:
#   dset_aggregated %>% round_df(0) %>% select(uCode, tail(names(.), 11))
# MOVING (7 trailing columns):
dset_aggregated %>%
  round_df(0) %>%
  select(uCode, all_of(tail(names(.), 7)))
```


## Implementation only

```{r}

# Second coin: same data and metadata, but this time the indicators without an
# Indicator_type and the M&E indicators (list_blankandME) are excluded.
PresenceIndex_NOURISHING <- new_coin(
  iData = iData,
  iMeta = iMeta,
  level_names = c("Indicators", "Benchmark", "Policy area", "Index"),
  exclude = list_blankandME
)

PresenceIndex_NOURISHING

# Aggregate the "Raw" data set and keep the "Aggregated" result
PresenceIndex_NOURISHING <- Aggregate(PresenceIndex_NOURISHING, dset = "Raw")

PresenceIndex_NOURISHING_dset_aggregated <- get_dset(
  PresenceIndex_NOURISHING,
  dset = "Aggregated"
)

PresenceIndex_NOURISHING_dset_aggregated

# Show uCode plus the last columns of the aggregated data set, rounded to
# whole numbers.
# CHANGE MOVING/NOURISHING: use 11 trailing columns for NOURISHING:
#   PresenceIndex_NOURISHING_dset_aggregated %>% round_df(0) %>% select(uCode, tail(names(.), 11))
# MOVING (7 trailing columns):
PresenceIndex_NOURISHING_dset_aggregated %>%
  round_df(0) %>%
  select(uCode, all_of(tail(names(.), 7)))
```

# Combination of models
```{r}
# Number of trailing columns of each aggregated data set that are kept.
# CHANGE MOVING/NOURISHING - the value is now set in ONE place so that both
# models are always cut the same way:
#   NOURISHING: 11
#   MOVING:      7
#   ALL MOVING: 29
n_aggregate_cols <- 29

# Implementation-only model: uCode plus the trailing columns, rounded to whole
# numbers, with uCode moved into the row names.
PresenceDF <- PresenceIndex_NOURISHING_dset_aggregated %>%
  round_df(0) %>%
  select(uCode, all_of(tail(names(.), n_aggregate_cols)))
rownames(PresenceDF) <- PresenceDF$uCode
PresenceDF <- PresenceDF %>% select(-uCode)

# Model built without the M&E and Implementation indicators, cut the same way
BlanksDF <- dset_aggregated %>%
  round_df(0) %>%
  select(uCode, all_of(tail(names(.), n_aggregate_cols)))
rownames(BlanksDF) <- BlanksDF$uCode
BlanksDF <- BlanksDF %>% select(-uCode)

# `+` on two data frames adds cell by cell purely by POSITION. Because the two
# coins exclude different indicators, verify that both tables describe the
# same countries and the same columns, then align BlanksDF to PresenceDF by
# name, so that mismatched columns can never be added together silently.
stopifnot(
  identical(dim(PresenceDF), dim(BlanksDF)),
  setequal(rownames(PresenceDF), rownames(BlanksDF)),
  setequal(colnames(PresenceDF), colnames(BlanksDF))
)
BlanksDF <- BlanksDF[rownames(PresenceDF), colnames(PresenceDF), drop = FALSE]

ArticleModel <- PresenceDF + BlanksDF
#write.csv(ArticleModel, "ArticleModel_crosschecked.csv")

# Column names of the combined model; reused when reshaping to long format
ColumnNames <- colnames(ArticleModel)

BlanksDF
PresenceDF

ArticleModel %>% rownames_to_column(var = "Country") %>% group_by(Country)
```

## Unpivot test
```{r}
#https://python-bloggers.com/2022/07/how-to-unpivot-a-dataset-in-excel-power-query-vs-r-vs-python/

# names_from  <- header?
# names_to    <- the name of the column that holds the categories
# values_to   <- the name of the column that holds the numbers

# Move the unit codes from the row names into a regular column
pivoteering <- ArticleModel %>% rownames_to_column(var = "uCode")

# CHANGE MOVING/NOURISHING
#pivoted <- pivoteering %>% select(-Index) %>% pivot_longer(cols = c("N", "O", "U", "R", "I", "S", "H", "I.2.", "N.2.", "G"),
 #                                       values_to = "Scores",
 #                                        names_to = "Category")

#pivoted <- pivoteering %>% select(-Index) %>% pivot_longer(cols = c("M", "O", "V", "I", "N", "G"),
#                                        values_to = "Scores",
#                                        names_to = "Category")

# Long format: one row per unit and category. all_of() is required because
# selecting with a bare external character vector is deprecated in tidyselect.
pivoted <- pivoteering %>%
  pivot_longer(
    all_of(ColumnNames),
    values_to = "Scores",
    names_to = "Category"
  )

# CHANGE MOVING/NOURISHING
list_PAs <- c("M", "O", "V", "I", "N", "G")


#https://stackoverflow.com/questions/62472814/how-to-order-a-column-within-a-group-in-r
#CHANGE MOVING NOURISHING, LIST PAs
  # pivoted <- pivoted %>% mutate(Category = factor(Category, levels = list_PAs))

# Keep the categories in the column order of ArticleModel
pivoted <- pivoted %>% mutate(Category = factor(Category, levels = ColumnNames))

# Sort by category, then unit, and rename Scores to Score. This replaces a
# group_by() + summarise(Score = Scores) that produced the same rows but left
# the result grouped by Category.
pivoted <- pivoted %>%
  arrange(Category, uCode) %>%
  select(Category, uCode, Score = Scores)

# Verbal outcome label for each score; scores above 100 or missing give NA
pivoted <- pivoted %>%
  mutate(
    Outcome = case_when(
      Score < 1 ~ "No policy identified",
      Score < 25 ~ "Poor",
      Score < 50 ~ "Low",
      Score < 75 ~ "Moderate",
      Score <= 100 ~ "High"
    )
  )


pivoted
```

# CSV output
```{r}
# Write the long-format scores to disk. write.csv() returns NULL invisibly, so
# `nullhundred` holds NULL afterwards; the assignment only suppresses output.
nullhundred <- write.csv(pivoted, file = "MOVING-full.csv")
```

# Correlation of ranks 

First, we create the categories that should be correlated.
```{r}
# Recode every double column of a score table into ordinal categories:
#   1 = Poor (< 25), 2 = Low (< 50), 3 = Moderate (< 75), 4 = High (<= 100).
# Scores above 100 and missing scores become NA.
#
# scores_df: data frame of scores (one column per category/aggregate).
# Returns:   the same data frame with its double columns recoded to 1-4.
#
# The three models previously each carried their own copy of this recoding;
# a single helper keeps the thresholds identical for all of them.
categorise_scores <- function(scores_df) {
  scores_df %>%
    mutate(across(
      where(is.double),
      ~ case_when(
        .x < 25 ~ 1, # Poor
        .x < 50 ~ 2, # Low
        .x < 75 ~ 3, # Moderate
        .x <= 100 ~ 4 # High
      )
    ))
}

Blanks_CategoryScore <- categorise_scores(BlanksDF)
Presence_CategoryScore <- categorise_scores(PresenceDF)
Combined_CategoryScore <- categorise_scores(ArticleModel)

Combined_CategoryScore
Presence_CategoryScore
```

Second, we compute the correlations. Here, `Presence_CategoryScore` and `Combined_CategoryScore` are used as an example. Note: if every country falls in the same category within a column (for example, all "Poor"), that column has no variance, the rank correlation is undefined, and `NA` is returned. This may reflect agreement between the two models, but it is not captured in the table or figure below.

```{r}
# Kendall rank correlation between every column of the implementation-only
# model and every column of the combined model.
kendall_matrix <- cor(
  Presence_CategoryScore,
  Combined_CategoryScore,
  method = "kendall"
)

# Only the like-for-like comparisons (column k against column k) are of
# interest, so keep the diagonal and blank out everything else with NA.
# Both inputs are expected to have the same columns in the same order.
stopifnot(nrow(kendall_matrix) == ncol(kendall_matrix))

score_names <- colnames(kendall_matrix)
diagonal_only <- matrix(
  NA_real_,
  nrow = length(score_names),
  ncol = length(score_names),
  dimnames = list(score_names, score_names)
)
diag(diagonal_only) <- diag(kendall_matrix)

corr_Presence_vs_Combined <- as.data.frame(diagonal_only)


corr_Presence_vs_Combined %>% reactable()
```

```{r}
# Plot the diagonal-only correlation table. The call is namespace-qualified
# because the ggcorrplot package must be available for this chunk to run and
# the function would otherwise not be found unless the package is attached.
ggcorrplot::ggcorrplot(
  corr_Presence_vs_Combined,
  ggtheme = ggplot2::theme_gray,
  colors = c("#6D9EC1", "white", "#476930"),
  outline.color = "black",
  lab = TRUE,
  lab_size = 2,
  show.legend = TRUE,
  show.diag = TRUE
)
```


```{r, include=FALSE}
#Here is a script to order the results, should you change the standard order. You need to change the variable names.
  # xresults_simple_ranks <- results_simple_ranks[order(results_simple_ranks$'UnitCode'),] %>% `rownames<-`(c())
  # xresults_simple_ranks <- column_to_rownames(xresults_simple_ranks, var = "UnitCode")
  # xresults_simple_ranks <- xresults_simple_ranks %>% select(-UnitName)

```