diff --git a/.github/workflows/create-release-branch.yml b/.github/workflows/create-release-branch.yml
new file mode 100644
index 0000000..d4feb9f
--- /dev/null
+++ b/.github/workflows/create-release-branch.yml
@@ -0,0 +1,23 @@
+name: Create Release Branch
+on:
+  workflow_dispatch:
+jobs:
+  build-docs:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash
+        working-directory: release-automation
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v2
+      - name: setup-r
+        uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: "4.1.1"
+      - name: Install R dependencies
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          working-directory: release-automation
+      - name: Source code
+        run: R -e 'source("R/create-release-branch.R"); create_release_branch(osf_repo_link = "${{ secrets.OSF_REPO_LINK }}", osf_token = "${{ secrets.OSF_TOKEN }}", github_token = "${{ secrets.ACTION_TOKEN }}")'
diff --git a/.github/workflows/merge-release.yml b/.github/workflows/merge-release.yml
new file mode 100644
index 0000000..47cd87b
--- /dev/null
+++ b/.github/workflows/merge-release.yml
@@ -0,0 +1,28 @@
+name: Merge Release
+
+on:
+  pull_request:
+    types: [closed]
+    branches:
+      - 'release-*'
+
+jobs:
+  upload-dictionaries:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash
+        working-directory: release-automation
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v2
+      - name: setup-r
+        uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: "4.1.1"
+      - name: Install R dependencies
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          working-directory: release-automation
+      - name: Source code
+        run: R -e 'source("R/osf-interactions.R"); merge_relase_branch(osf_token = "${{ secrets.OSF_TOKEN }}")'
diff --git a/release-automation/.Rbuildignore b/release-automation/.Rbuildignore
new file mode 100644
index 0000000..d821302
--- /dev/null
+++ b/release-automation/.Rbuildignore
@@ -0,0 +1,4 @@
+^renv$
+^renv\.lock$
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/release-automation/.Rprofile b/release-automation/.Rprofile
new file mode 100644 index 0000000..81b960f --- /dev/null +++ b/release-automation/.Rprofile @@ -0,0 +1 @@ +source("renv/activate.R") diff --git a/release-automation/DESCRIPTION b/release-automation/DESCRIPTION new file mode 100644 index 0000000..a19245c --- /dev/null +++ b/release-automation/DESCRIPTION @@ -0,0 +1,24 @@ +Package: release.automation +Type: Package +Title: Release Automation +Version: 2.1.0 +Authors@R: c( + person(given = "Yulric", family = "Sequeira", role = c("aut"), email = "ysequeira@ohri.ca"), + person(given = "Rostyslav", family = "Vyuha", role = c("aut","cre"), email = "rvyuha@toh.ca")) +Maintainer: Rostyslav Vyuha +Description: PHES-ODM +Depends: + R (>= 4.0.3) +Imports: + osfr, + magrittr, + openxlsx, + logger +License: cc-by-4.0 +URL: https://github.com/Big-Life-Lab/PHES-ODM +BugReports: https://github.com/Big-Life-Lab/PHES-ODM/issues +Encoding: UTF-8 +RoxygenNote: 7.2.3 +Suggests: + testthat (>= 3.0.0) +Config/testthat/edition: 3 diff --git a/release-automation/NAMESPACE b/release-automation/NAMESPACE new file mode 100644 index 0000000..7c6a4f2 --- /dev/null +++ b/release-automation/NAMESPACE @@ -0,0 +1,3 @@ +# Generated by roxygen2: do not edit by hand + +export(create_release_files) diff --git a/release-automation/R/create-release-branch.R b/release-automation/R/create-release-branch.R new file mode 100644 index 0000000..3ae527e --- /dev/null +++ b/release-automation/R/create-release-branch.R @@ -0,0 +1,132 @@ +source("R/odm-dictionary-file.R") +source("R/files.R") +source("R/logging.R") +source("R/sets-sheet.R") +source("R/parts-sheet.R") + +#' Create release files +#' +#' Creates release files given the user OSF link and auth token. +#' +#' @param osf_repo_link The link to the OSF repo. +#' @param osf_token The OSF token used to authenticate the user. 
+#'
+#' @export
+create_release_branch <- function(osf_repo_link, osf_token, github_token) {
+  setup_logging()
+
+  errors <- c()
+  warnings <- c()
+
+  download_dictionary_result <- download_dictionary(osf_token, "dev")
+  warnings <- c(warnings, download_dictionary_result$warnings)
+
+  parse_files_sheet_result <- parse_files_sheet(
+    download_dictionary_result$dictionary
+  )
+  errors <- c(errors, parse_files_sheet_result$errors)
+  warnings <- c(warnings, parse_files_sheet_result$warnings)
+  if (length(errors) == 0) {
+    parsed_github_files <- .get_github_files(
+      parse_files_sheet_result$parsed_files
+    )
+    # create_files() requires the dictionary as its second argument
+    create_files(
+      parsed_github_files, download_dictionary_result$dictionary,
+      file.path(getwd(), "..")
+    )
+    # Download previous release dictionary
+    previous_dictionary_download_results <- download_dictionary(
+      osf_token, "current"
+    )
+    files_to_remove_result <-
+      parse_files_sheet(previous_dictionary_download_results$dictionary)
+    remove_files(
+      .get_github_files(files_to_remove_result$parsed_files),
+      previous_dictionary_download_results$dictionary
+    )
+
+    # Set git config
+    system('git config user.name "PBL-Bot"')
+    system('git config user.email "projectbiglife@toh.ca"')
+    system(glue::glue("git config user.password {github_token}"))
+
+    release_branch_name <- glue::glue(
+      "release-{download_dictionary_result$dictionary_version}"
+    )
+
+    system(glue::glue("git checkout -b {release_branch_name}"))
+
+    commit_and_push_files(download_dictionary_result$dictionary_version, release_branch_name)
+
+    print("Operation Successful")
+  } else {
+    print(glue::glue(
+      "{length(errors)} errors found during the build process. They can be seen
+      in the log file located at {log_file_path}. Aborting operation."
+    ))
+  }
+
+  if (length(warnings) > 0) {
+    print(glue::glue(
+      "{length(warnings)} warnings found during the build process. They can be
+      seen in the log file located at {log_file_path}."
+    ))
+  }
+}
+
+#' Remove Files
+#'
+#' Helper function to remove files based on output from validate_and_parse_files_sheet.
+#' +#' @param files_to_remove List output from validate_and_parse_files_sheet +#' @param dictionary openxlsx environment object storing the dictionary +remove_files <- function(files_to_remove, dictionary) { + # Loop over files to remove based on fileID + for (fileID in names(files_to_remove)) { + current_file_info <- files_to_remove[[fileID]] + # Skip OSF files + if (!"github" %in% current_file_info$destination) { + next() + } + + # Create full file path + file_extension <- switch(current_file_info$file_type, + "excel" = ".xlsx", + "csv" = ".csv" + ) + file_path <- file.path( + "..", + current_file_info$github_location, + paste0(current_file_info$file_name, file_extension) + ) + # Check if file exists + if (file.exists(file_path)) { + file.remove(file_path) + } + } +} + +#' Commit files +#' +#' Utility function to add, commit, and push all changes +#' +#' @param repo git2r object for repo reference +#' @param dictionary_version version of the dictionary being deployed +commit_and_push_files <- function(dictionary_version, branch_name) { + # Add all files + system("git add --all") + # Create commit + system(glue::glue('git commit -m "[BOT] release-{dictionary_version}"')) + # Push updated branch + system(glue::glue("git push origin {branch_name}")) +} + +.get_github_files <- function(parsed_files) { + github_files <- list() + for (file_id in names(parse_files_sheet_result$parsed_files)) { + if (parse_files_sheet_result$parsed_files[[file_id]] == "github") { + github_files[[file_id]] <- parse_files_sheet_result$parsed_files[[file_id]] + } + } + return(github_files) +} diff --git a/release-automation/R/download-dictionary.R b/release-automation/R/download-dictionary.R new file mode 100644 index 0000000..81a4162 --- /dev/null +++ b/release-automation/R/download-dictionary.R @@ -0,0 +1,23 @@ +library(magrittr) + +source("R/get-latest-version-from-summary-sheet.R") + +download_dictionary <- function(osf_token, osf_folder) { + osfr::osf_auth(osf_token) + repo_files <- 
osfr::osf_retrieve_node(osf_repo_link) %>% + osfr::osf_ls_files() + folder_name <- tidyr::if_else( + osf_folder == "dev", developer_version_folder, current_version_folder + ) + dictionary_folder <- repo_files[repo_files$name == folder_name, ] + dictionary_file <- osfr::osf_ls_files( + dictionary_folder, + type = "file", pattern = "ODM_dev-dictionary" + ) + download_info <- osfr::osf_download( + requested_dictionary, + path = dictionary_set_path, + conflicts = "overwrite" + ) + return(parse_dictionary(download_info[1, "local_path"])) +} diff --git a/release-automation/R/errors.R b/release-automation/R/errors.R new file mode 100644 index 0000000..4b2eda6 --- /dev/null +++ b/release-automation/R/errors.R @@ -0,0 +1,16 @@ +missing_sheet_id <- "E1" +missing_sheet_msg <- function(file_id, sheet_name, dictionary_sheets) { + return(glue::glue( + "Error {missing_sheet_id}: No sheet found with name {sheet_name} when + creating file with ID ", "{file_id}. The following sheets are in the + dictionary ", "{paste(dictionary_sheets, collapse = ", ")}." 
+ )) +} + +invalid_csv_part_id <- "E2" +invalid_csv_part_msg <- function(file_id, part_id) { + return(glue::glue( + "Error {invalid_csv_part_id}: The {part_id} used for the file with ID + {file_id} is a set which is only allowed for files with type excel" + )) +} diff --git a/release-automation/R/files.R b/release-automation/R/files.R new file mode 100644 index 0000000..8efc32b --- /dev/null +++ b/release-automation/R/files.R @@ -0,0 +1,118 @@ +files_sheet_metadata <- list( + file_name = list( + name = "name" + ), + file_ID = list( + name = "fileID" + ), + file_type = list( + name = "fileType", + categories = list( + excel = "excel", + csv = "csv" + ) + ), + part_ID = list( + name = "partID" + ), + add_headers = list( + name = "addHeaders" + ), + destinations = list( + name = "destinations", + categories = list( + osf = "osf", + github = "github" + ) + ), + osf_locations = list( + name = "osfLocation" + ), + github_location = list( + name = "githubLocation" + ) +) + +create_files <- function(files_to_create, dictionary, write_dir) { + # Loop over files to extract based on fileID + for (file_id in names(files_to_create)) { + current_file_info <- files_to_create[[file_id]] + + # Create a write directory based on destination and saving location + current_file_dir <- file.path( + write_dir, + current_file_info$github_location + ) + dir.create(current_file_dir, + showWarnings = FALSE, + recursive = TRUE + ) + + if (current_file_info$file_type == "excel") { + # Use parts as names of sheets to extract + sheets_to_copy <- current_file_info$sheet_names + excel_file <- openxlsx::copyWorkbook(dictionary) + all_dictionary_sheets <- names(dictionary) + for (sheet_name in all_dictionary_sheets) { + if (!(sheet_name %in% sheets_to_copy)) { + openxlsx::removeWorksheet(excel_file, sheet_name) + } + } + + if (!is.null(current_file_info$add_headers)) { + for (sheet_to_copy in sheets_to_copy) { + worksheet <- openxlsx::readWorkbook(excel_file, sheet_to_copy) + openxlsx::writeData( + 
excel_file, + sheet_to_copy, + .add_headers(worksheet, current_file_info$add_headers) + ) + } + } + + # Save the workbook in the appropriate directory + openxlsx::saveWorkbook(excel_file, + file = file.path( + current_file_dir, + glue::glue("{current_file_info$file_name}.xlsx") + ), + overwrite = TRUE + ) + } else if (current_file_info$file_type == "csv") { + sheet_name <- current_file_info$sheet_names[1] + csv_file <- + openxlsx::readWorkbook(dictionary, sheet_name) + if (!is.null(current_file_info$add_headers)) { + csv_file <- .add_headers(csv_file, current_file_info$add_headers) + } + + write.csv(csv_file, + file = file.path( + current_file_dir, + glue::glue( + "{current_file_info$file_name}.csv" + ) + ), + row.names = FALSE + ) + } + } +} + +.add_headers <- function(df, headers) { + df <- + rbind(colnames(df), df) + if (length(colnames(df)) > length(headers)) { + length_to_append <- + length(colnames(df)) - length(headers) + headers <- c(headers, rep("", length_to_append)) + } else if (length(colnames(df)) < length(headers)) { + length_to_append <- + length(headers) - length(colnames(df)) + for (col_counter in 1:length_to_append) { + df <- cbind(df, "") + } + } + colnames(df) <- headers + return(df) +} diff --git a/release-automation/R/get-latest-version-from-summary-sheet.R b/release-automation/R/get-latest-version-from-summary-sheet.R new file mode 100644 index 0000000..eed2add --- /dev/null +++ b/release-automation/R/get-latest-version-from-summary-sheet.R @@ -0,0 +1,7 @@ +get_latest_version_from_summary_sheet <- function(summary_sheet) { + return( + summary_sheet[[1]] + %>% .[!is.na(.)] + %>% .[length(.)] + ) +} diff --git a/release-automation/R/logging.R b/release-automation/R/logging.R new file mode 100644 index 0000000..9959c2b --- /dev/null +++ b/release-automation/R/logging.R @@ -0,0 +1,7 @@ +log_file_path <- file.path(getwd(), "log") + +#' Sets up logging +setup_logging <- function() { + file.remove(log_file_path) + 
logger::log_appender(logger::appender_file(log_file_path)) +} diff --git a/release-automation/R/odm-dictionary-file.R b/release-automation/R/odm-dictionary-file.R new file mode 100644 index 0000000..39bd965 --- /dev/null +++ b/release-automation/R/odm-dictionary-file.R @@ -0,0 +1,30 @@ +source("R/warnings.R") +source("R/errors.R") + +parts_sheet_name <- "parts" +sets_sheet_name <- "sets" +files_sheet_name <- "files" +summary_sheet_name <- "summary" + +parse_dictionary <- function(dictionary_path) { + warnings <- c() + + # Read in the dictionary workbook + dictionary <- openxlsx::loadWorkbook(dictionary_path) + + latest_version <- .get_latest_version_from_summary_sheet( + openxlsx::readWorkbook(dictionary, "summary") + ) + version_in_dictionary_file_name <- .get_version_from_dictionary_file_name( + basename(dictionary_path) + ) + if (latest_version != version_in_dictionary_file_name) { + warnings <- c(warnings, version_number_mismatch_msg) + } + + return(list( + dictionary = dictionary, + dictionary_version = latest_version, + warnings = warnings + )) +} diff --git a/release-automation/R/osf-interactions.R b/release-automation/R/osf-interactions.R new file mode 100644 index 0000000..6184cd8 --- /dev/null +++ b/release-automation/R/osf-interactions.R @@ -0,0 +1,92 @@ +source("R/files.R") +source("R/logging.R") + +#' Update OSF files +#' +#' This function updates files on the Open Science Framework (OSF) using the provided OSF link and token. +#' It downloads the dictionary file, validates the dictionary version and files sheet, and stages the files to be uploaded to OSF. +#' +#' @param osf_repo_link The link to the OSF repo. +#' @param osf_token The OSF token used to authenticate the user. +#' @param dictionary_path The path to the dictionary file. If NULL, the file will be downloaded from OSF. +#' @param past_dictionary_path The path to the past dictionary file. If NULL, the file will be downloaded from OSF. 
+#'
+merge_relase_branch <- function(osf_token) {
+  setup_logging()
+
+  # Download file using passed credentials
+  dictionary_path <- download_dictionary(osf_token, "dev")
+
+  # Validate dictionary version
+  parse_dictionary_result <- parse_dictionary(dictionary_path)
+
+  # Validate files sheet
+  parse_files_sheet_result <-
+    parse_files_sheet(parse_dictionary_result$dictionary)
+
+  osf_files_to_make <- .get_osf_files(parse_files_sheet_result$parsed_files)
+
+  create_files(osf_files_to_make, parse_dictionary_result$dictionary, file.path(getwd(), "../osf"))
+
+  # Download previous release dictionary
+  previous_dictionary_path <- download_dictionary(
+    osf_token, "current"
+  )
+
+  # Validate dictionary version
+  parse_previous_dictionary_result <- parse_dictionary(previous_dictionary_path)
+
+  archive_previous_release(osf_token, osf_repo_link, parse_previous_dictionary_result$dictionary_version)
+}
+
+#' Archive previous release
+#'
+#' This function archives the previous release on OSF. It creates a new archive folder, moves the previous release to the archive folder.
+#'
+#' @param osf_token The OSF token used to authenticate the user.
+#' @param osf_link The OSF link to the main repo.
+archive_previous_release <- function( + osf_token, + osf_link, + past_dictionary_version) { + # Authenticate with OSF, in case a local file is used + osfr::osf_auth(osf_token) + # Retrieve information from OSF + osf_info <- osfr::osf_retrieve_node(osf_link) + # Get info on current release + root_info <- osfr::osf_ls_files(osf_info) + current_release_info <- root_info[root_info$name == "Current Release", ] + # Get info on archieve folder + archive_info <- root_info[root_info$name == "Archived releases", ] + # Make new archive dir + new_archive <- osfr::osf_mkdir(archive_info, paste0("release_", past_dictionary_version)) + + # Move contents of current_release_info into archive + current_release_info_content <- osfr::osf_ls_files(current_release_info) + for (release_index in seq(nrow(current_release_info_content))) { + osfr::osf_mv(current_release_info_content[release_index, ], new_archive) + } + + # Upload staged files + osfr::osf_upload( + current_release_info, "osf-stage/.", + recurse = TRUE, + conflicts = "overwrite" + ) + # Upload dev dictionary + osfr::osf_upload( + current_release_info, + paste0(odm_dictionary$tmp_dictionary_directory, "/."), + conflicts = "overwrite" + ) +} + +.get_osf_files <- function(parsed_files) { + osf_files <- list() + for (file_id in names(parse_files_sheet_result$parsed_files)) { + if (parse_files_sheet_result$parsed_files[[file_id]] == "osf") { + osf_files[[file_id]] <- parse_files_sheet_result$parsed_files[[file_id]] + } + } + return(osf_files) +} diff --git a/release-automation/R/osf.R b/release-automation/R/osf.R new file mode 100644 index 0000000..0fe05ed --- /dev/null +++ b/release-automation/R/osf.R @@ -0,0 +1,37 @@ +library(tidyr) + +osf_repo_link <- "https://osf.io/xevnh/" + +developer_version_folder <- "Developer Version" +current_version_folder <- "Current Version" + +#' Download dictionary +#' +#' Utility function to download dictionary from OSF +#' +#' @param osf_token string containing the OSF auth token +#' @param 
osf_repo_link string containing the link to the dictionary to download +#' @param dictionary_set_path string containing the path to be set if one is not provided +#' +#' @return string containing the path to the saved dictionary. +download_dictionary <- function(osf_token, osf_folder) { + # Download file using passed credentials + osfr::osf_auth(osf_token) + repo_files <- osfr::osf_retrieve_node(osf_repo_link) %>% + osfr::osf_ls_files() + folder_name <- tidyr::if_else( + osf_folder == "dev", developer_version_folder, current_version_folder + ) + dictionary_folder <- repo_files[repo_files$name == folder_name, ] + dictionary_file <- osfr::osf_ls_files( + dictionary_folder, + type = "file", pattern = "ODM_dictionary_" + ) + download_info <- osfr::osf_download( + requested_dictionary, + path = dictionary_set_path, + conflicts = "overwrite" + ) + + return(download_info[1, "local_path"]) +} diff --git a/release-automation/R/parse-files-sheet.R b/release-automation/R/parse-files-sheet.R new file mode 100644 index 0000000..bca2b9b --- /dev/null +++ b/release-automation/R/parse-files-sheet.R @@ -0,0 +1,198 @@ +library(magrittr) + +source("R/get-latest-version-from-summary-sheet.R") +source("R/errors.R") +source("R/odm-dictionary-file.R") + +parse_files_sheet <- function(odm_dictionary) { + files_sheet <- + openxlsx::readWorkbook(odm_dictionary, files_sheet_name) + sets_sheet <- + openxlsx::readWorkbook(odm_dictionary, sets_sheet_name) + parts_sheet <- + openxlsx::readWorkbook(odm_dictionary, parts_sheet_name) + + dictionary_version <- get_latest_version_from_summary_sheet( + openxlsx::readWorkbook(odm_dictionary, summary_sheet_name) + ) + files_sheet_formatted <- .format_files_sheet_template_variable_columns( + files_sheet, + dictionary_version + ) + + parsed_files <- list() + # Whether there are any errors in the entire file sheet + warnings <- c() + errors <- c() + for (row_index in seq_len(nrow(files_sheet_formatted))) { + is_file_valid <- TRUE + files_sheet_row <- 
files_sheet_formatted[row_index, ] + + file_type <- files_sheet_row[[files_sheet_metadata$file_type$name]] + if (!files_sheet_row$fileType %in% + files_sheet_metadata$file_type$categories) { + warnings <- c(warnings, invalid_file_type_msg( + working_row[[files_sheet_metadata$file_type$name]], + working_row[[files_sheet_metadata$file_ID$name]] + )) + } + + destinations <- .parse_array_column(files_sheet_row$destinations) + for (destination in destinations) { + if (!destination %in% files_sheet_metadata$destinations$categories) { + warnings <- c(warnings, invalid_destination_msg( + destination, + files_sheet_row[[files_sheet_metadata$file_ID$name]] + )) + } + } + + get_sheet_names_result <- .get_sheet_names_for_part( + files_sheet_row[[files_sheet_metadata$part_ID$name]], + odm_dictionary + ) + if (is.null(get_sheet_names_result$sheet_names)) { + errors <- c(errors, get_sheet_names_result$errors) + is_file_valid <- FALSE + } else if (length(get_sheet_names_result$sheet_names) > 1 && + file_type == "csv") { + errors <- c( + errors, + invalid_csv_part_msg( + files_sheet_row[[files_sheet_metadata$file_ID$name]], + files_sheet_row[[files_sheet_metadata$part_ID$name]] + ) + ) + is_file_valid <- FALSE + } + + if (is_file_valid) { + parsed_files[[files_sheet_row$fileID]] <- list( + file_name = files_sheet_row[[files_sheet_metadata$file_name$name]], + file_type = file_type, + sheet_names = get_sheet_names_result$sheet_names, + add_headers = .parse_array_column(files_sheet_row$addHeaders), + destinations = destinations, + osf_location = files_sheet_row$osfLocation, + github_location = files_sheet_row$githubLocation + ) + } + } + + return(list( + parsed_files = parsed_files, + errors = errors, + warnings = warnings + )) +} + +.format_files_sheet_template_variable_columns <- function( + files_sheet, + version) { + template_variable_columns <- c( + files_sheet_metadata$file_name$name, + files_sheet_metadata$add_headers$name, + files_sheet_metadata$osf_location$name + ) + for 
(template_variable_column in template_variable_columns) { + files_sheet[[template_variable_column]] <- gsub( + template_variables$version, + version, + files_sheet[[template_variable_column]] + ) + } + return(files_sheet) +} + +.get_sheet_names_for_part <- function( + part_id, + odm_dictionary) { + sets_sheet <- openxlsx::readWorkbook( + odm_dictionary, + sets_sheet_name + ) + sets <- sets_sheet %>% + dplyr::filter(setID == part_id) + if (nrow(sets) > 0) { + sets_validation <- .validate_set_for_file_creation(part_id, odm_dictionary) + if (length(sets_validation) > 0) { + return(list( + sheet_names = NULL, + errors = sets_validation + )) + } + return(list( + sheet_names = sets[[sets_sheet_metadata$part_ID$name]], + errors = c() + )) + } + + + part_validation <- .validate_part_for_file_creation( + part_id, + odm_dictionary + ) + if (length(part_validation) > 0) { + return(list( + sheet_names = NULL, + errors = part_validation + )) + } + return( + list(sheet_names = c(part_id), errors = c()) + ) +} + +.parse_array_column <- function(column_value) { + if (column_value == odm_dictionary$dictionary_missing_value) { + return(NULL) + } + return( + strsplit(column_value, ";")[[1]] %>% + trimws(.) 
+ ) +} + +.validate_set_for_file_creation <- function( + set_id, + odm_dictionary) { + errors <- c() + sheets_in_set <- openxlsx::readWorkbook( + odm_dictionary, + sets_sheet_name + ) %>% + dplyr::filter(!!sets_sheet_metadata$set_ID$name == set_id) %>% + .[[sets_sheet_metadata$part_ID$name]] + file_id <- openxlsx::readWorkbook(odm_dictionary, files_sheet_name) %>% + dplyr::filter(!!files_sheet_metadata$part_ID$name == set_id) %>% + .[[files_sheet_metadata$file_ID$name]] + all_dictionary_sheets <- names(odm_dictionary) + for (set_sheet in sheets_in_set) { + if (!set_sheet %in% all_dictionary_sheets) { + errors <- c(errors, missing_sheet_msg( + file_id, + set_sheet, + all_dictionary_sheets + )) + } + } + return(errors) +} + +.validate_part_for_file_creation <- function( + part_id, + odm_dictionary) { + errors <- c() + file_id <- openxlsx::readWorkbook(odm_dictionary, files_sheet_name) %>% + dplyr::filter(partID == part_id) %>% + .[[files_sheet_metadata$file_ID$name]] + dictionary_sheets <- names(odm_dictionary) + if (!part_id %in% dictionary_sheets) { + errors <- c(errors, missing_sheet_msg( + file_id, + part_id, + dictionary_sheets + )) + } + return(errors) +} diff --git a/release-automation/R/parts-sheet.R b/release-automation/R/parts-sheet.R new file mode 100644 index 0000000..9cecae5 --- /dev/null +++ b/release-automation/R/parts-sheet.R @@ -0,0 +1,5 @@ +parts_sheet_metadata <- list( + part_ID = list( + name = "partID" + ) +) diff --git a/release-automation/R/sets-sheet.R b/release-automation/R/sets-sheet.R new file mode 100644 index 0000000..78f0a1f --- /dev/null +++ b/release-automation/R/sets-sheet.R @@ -0,0 +1,8 @@ +sets_sheet_metadata <- list( + part_ID = list( + name = "partID" + ), + set_ID = list( + name = "setID" + ) +) diff --git a/release-automation/R/warnings.R b/release-automation/R/warnings.R new file mode 100644 index 0000000..398cd14 --- /dev/null +++ b/release-automation/R/warnings.R @@ -0,0 +1,41 @@ +library(glue) + +source("R/files.R") + 
+version_number_mismatch_id <- "W1"
+version_number_mismatch_msg <- function(summary_sheet_version, file_name_version) {
+  return(glue::glue(
+    "Warning {version_number_mismatch_id}: The latest version defined in the ",
+    "summary sheet is not equal to the version found in the file name.",
+    "The summary sheet version is {summary_sheet_version} while the file ",
+    "sheet version is {file_name_version}."
+  ))
+}
+
+invalid_file_type_id <- "W2"
+invalid_file_type_msg <- function(invalid_file_type, file_id) {
+  return(glue::glue(
+    "Warning {invalid_file_type_id}: Invalid file type found in row with ",
+    "file ID {file_id}. Allowed file types are ",
+    "{.fmt_categories(files_sheet_metadata$file_type$categories)} whereas we ",
+    "found the file type {invalid_file_type}. Ignoring file."
+  ))
+}
+
+invalid_destination_id <- "W3"
+invalid_destination_msg <- function(invalid_destinations, file_id) {
+  return(glue::glue(
+    "Warning {invalid_destination_id}: Invalid destinations value found in ",
+    "row with file ID {file_id}. Allowed values in the destinations column ",
+    "are {.fmt_categories(files_sheet_metadata$destinations$categories)} whereas we found a
+    destination ", "{invalid_destinations}. Ignoring file."
+ )) +} + +.fmt_file_type_categories <- function() { + return(paste(files_sheet_metadata$file_type$categories, collapse = ", ")) +} + +.fmt_categories <- function(categories) { + return(paste(files_sheet_metadata$file_type$categories, collapse = ", ")) +} diff --git a/release-automation/man/create_release_files.Rd b/release-automation/man/create_release_files.Rd new file mode 100644 index 0000000..73f4f67 --- /dev/null +++ b/release-automation/man/create_release_files.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/files-validation.R +\name{create_release_files} +\alias{create_release_files} +\title{Create release files} +\usage{ +create_release_files(OSF_LINK, OSF_TOKEN, dictionary_path = NULL) +} +\arguments{ +\item{OSF_LINK}{link to the dictionary stored on OSF used for updating.} + +\item{OSF_TOKEN}{OSF auth token used for modifying OSF directories} + +\item{dictionary_path}{optional string containing path to the dictionary directory. When provided no dictionary is downloaded.} +} +\description{ +Creates release files given the user OSF link and auth token. +} diff --git a/release-automation/man/validate_files_sheet.Rd b/release-automation/man/validate_files_sheet.Rd new file mode 100644 index 0000000..f942d5d --- /dev/null +++ b/release-automation/man/validate_files_sheet.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/files-validation.R +\name{validate_files_sheet} +\alias{validate_files_sheet} +\title{Validate files sheet} +\usage{ +validate_files_sheet(dictionary_name, version, dictionary_path) +} +\arguments{ +\item{dictionary_name}{string containing the file dictionary name.} + +\item{version}{string containing the dictionary version.} + +\item{dictionary_path}{string containing path to the dictionary directory.} +} +\value{ +2 lists containing csvs to export and another list containing excels to export. 
+} +\description{ +Validate files sheet and its internal content for proper file creation. +} diff --git a/release-automation/man/validate_version.Rd b/release-automation/man/validate_version.Rd new file mode 100644 index 0000000..f9d7018 --- /dev/null +++ b/release-automation/man/validate_version.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/files-validation.R +\name{validate_version} +\alias{validate_version} +\title{Validate Version} +\usage{ +validate_version(dictionary_path) +} +\arguments{ +\item{dictionary_path}{string containing path to the dictionary directory.} +} +\value{ +list with: Dictionary file name, Dictionary version, Boolean representing if the versions match. +} +\description{ +Validate dictionary version between file name and summary sheets. +} diff --git a/release-automation/renv/.gitignore b/release-automation/renv/.gitignore new file mode 100644 index 0000000..0ec0cbb --- /dev/null +++ b/release-automation/renv/.gitignore @@ -0,0 +1,7 @@ +library/ +local/ +cellar/ +lock/ +python/ +sandbox/ +staging/ diff --git a/release-automation/renv/activate.R b/release-automation/renv/activate.R new file mode 100644 index 0000000..019b5a6 --- /dev/null +++ b/release-automation/renv/activate.R @@ -0,0 +1,994 @@ + +local({ + + # the requested version of renv + version <- "0.16.0" + + # the project directory + project <- getwd() + + # figure out whether the autoloader is enabled + enabled <- local({ + + # first, check config option + override <- getOption("renv.config.autoloader.enabled") + if (!is.null(override)) + return(override) + + # next, check environment variables + # TODO: prefer using the configuration one in the future + envvars <- c( + "RENV_CONFIG_AUTOLOADER_ENABLED", + "RENV_AUTOLOADER_ENABLED", + "RENV_ACTIVATE_PROJECT" + ) + + for (envvar in envvars) { + envval <- Sys.getenv(envvar, unset = NA) + if (!is.na(envval)) + return(tolower(envval) %in% c("true", "t", "1")) + } + + # enable by 
default + TRUE + + }) + + if (!enabled) + return(FALSE) + + # avoid recursion + if (identical(getOption("renv.autoloader.running"), TRUE)) { + warning("ignoring recursive attempt to run renv autoloader") + return(invisible(TRUE)) + } + + # signal that we're loading renv during R startup + options(renv.autoloader.running = TRUE) + on.exit(options(renv.autoloader.running = NULL), add = TRUE) + + # signal that we've consented to use renv + options(renv.consent = TRUE) + + # load the 'utils' package eagerly -- this ensures that renv shims, which + # mask 'utils' packages, will come first on the search path + library(utils, lib.loc = .Library) + + # unload renv if it's already been loaded + if ("renv" %in% loadedNamespaces()) + unloadNamespace("renv") + + # load bootstrap tools + `%||%` <- function(x, y) { + if (is.environment(x) || length(x)) x else y + } + + bootstrap <- function(version, library) { + + # attempt to download renv + tarball <- tryCatch(renv_bootstrap_download(version), error = identity) + if (inherits(tarball, "error")) + stop("failed to download renv ", version) + + # now attempt to install + status <- tryCatch(renv_bootstrap_install(version, tarball, library), error = identity) + if (inherits(status, "error")) + stop("failed to install renv ", version) + + } + + renv_bootstrap_tests_running <- function() { + getOption("renv.tests.running", default = FALSE) + } + + renv_bootstrap_repos <- function() { + + # check for repos override + repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA) + if (!is.na(repos)) + return(repos) + + # check for lockfile repositories + repos <- tryCatch(renv_bootstrap_repos_lockfile(), error = identity) + if (!inherits(repos, "error") && length(repos)) + return(repos) + + # if we're testing, re-use the test repositories + if (renv_bootstrap_tests_running()) + return(getOption("renv.tests.repos")) + + # retrieve current repos + repos <- getOption("repos") + + # ensure @CRAN@ entries are resolved + repos[repos == 
"@CRAN@"] <- getOption( + "renv.repos.cran", + "https://cloud.r-project.org" + ) + + # add in renv.bootstrap.repos if set + default <- c(FALLBACK = "https://cloud.r-project.org") + extra <- getOption("renv.bootstrap.repos", default = default) + repos <- c(repos, extra) + + # remove duplicates that might've snuck in + dupes <- duplicated(repos) | duplicated(names(repos)) + repos[!dupes] + + } + + renv_bootstrap_repos_lockfile <- function() { + + lockpath <- Sys.getenv("RENV_PATHS_LOCKFILE", unset = "renv.lock") + if (!file.exists(lockpath)) + return(NULL) + + lockfile <- tryCatch(renv_json_read(lockpath), error = identity) + if (inherits(lockfile, "error")) { + warning(lockfile) + return(NULL) + } + + repos <- lockfile$R$Repositories + if (length(repos) == 0) + return(NULL) + + keys <- vapply(repos, `[[`, "Name", FUN.VALUE = character(1)) + vals <- vapply(repos, `[[`, "URL", FUN.VALUE = character(1)) + names(vals) <- keys + + return(vals) + + } + + renv_bootstrap_download <- function(version) { + + # if the renv version number has 4 components, assume it must + # be retrieved via github + nv <- numeric_version(version) + components <- unclass(nv)[[1]] + + # if this appears to be a development version of 'renv', we'll + # try to restore from github + dev <- length(components) == 4L + + # begin collecting different methods for finding renv + methods <- c( + renv_bootstrap_download_tarball, + if (dev) + renv_bootstrap_download_github + else c( + renv_bootstrap_download_cran_latest, + renv_bootstrap_download_cran_archive + ) + ) + + for (method in methods) { + path <- tryCatch(method(version), error = identity) + if (is.character(path) && file.exists(path)) + return(path) + } + + stop("failed to download renv ", version) + + } + + renv_bootstrap_download_impl <- function(url, destfile) { + + mode <- "wb" + + # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17715 + fixup <- + Sys.info()[["sysname"]] == "Windows" && + substring(url, 1L, 5L) == "file:" + + if (fixup) 
+ mode <- "w+b" + + args <- list( + url = url, + destfile = destfile, + mode = mode, + quiet = TRUE + ) + + if ("headers" %in% names(formals(utils::download.file))) + args$headers <- renv_bootstrap_download_custom_headers(url) + + do.call(utils::download.file, args) + + } + + renv_bootstrap_download_custom_headers <- function(url) { + + headers <- getOption("renv.download.headers") + if (is.null(headers)) + return(character()) + + if (!is.function(headers)) + stopf("'renv.download.headers' is not a function") + + headers <- headers(url) + if (length(headers) == 0L) + return(character()) + + if (is.list(headers)) + headers <- unlist(headers, recursive = FALSE, use.names = TRUE) + + ok <- + is.character(headers) && + is.character(names(headers)) && + all(nzchar(names(headers))) + + if (!ok) + stop("invocation of 'renv.download.headers' did not return a named character vector") + + headers + + } + + renv_bootstrap_download_cran_latest <- function(version) { + + spec <- renv_bootstrap_download_cran_latest_find(version) + type <- spec$type + repos <- spec$repos + + message("* Downloading renv ", version, " ... 
", appendLF = FALSE) + + baseurl <- utils::contrib.url(repos = repos, type = type) + ext <- if (identical(type, "source")) + ".tar.gz" + else if (Sys.info()[["sysname"]] == "Windows") + ".zip" + else + ".tgz" + name <- sprintf("renv_%s%s", version, ext) + url <- paste(baseurl, name, sep = "/") + + destfile <- file.path(tempdir(), name) + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (inherits(status, "condition")) { + message("FAILED") + return(FALSE) + } + + # report success and return + message("OK (downloaded ", type, ")") + destfile + + } + + renv_bootstrap_download_cran_latest_find <- function(version) { + + # check whether binaries are supported on this system + binary <- + getOption("renv.bootstrap.binary", default = TRUE) && + !identical(.Platform$pkgType, "source") && + !identical(getOption("pkgType"), "source") && + Sys.info()[["sysname"]] %in% c("Darwin", "Windows") + + types <- c(if (binary) "binary", "source") + + # iterate over types + repositories + for (type in types) { + for (repos in renv_bootstrap_repos()) { + + # retrieve package database + db <- tryCatch( + as.data.frame( + utils::available.packages(type = type, repos = repos), + stringsAsFactors = FALSE + ), + error = identity + ) + + if (inherits(db, "error")) + next + + # check for compatible entry + entry <- db[db$Package %in% "renv" & db$Version %in% version, ] + if (nrow(entry) == 0) + next + + # found it; return spec to caller + spec <- list(entry = entry, type = type, repos = repos) + return(spec) + + } + } + + # if we got here, we failed to find renv + fmt <- "renv %s is not available from your declared package repositories" + stop(sprintf(fmt, version)) + + } + + renv_bootstrap_download_cran_archive <- function(version) { + + name <- sprintf("renv_%s.tar.gz", version) + repos <- renv_bootstrap_repos() + urls <- file.path(repos, "src/contrib/Archive/renv", name) + destfile <- file.path(tempdir(), name) + + message("* Downloading 
renv ", version, " ... ", appendLF = FALSE) + + for (url in urls) { + + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (identical(status, 0L)) { + message("OK") + return(destfile) + } + + } + + message("FAILED") + return(FALSE) + + } + + renv_bootstrap_download_tarball <- function(version) { + + # if the user has provided the path to a tarball via + # an environment variable, then use it + tarball <- Sys.getenv("RENV_BOOTSTRAP_TARBALL", unset = NA) + if (is.na(tarball)) + return() + + # allow directories + info <- file.info(tarball, extra_cols = FALSE) + if (identical(info$isdir, TRUE)) { + name <- sprintf("renv_%s.tar.gz", version) + tarball <- file.path(tarball, name) + } + + # bail if it doesn't exist + if (!file.exists(tarball)) { + + # let the user know we weren't able to honour their request + fmt <- "* RENV_BOOTSTRAP_TARBALL is set (%s) but does not exist." + msg <- sprintf(fmt, tarball) + warning(msg) + + # bail + return() + + } + + fmt <- "* Bootstrapping with tarball at path '%s'." 
+ msg <- sprintf(fmt, tarball) + message(msg) + + tarball + + } + + renv_bootstrap_download_github <- function(version) { + + enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE") + if (!identical(enabled, "TRUE")) + return(FALSE) + + # prepare download options + pat <- Sys.getenv("GITHUB_PAT") + if (nzchar(Sys.which("curl")) && nzchar(pat)) { + fmt <- "--location --fail --header \"Authorization: token %s\"" + extra <- sprintf(fmt, pat) + saved <- options("download.file.method", "download.file.extra") + options(download.file.method = "curl", download.file.extra = extra) + on.exit(do.call(base::options, saved), add = TRUE) + } else if (nzchar(Sys.which("wget")) && nzchar(pat)) { + fmt <- "--header=\"Authorization: token %s\"" + extra <- sprintf(fmt, pat) + saved <- options("download.file.method", "download.file.extra") + options(download.file.method = "wget", download.file.extra = extra) + on.exit(do.call(base::options, saved), add = TRUE) + } + + message("* Downloading renv ", version, " from GitHub ... ", appendLF = FALSE) + + url <- file.path("https://api.github.com/repos/rstudio/renv/tarball", version) + name <- sprintf("renv_%s.tar.gz", version) + destfile <- file.path(tempdir(), name) + + status <- tryCatch( + renv_bootstrap_download_impl(url, destfile), + condition = identity + ) + + if (!identical(status, 0L)) { + message("FAILED") + return(FALSE) + } + + message("OK") + return(destfile) + + } + + renv_bootstrap_install <- function(version, tarball, library) { + + # attempt to install it into project library + message("* Installing renv ", version, " ... 
", appendLF = FALSE) + dir.create(library, showWarnings = FALSE, recursive = TRUE) + + # invoke using system2 so we can capture and report output + bin <- R.home("bin") + exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R" + r <- file.path(bin, exe) + + args <- c( + "--vanilla", "CMD", "INSTALL", "--no-multiarch", + "-l", shQuote(path.expand(library)), + shQuote(path.expand(tarball)) + ) + + output <- system2(r, args, stdout = TRUE, stderr = TRUE) + message("Done!") + + # check for successful install + status <- attr(output, "status") + if (is.numeric(status) && !identical(status, 0L)) { + header <- "Error installing renv:" + lines <- paste(rep.int("=", nchar(header)), collapse = "") + text <- c(header, lines, output) + writeLines(text, con = stderr()) + } + + status + + } + + renv_bootstrap_platform_prefix <- function() { + + # construct version prefix + version <- paste(R.version$major, R.version$minor, sep = ".") + prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-") + + # include SVN revision for development versions of R + # (to avoid sharing platform-specific artefacts with released versions of R) + devel <- + identical(R.version[["status"]], "Under development (unstable)") || + identical(R.version[["nickname"]], "Unsuffered Consequences") + + if (devel) + prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r") + + # build list of path components + components <- c(prefix, R.version$platform) + + # include prefix if provided by user + prefix <- renv_bootstrap_platform_prefix_impl() + if (!is.na(prefix) && nzchar(prefix)) + components <- c(prefix, components) + + # build prefix + paste(components, collapse = "/") + + } + + renv_bootstrap_platform_prefix_impl <- function() { + + # if an explicit prefix has been supplied, use it + prefix <- Sys.getenv("RENV_PATHS_PREFIX", unset = NA) + if (!is.na(prefix)) + return(prefix) + + # if the user has requested an automatic prefix, generate it + auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", 
unset = NA) + if (auto %in% c("TRUE", "True", "true", "1")) + return(renv_bootstrap_platform_prefix_auto()) + + # empty string on failure + "" + + } + + renv_bootstrap_platform_prefix_auto <- function() { + + prefix <- tryCatch(renv_bootstrap_platform_os(), error = identity) + if (inherits(prefix, "error") || prefix %in% "unknown") { + + msg <- paste( + "failed to infer current operating system", + "please file a bug report at https://github.com/rstudio/renv/issues", + sep = "; " + ) + + warning(msg) + + } + + prefix + + } + + renv_bootstrap_platform_os <- function() { + + sysinfo <- Sys.info() + sysname <- sysinfo[["sysname"]] + + # handle Windows + macOS up front + if (sysname == "Windows") + return("windows") + else if (sysname == "Darwin") + return("macos") + + # check for os-release files + for (file in c("/etc/os-release", "/usr/lib/os-release")) + if (file.exists(file)) + return(renv_bootstrap_platform_os_via_os_release(file, sysinfo)) + + # check for redhat-release files + if (file.exists("/etc/redhat-release")) + return(renv_bootstrap_platform_os_via_redhat_release()) + + "unknown" + + } + + renv_bootstrap_platform_os_via_os_release <- function(file, sysinfo) { + + # read /etc/os-release + release <- utils::read.table( + file = file, + sep = "=", + quote = c("\"", "'"), + col.names = c("Key", "Value"), + comment.char = "#", + stringsAsFactors = FALSE + ) + + vars <- as.list(release$Value) + names(vars) <- release$Key + + # get os name + os <- tolower(sysinfo[["sysname"]]) + + # read id + id <- "unknown" + for (field in c("ID", "ID_LIKE")) { + if (field %in% names(vars) && nzchar(vars[[field]])) { + id <- vars[[field]] + break + } + } + + # read version + version <- "unknown" + for (field in c("UBUNTU_CODENAME", "VERSION_CODENAME", "VERSION_ID", "BUILD_ID")) { + if (field %in% names(vars) && nzchar(vars[[field]])) { + version <- vars[[field]] + break + } + } + + # join together + paste(c(os, id, version), collapse = "-") + + } + + 
renv_bootstrap_platform_os_via_redhat_release <- function() { + + # read /etc/redhat-release + contents <- readLines("/etc/redhat-release", warn = FALSE) + + # infer id + id <- if (grepl("centos", contents, ignore.case = TRUE)) + "centos" + else if (grepl("redhat", contents, ignore.case = TRUE)) + "redhat" + else + "unknown" + + # try to find a version component (very hacky) + version <- "unknown" + + parts <- strsplit(contents, "[[:space:]]")[[1L]] + for (part in parts) { + + nv <- tryCatch(numeric_version(part), error = identity) + if (inherits(nv, "error")) + next + + version <- nv[1, 1] + break + + } + + paste(c("linux", id, version), collapse = "-") + + } + + renv_bootstrap_library_root_name <- function(project) { + + # use project name as-is if requested + asis <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT_ASIS", unset = "FALSE") + if (asis) + return(basename(project)) + + # otherwise, disambiguate based on project's path + id <- substring(renv_bootstrap_hash_text(project), 1L, 8L) + paste(basename(project), id, sep = "-") + + } + + renv_bootstrap_library_root <- function(project) { + + prefix <- renv_bootstrap_profile_prefix() + + path <- Sys.getenv("RENV_PATHS_LIBRARY", unset = NA) + if (!is.na(path)) + return(paste(c(path, prefix), collapse = "/")) + + path <- renv_bootstrap_library_root_impl(project) + if (!is.null(path)) { + name <- renv_bootstrap_library_root_name(project) + return(paste(c(path, prefix, name), collapse = "/")) + } + + renv_bootstrap_paths_renv("library", project = project) + + } + + renv_bootstrap_library_root_impl <- function(project) { + + root <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA) + if (!is.na(root)) + return(root) + + type <- renv_bootstrap_project_type(project) + if (identical(type, "package")) { + userdir <- renv_bootstrap_user_dir() + return(file.path(userdir, "library")) + } + + } + + renv_bootstrap_validate_version <- function(version) { + + loadedversion <- utils::packageDescription("renv", fields = "Version") + if 
(version == loadedversion) + return(TRUE) + + # assume four-component versions are from GitHub; three-component + # versions are from CRAN + components <- strsplit(loadedversion, "[.-]")[[1]] + remote <- if (length(components) == 4L) + paste("rstudio/renv", loadedversion, sep = "@") + else + paste("renv", loadedversion, sep = "@") + + fmt <- paste( + "renv %1$s was loaded from project library, but this project is configured to use renv %2$s.", + "Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile.", + "Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library.", + sep = "\n" + ) + + msg <- sprintf(fmt, loadedversion, version, remote) + warning(msg, call. = FALSE) + + FALSE + + } + + renv_bootstrap_hash_text <- function(text) { + + hashfile <- tempfile("renv-hash-") + on.exit(unlink(hashfile), add = TRUE) + + writeLines(text, con = hashfile) + tools::md5sum(hashfile) + + } + + renv_bootstrap_load <- function(project, libpath, version) { + + # try to load renv from the project library + if (!requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) + return(FALSE) + + # warn if the version of renv loaded does not match + renv_bootstrap_validate_version(version) + + # load the project + renv::load(project) + + TRUE + + } + + renv_bootstrap_profile_load <- function(project) { + + # if RENV_PROFILE is already set, just use that + profile <- Sys.getenv("RENV_PROFILE", unset = NA) + if (!is.na(profile) && nzchar(profile)) + return(profile) + + # check for a profile file (nothing to do if it doesn't exist) + path <- renv_bootstrap_paths_renv("profile", profile = FALSE, project = project) + if (!file.exists(path)) + return(NULL) + + # read the profile, and set it if it exists + contents <- readLines(path, warn = FALSE) + if (length(contents) == 0L) + return(NULL) + + # set RENV_PROFILE + profile <- contents[[1L]] + if (!profile %in% c("", "default")) + Sys.setenv(RENV_PROFILE = profile) + + profile + + } + + 
renv_bootstrap_profile_prefix <- function() { + profile <- renv_bootstrap_profile_get() + if (!is.null(profile)) + return(file.path("profiles", profile, "renv")) + } + + renv_bootstrap_profile_get <- function() { + profile <- Sys.getenv("RENV_PROFILE", unset = "") + renv_bootstrap_profile_normalize(profile) + } + + renv_bootstrap_profile_set <- function(profile) { + profile <- renv_bootstrap_profile_normalize(profile) + if (is.null(profile)) + Sys.unsetenv("RENV_PROFILE") + else + Sys.setenv(RENV_PROFILE = profile) + } + + renv_bootstrap_profile_normalize <- function(profile) { + + if (is.null(profile) || profile %in% c("", "default")) + return(NULL) + + profile + + } + + renv_bootstrap_path_absolute <- function(path) { + + substr(path, 1L, 1L) %in% c("~", "/", "\\") || ( + substr(path, 1L, 1L) %in% c(letters, LETTERS) && + substr(path, 2L, 3L) %in% c(":/", ":\\") + ) + + } + + renv_bootstrap_paths_renv <- function(..., profile = TRUE, project = NULL) { + renv <- Sys.getenv("RENV_PATHS_RENV", unset = "renv") + root <- if (renv_bootstrap_path_absolute(renv)) NULL else project + prefix <- if (profile) renv_bootstrap_profile_prefix() + components <- c(root, renv, prefix, ...) 
+ paste(components, collapse = "/") + } + + renv_bootstrap_project_type <- function(path) { + + descpath <- file.path(path, "DESCRIPTION") + if (!file.exists(descpath)) + return("unknown") + + desc <- tryCatch( + read.dcf(descpath, all = TRUE), + error = identity + ) + + if (inherits(desc, "error")) + return("unknown") + + type <- desc$Type + if (!is.null(type)) + return(tolower(type)) + + package <- desc$Package + if (!is.null(package)) + return("package") + + "unknown" + + } + + renv_bootstrap_user_dir <- function() { + dir <- renv_bootstrap_user_dir_impl() + path.expand(chartr("\\", "/", dir)) + } + + renv_bootstrap_user_dir_impl <- function() { + + # use local override if set + override <- getOption("renv.userdir.override") + if (!is.null(override)) + return(override) + + # use R_user_dir if available + tools <- asNamespace("tools") + if (is.function(tools$R_user_dir)) + return(tools$R_user_dir("renv", "cache")) + + # try using our own backfill for older versions of R + envvars <- c("R_USER_CACHE_DIR", "XDG_CACHE_HOME") + for (envvar in envvars) { + root <- Sys.getenv(envvar, unset = NA) + if (!is.na(root)) + return(file.path(root, "R/renv")) + } + + # use platform-specific default fallbacks + if (Sys.info()[["sysname"]] == "Windows") + file.path(Sys.getenv("LOCALAPPDATA"), "R/cache/R/renv") + else if (Sys.info()[["sysname"]] == "Darwin") + "~/Library/Caches/org.R-project.R/R/renv" + else + "~/.cache/R/renv" + + } + + + renv_json_read <- function(file = NULL, text = NULL) { + + # if jsonlite is loaded, use that instead + if ("jsonlite" %in% loadedNamespaces()) + renv_json_read_jsonlite(file, text) + else + renv_json_read_default(file, text) + + } + + renv_json_read_jsonlite <- function(file = NULL, text = NULL) { + text <- paste(text %||% read(file), collapse = "\n") + jsonlite::fromJSON(txt = text, simplifyVector = FALSE) + } + + renv_json_read_default <- function(file = NULL, text = NULL) { + + # find strings in the JSON + text <- paste(text %||% read(file), 
collapse = "\n") + pattern <- '["](?:(?:\\\\.)|(?:[^"\\\\]))*?["]' + locs <- gregexpr(pattern, text, perl = TRUE)[[1]] + + # if any are found, replace them with placeholders + replaced <- text + strings <- character() + replacements <- character() + + if (!identical(c(locs), -1L)) { + + # get the string values + starts <- locs + ends <- locs + attr(locs, "match.length") - 1L + strings <- substring(text, starts, ends) + + # only keep those requiring escaping + strings <- grep("[[\\]{}:]", strings, perl = TRUE, value = TRUE) + + # compute replacements + replacements <- sprintf('"\032%i\032"', seq_along(strings)) + + # replace the strings + mapply(function(string, replacement) { + replaced <<- sub(string, replacement, replaced, fixed = TRUE) + }, strings, replacements) + + } + + # transform the JSON into something the R parser understands + transformed <- replaced + transformed <- gsub("{}", "`names<-`(list(), character())", transformed, fixed = TRUE) + transformed <- gsub("[[{]", "list(", transformed, perl = TRUE) + transformed <- gsub("[]}]", ")", transformed, perl = TRUE) + transformed <- gsub(":", "=", transformed, fixed = TRUE) + text <- paste(transformed, collapse = "\n") + + # parse it + json <- parse(text = text, keep.source = FALSE, srcfile = NULL)[[1L]] + + # construct map between source strings, replaced strings + map <- as.character(parse(text = strings)) + names(map) <- as.character(parse(text = replacements)) + + # convert to list + map <- as.list(map) + + # remap strings in object + remapped <- renv_json_remap(json, map) + + # evaluate + eval(remapped, envir = baseenv()) + + } + + renv_json_remap <- function(json, map) { + + # fix names + if (!is.null(names(json))) { + lhs <- match(names(json), names(map), nomatch = 0L) + rhs <- match(names(map), names(json), nomatch = 0L) + names(json)[rhs] <- map[lhs] + } + + # fix values + if (is.character(json)) + return(map[[json]] %||% json) + + # handle true, false, null + if (is.name(json)) { + text <- 
as.character(json) + if (text == "true") + return(TRUE) + else if (text == "false") + return(FALSE) + else if (text == "null") + return(NULL) + } + + # recurse + if (is.recursive(json)) { + for (i in seq_along(json)) { + json[i] <- list(renv_json_remap(json[[i]], map)) + } + } + + json + + } + + # load the renv profile, if any + renv_bootstrap_profile_load(project) + + # construct path to library root + root <- renv_bootstrap_library_root(project) + + # construct library prefix for platform + prefix <- renv_bootstrap_platform_prefix() + + # construct full libpath + libpath <- file.path(root, prefix) + + # attempt to load + if (renv_bootstrap_load(project, libpath, version)) + return(TRUE) + + # load failed; inform user we're about to bootstrap + prefix <- paste("# Bootstrapping renv", version) + postfix <- paste(rep.int("-", 77L - nchar(prefix)), collapse = "") + header <- paste(prefix, postfix) + message(header) + + # perform bootstrap + bootstrap(version, libpath) + + # exit early if we're just testing bootstrap + if (!is.na(Sys.getenv("RENV_BOOTSTRAP_INSTALL_ONLY", unset = NA))) + return(TRUE) + + # try again to load + if (requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) { + message("* Successfully installed and loaded renv ", version, ".") + return(renv::load()) + } + + # failed to download or load renv; warn the user + msg <- c( + "Failed to find an renv installation: the project will not be loaded.", + "Use `renv::activate()` to re-initialize the project." + ) + + warning(paste(msg, collapse = "\n"), call. 
= FALSE) +
+}) diff --git a/release-automation/renv/settings.dcf b/release-automation/renv/settings.dcf new file mode 100644 index 0000000..169d82f --- /dev/null +++ b/release-automation/renv/settings.dcf @@ -0,0 +1,10 @@ +bioconductor.version: +external.libraries: +ignored.packages: +package.dependency.fields: Imports, Depends, LinkingTo +r.version: +snapshot.type: implicit +use.cache: TRUE +vcs.ignore.cellar: TRUE +vcs.ignore.library: TRUE +vcs.ignore.local: TRUE diff --git a/release-automation/specs/release-automation.qmd b/release-automation/specs/release-automation.qmd new file mode 100644 index 0000000..3170458 --- /dev/null +++ b/release-automation/specs/release-automation.qmd @@ -0,0 +1,288 @@ +--- +editor: + markdown: + wrap: sentence +--- + +# Release Automation + +This document will go over the specifications for a software system to automate the release process for the Ottawa Data Model (ODM). + +## Audience + +The primary audience for this document are software engineers who will be responsible for developing the system. + +## Context + +Wastewater surveillance enables public health departments to monitor communities for possible outbreaks of different infectious diseases using wastewater samples, most notably the different variants of the COVID-19 virus. +The ODM dictionary is an open source data model used to represent wastewater surveillance data with all its documentation available [online](https://github.com/Big-Life-Lab/PHES-ODM). + +Practically, the dictionary is implemented as an Excel document. +Although the main purpose of the Excel sheet is to represent the data model details in a machine-actionable format, it also contains other sheets, for example data templates that make it easy for users to input their wastewater data. + +Releasing a new version of the dictionary is a laborious process that requires converting the Excel document to multiple output formats. +In addition, the different release files are uploaded to multiple release locations.
+Details about the release process are available [online](https://odm.discourse.group/t/generation-of-tables-and-lists-from-the-odm-working-excel-file/99/7). +The current manual process of implementing a release takes time away from the dictionary developers and is susceptible to errors. +Automating this process would increase the release's quality, as well as give back time to the dictionary staff. + +## User interactions + +The user will interact with the software system in two ways: + +1. **Triggering a release**: The user will use the GitHub actions tab to start a new release. The steps are outlined in [this diagram](./trigerring-a-release.puml); and +2. **Merging a release**: Once the user is happy with the release changes, they can merge their release by merging the release PR. The steps are outlined in [this diagram](./merging-a-release.puml). + +## Software Constraints + +- The software system will use GitHub actions as its continuous integration tool. +- The software system will be written in R or Python. + +## Features + +### RA-1: Triggering the Process + +A user will manually trigger the release process from the [Github Actions tab](https://github.com/Big-Life-Lab/PHES-ODM/actions) in the [PHES-ODM repo](https://github.com/Big-Life-Lab/PHES-ODM). +The following inputs will need to be provided by the user: + +1. Link to the Excel dictionary to use for the release. Currently, only links to an OSF repo are allowed. + + The Excel dictionary used for the release is in the OSF.io `Developer dictionaries/New version` folder (https://osf.io/sxuaf/). The developer's version of the Excel dictionary is used. I.e. `ODM_dev-dictionary-2.0.0.xlsx` + +2. The OSF personal access token to use. The system will need this to gain access to the repo and perform operations on it. + +### RA-2: Creating the Release Files + +The first step in each release is the creation of the different files that form the develop copy of the dictionary.
+The original copy of the Excel files is on OHRI SharePoint. +The dictionary staff will manually copy the dictionary from Sharepoint and upload the copy to the OSF.io `Developer dictionaries/New version` folder. +The files are created from this dictionary Excel document whose link is provided by the user as an input. +In addition, the files tab in the document contains all the metadata needed for this step. + +The structure of the files tab is shown [below](./release-automation.qmd#files-sheet). +Each row in the files sheet represents a file to be created in the release. + +The file name can be constructed using the `[name](./release-automation.qmd#name)` and `[type](./release-automation.qmd#type)` columns in the files sheet. +The [`type`](./release-automation.qmd#type) column decides what the file extension should be, **.csv** for CSV files and **.xlsx** for excel files. + +The [`part`](./release-automation.qmd#part) column determines where the contents of the file come from or what to fill the file with. +The column can contain an ID for a part or a set which should match up with a row in the parts sheet or sets sheet respectively. +When the column contains a reference to a part, the content of the file should be filled with the sheet in the dictionary that has the same name as that part. +When the column contains a reference to a set, the sheets in the dictionary with the same name as each part in the set should be added as a sheet in the file. +The name of the sheets should match the name of the part it represents. + +The [`addHeader`](./release-automation.qmd#addheaders) column allows the user to add a string as the first line in the file. +Reasons for doing this are explained [here](https://odm.discourse.group/t/generation-of-tables-and-lists-from-the-odm-working-excel-file/99/9). +Each header should be added as a cell in the first row of the sheet.
+ +For example, consider the following release file, + +| A | B | +|-----|-----| +| 1 | 2 | + +If the value of the `addHeader` column is `version;1.1.0;name;John Doe`, then the release file would be modified as below, + +| version | 1.1.0 | name | John Doe | +|---------|-------|------|----------| +| A | B | | | +| 1 | 2 | | | + +### RA-3: Deploying the files to GitHub + +Once the release files have been built they will need to be uploaded to their release destinations. All of this information is encoded in the [`destinations`](./#destinations) column in the [`files`](./#files-sheet) sheet. + +Files whose [`destinations`](./#destinations) column contains the `github` keyword will need to be uploaded to the [PHES-ODM repo](https://github.com/Big-Life-Lab/PHES-ODM). The [`githubLocation`](./#githubLocation) column identifies the path where the file should be uploaded. + +The following two states will need to be handled + +1. When there are no release files on GitHub. + The files should be created and put in their correct locations. + A branch should be created from `main` and named `release-{version}` and files put in there. + A commit should be made with the new files called `[BOT] release-{version}` + A PR should be made from the new branch into `main`. The PR should be called `[BOT] Release {version}` +2. When there is a release version on GitHub + 2.1. If the previous release is newer than the new release, then an error should be thrown and the entire process should stop. + 2.2: Otherwise, all the old files need to be deleted. The same steps as the first state need to be followed + +Finally, for every new release any existing release branches need to be deleted and their PRs need to be closed. + +### RA-4: Deploying the files to OSF + +Similar to deploying files to GitHub, files whose [`destinations`](./release-automation.qmd#destinations) column contains the `osf` keyword need to be uploaded to OSF.
+The `osfLocation` column identifies the path where the file should be uploaded. + +The deployment to OSF should take place only when the release branch on GitHub has been merged to `main`. + +There are three states that need to be handled when deploying the files to OSF, + +1. When there are no release files on OSF. This means that this is the first release of the dictionary and all the files should be created and put in their correct location. +2. When there is a previous release on OSF whose version is not the same as the new release. 2.1. If the previous release is newer than the new release, then an error should be thrown and the entire process should stop. 2.2: Otherwise, all the old files need to be moved to a sub folder within an archive folder. The name of the sub folder should be the previous release version. Within the sub folder, the previous release files should be placed in their old paths. From there, the new files should be created and put in their correct location. +3. When there is a previous release on OSF whose version is the same as the new release. All the old files should be deleted. The new files should be created and put in their correct location. + +### RA-5: Trigger a PR in the PHES-ODM-Doc repo + +Once the upload has been completed to all relevant destinations, a workflow should be triggered in the [PHES-ODM-Doc](https://github.com/Big-Life-Lab/PHES-ODM-Doc). +This will allow the documentation repo to update itself with the new files. + +### RA-6: Trigger a PR in the PHES-ODM-Validation repo + +Once the PR has been created in the PHES-ODM repo, a workflow will need to be triggered in the [PHES-ODM-Validation](https://github.com/Big-Life-Lab/PHES-ODM-Validation) repo to allow it to update to the new dictionary files. + +## Errors + +### InvalidDestinations + +Error generated when the `destinations` column has a value that is not one of +accepted categories. + +## Reference + +This section contains reference material used throughout the document.
+
+### Sheet Data Types
+
+This section goes over the data types that each column in a sheet can be encoded as.
+Although all sheet files, for example CSV and Excel, are read in as strings, these data types build on top of that encoding to simulate other data types.
+The data types are:
+
+#### string
+
+#### templateString
+
+A string with placeholders for data that will need to be filled in by a program.
+The placeholders are identified by opening and closing curly braces.
+
+For example, consider the template string "The file version is {version}".
+It has only one variable, `version`, which will need to be filled in.
+
+The full list of allowed variables is documented in the [template variables section](./#template-variables).
+
+#### categorical
+
+A column with only a certain number of allowed values.
+
+For example, a categorical column that encodes the type of a pet could have the categories "dog" and "cat".
+
+#### list
+
+A column that encodes multiple values.
+
+The multiple values are separated by a semi-colon (;).
+
+For example, a column that encodes the names of a person's pets could have the value "Roscoe;Amy".
+
+#### nullable
+
+An add-on type that allows a column to have null values.
+
+Null values are encoded as `N/A`
+
+### Template Variables
+
+#### version
+
+The current release version
+
+Can be obtained from the `version` column in the `summary` sheet in the dictionary
+
+This variable should be set to the latest version in the version column
+
+### Files Sheet
+
+This section documents details about the different columns in the files sheet in the dictionary.
+This is the sheet that contains metadata used to build and deploy the release files.
+
+Unless otherwise stated, all columns are required
+
+#### ID
+
+The unique identifier for this file.
+Mainly used as the primary key for the sheet.
+
+type: [string](./#string)
+
+#### label
+
+Human readable description for the file
+
+type: [string](./#string), [nullable](./#nullable)
+
+#### name
+
+The name of the file in the release
+
+type: [templateString](./#templatestring), [non-nullable](./#nullable)
+
+
+#### type
+
+The file type
+
+type: [categorical](./#categorical), [non-nullable](./#nullable)
+
+
+categories:
+
+- excel
+- csv
+
+#### part
+
+The name of the part that identifies what sheet(s) from the dictionary should be included in the file
+
+type: [string](./#string), [non-nullable](./#nullable)
+
+Validations:
+
+* The value should be set to a `set` or a `part`
+* The value can be set to a `set` only if the [type](./#type) column is `excel`
+
+#### addHeaders
+
+The contents of an optional header row to add as the first line in the file.
+Each header should be added as a cell in the first row.
+
+type: [list](./#list) of [templateString](./#templatestring), [nullable](./#nullable)
+
+#### destinations
+
+Where the file will be uploaded to
+
+type: [list](./#list) of [categorical](./#categorical), [non-nullable](./#nullable)
+
+Categories
+
+* osf
+* github
+
+Validations
+
+* Has to have at least one destination
+
+#### osfLocation
+
+The path for the file on OSF
+
+type: [string](./#string), [nullable](./#nullable)
+
+Validations:
+
+* Required if one of the destinations is osf
+
+#### githubLocation
+
+The path for the file on GitHub.
+
+This path is relative to the root of the
+[PHES-ODM repository](https://github.com/Big-Life-Lab/PHES-ODM). For example,
+if the `githubLocation` value is `data/raw`, then a folder called `data` should
+be created within the root of the repository, within which a folder called
+`raw` should be created.
+ +type: [string](./#string), [nullable](./#nullable) + +Validations: + +* Required if one of the destinations is Github + diff --git a/release-automation/tests/testthat.R b/release-automation/tests/testthat.R new file mode 100644 index 0000000..dceea32 --- /dev/null +++ b/release-automation/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + +library(testthat) +library(PHES.ODM) + +test_check("PHES.ODM") diff --git a/release-automation/tests/testthat/helper-create-odm-dictionary.R b/release-automation/tests/testthat/helper-create-odm-dictionary.R new file mode 100644 index 0000000..a27f113 --- /dev/null +++ b/release-automation/tests/testthat/helper-create-odm-dictionary.R @@ -0,0 +1,12 @@ +create_odm_dictionary <- function(dictionary_df) { + odm_dictionary <- openxlsx::createWorkbook("ODM-dictionary") + for (sheet_name in names(dictionary_df)) { + openxlsx::addWorksheet(odm_dictionary, sheet_name) + openxlsx::writeData( + odm_dictionary, + sheet_name, + dictionary_df[[sheet_name]] + ) + } + return(odm_dictionary) +} diff --git a/release-automation/tests/testthat/test-create-files.R b/release-automation/tests/testthat/test-create-files.R new file mode 100644 index 0000000..5e72313 --- /dev/null +++ b/release-automation/tests/testthat/test-create-files.R @@ -0,0 +1,110 @@ +test_that("Creating a CSV file", { + odm_dictionary_df <- list( + "summary" = data.frame( + version = c("1.0.0") + ), + "files" = data.frame( + fileID = c("partsSheet"), + label = c(""), + name = c("ODM_parts_{version}"), + fileType = c("csv"), + partID = c("parts"), + addHeaders = c("version;{version}"), + destinations = c("github"), + osfLocation = c("N/A"), + githubLocation 
= c("dictionary-tables/") + ), + parts = data.frame( + partID = c("parts") + ), + sets = data.frame(setID = c("sets")) + ) + odm_dictionary <- create_odm_dictionary(odm_dictionary_df) + parse_result <- parse_files_sheet(odm_dictionary) + + file_creation_path <- "../assets/files" + withr::defer({ + unlink(file_creation_path, recursive = TRUE) + }) + + create_files(parse_result$parsed_files, odm_dictionary, file_creation_path) + + csv_file_path <- file.path( + file_creation_path, + "/dictionary-tables/ODM_parts_1.0.0.csv" + ) + expect_equal( + file.exists(csv_file_path), + TRUE + ) + expect_equal( + read.csv(csv_file_path), + data.frame( + version = c("partID", "parts"), + X1.0.0 = c(NA, NA) + ) + ) +}) + +test_that("Creating an Excel file", { + odm_dictionary_df <- list( + "summary" = data.frame( + version = c("1.0.0") + ), + "files" = data.frame( + fileID = c("dictionary"), + label = c(""), + name = c("ODM_parts_{version}"), + fileType = c("excel"), + partID = c("dictionarySheets"), + addHeaders = c("version;{version}"), + destinations = c("github"), + osfLocation = c("N/A"), + githubLocation = c("dictionary-tables/") + ), + parts = data.frame( + partID = c("parts", "sets", "dictionarySheets") + ), + sets = data.frame( + setID = c("dictionarySheets", "dictionarySheets"), + partID = c("parts", "sets") + ) + ) + odm_dictionary <- create_odm_dictionary(odm_dictionary_df) + parse_result <- parse_files_sheet(odm_dictionary) + + file_creation_path <- "../assets/files" + withr::defer({ + unlink(file_creation_path, recursive = TRUE) + }) + + create_files(parse_result$parsed_files, odm_dictionary, file_creation_path) + + excel_file_path <- file.path( + file_creation_path, + "/dictionary-tables/ODM_parts_1.0.0.xlsx" + ) + expect_equal( + file.exists(excel_file_path), + TRUE + ) + created_excel <- openxlsx::loadWorkbook( + file.path(excel_file_path) + ) + expect_equal( + openxlsx::readWorkbook(created_excel, "parts"), + data.frame( + version = c("partID", "parts", "sets", 
"dictionarySheets"), + "1.0.0" = c("", "", "", ""), + check.names = FALSE + ) + ) + expect_equal( + openxlsx::readWorkbook(created_excel, "sets"), + data.frame( + version = c("setID", "dictionarySheets", "dictionarySheets"), + "1.0.0" = c("partID", "parts", "sets"), + check.names = FALSE + ) + ) +}) diff --git a/release-automation/tests/testthat/test-parse_files_sheet.R b/release-automation/tests/testthat/test-parse_files_sheet.R new file mode 100644 index 0000000..46f0a46 --- /dev/null +++ b/release-automation/tests/testthat/test-parse_files_sheet.R @@ -0,0 +1,123 @@ +test_that("Correctly parse CSV files", { + odm_dictionary_df <- list( + "summary" = data.frame( + version = c("1.0.0") + ), + "files" = data.frame( + fileID = c("partsSheet"), + label = c(""), + name = c("ODM_parts_{version}"), + fileType = c("csv"), + partID = c("parts"), + addHeaders = c("version;{version}"), + destinations = c("github"), + osfLocation = c("N/A"), + githubLocation = c("dictionary-tables/") + ), + parts = data.frame( + partID = c("parts") + ), + sets = data.frame(setID = c("sets")) + ) + odm_dictionary <- create_odm_dictionary(odm_dictionary_df) + parse_result <- parse_files_sheet(odm_dictionary) + + expected_parsed_files <- list( + partsSheet = list( + file_name = "ODM_parts_1.0.0", + file_type = "csv", + sheet_names = c("parts"), + add_headers = c("version", "1.0.0"), + destinations = c("github"), + osf_location = "N/A", + github_location = "dictionary-tables/" + ) + ) + + expect_equal(parse_result$parsed_files, expected_parsed_files) + expect_length(parse_result$errors, 0) + expect_length(parse_result$warnings, 0) +}) + +test_that("Correctly parse Excel files when partID is a part", { + odm_dictionary_df <- list( + "summary" = data.frame( + version = c("1.0.0") + ), + "files" = data.frame( + fileID = c("partsSheet"), + label = c(""), + name = c("ODM_parts_{version}"), + fileType = c("excel"), + partID = c("parts"), + addHeaders = c("version;{version}"), + destinations = 
c("github"), + osfLocation = c("N/A"), + githubLocation = c("dictionary-tables/") + ), + parts = data.frame( + partID = c("parts") + ), + sets = data.frame(setID = c("sets")) + ) + odm_dictionary <- create_odm_dictionary(odm_dictionary_df) + parse_result <- parse_files_sheet(odm_dictionary) + + expected_parsed_files <- list( + partsSheet = list( + file_name = "ODM_parts_1.0.0", + file_type = "excel", + sheet_names = c("parts"), + add_headers = c("version", "1.0.0"), + destinations = c("github"), + osf_location = "N/A", + github_location = "dictionary-tables/" + ) + ) + + expect_equal(parse_result$parsed_files, expected_parsed_files) + expect_length(parse_result$errors, 0) + expect_length(parse_result$warnings, 0) +}) + +test_that("Correctly parse Excel files when partID is a set", { + odm_dictionary_df <- list( + "summary" = data.frame( + version = c("1.0.0") + ), + "files" = data.frame( + fileID = c("dictionary"), + label = c(""), + name = c("ODM_parts_{version}"), + fileType = c("excel"), + partID = c("dictionarySheets"), + addHeaders = c("version;{version}"), + destinations = c("github"), + osfLocation = c("N/A"), + githubLocation = c("dictionary-tables/") + ), + parts = data.frame( + partID = c("parts", "sets", "dictionarySheets") + ), + sets = data.frame( + setID = c("dictionarySheets", "dictionarySheets"), + partID = c("parts", "sets") + ) + ) + odm_dictionary <- create_odm_dictionary(odm_dictionary_df) + parse_result <- parse_files_sheet(odm_dictionary) + expected_parsed_files <- list( + dictionary = list( + file_name = "ODM_parts_1.0.0", + file_type = "excel", + sheet_names = c("parts", "sets"), + add_headers = c("version", "1.0.0"), + destinations = c("github"), + osf_location = "N/A", + github_location = "dictionary-tables/" + ) + ) + expect_equal(parse_result$parsed_files, expected_parsed_files) + expect_length(parse_result$errors, 0) + expect_length(parse_result$warnings, 0) +}) diff --git a/release-automation/user-guide/PHES-ODM-automation.qmd 
b/release-automation/user-guide/PHES-ODM-automation.qmd new file mode 100644 index 0000000..7956baf --- /dev/null +++ b/release-automation/user-guide/PHES-ODM-automation.qmd @@ -0,0 +1,105 @@ +# Release Automation User Guide + +This user guide provides an overview of the software system designed to automate the release process for the Ottawa Data Model (ODM). The system aims to simplify and streamline the release of the ODM dictionary, which is essential for wastewater surveillance and public health monitoring. + +## Table of Contents + +1. [Introduction](#introduction) +2. [Context](#context) +3. [User Interactions](#user-interactions) +3. [Software Constraints](#software-constraints) +4. [Features](#features) + - [RA-1: Triggering the Process](#ra-1-triggering-the-process) + - [RA-2: Creating the Release Files](#ra-2-creating-the-release-files) + - [RA-3: Deploying the Files to GitHub](#ra-3-deploying-the-files-to-github) + - [RA-4: Deploying the Files to OSF](#ra-4-deploying-the-files-to-osf) + - [RA-5: Triggering a PR in the PHES-ODM-Doc repo](#ra-5-triggering-a-pr-in-the-phes-odm-doc-repo) + - [RA-6: Triggering a PR in the PHES-ODM-Validation repo](#ra-6-triggering-a-pr-in-the-phes-odm-validation-repo) +5. [Reference](#reference) + +## 1. Introduction + +The Release Automation system is designed to automate the release process for the Ottawa Data Model (ODM), a critical component of wastewater surveillance for monitoring infectious diseases. This guide explains how to use the system, context, user interactions, software constraints, and features. + +## 2. Context + +The ODM dictionary is an open-source data model used for representing wastewater surveillance data, and its documentation is available online [here](https://github.com/Big-Life-Lab/PHES-ODM-Doc). The dictionary is implemented as an Excel document, and its main purpose is to provide machine-actionable data model details, including data templates for users to input their wastewater data. 
+ +Releasing a new version of the dictionary involves converting the Excel document to multiple output formats and uploading the release files to various locations. The current manual process is time-consuming and error-prone. Automating this process aims to improve the quality of releases and save time for the dictionary staff. + +## 4. User Interactions + +Users will interact with the software system in two main ways: + +1. **Triggering a Release**: Users can initiate a new release using the GitHub Actions tab. This then creates a new release branch to be reviewed in a PR. +2. **Merging a Release**: After reviewing the release changes, users can merge the release by merging the release pull request (PR). Once a release branch is merged a second action is triggered that is responsible for uploading the new files to OSF. + +## 5. Software Constraints + +The software system operates under the following constraints: + +- The system uses GitHub Actions as its continuous integration tool. +- The system is written in R. + +## 6. Features + +The Release Automation system comprises several features to automate the release process. Each feature is identified with a unique code (e.g., RA-1, RA-2). + +### RA-1: Triggering the Process + +To trigger the release process, users need to provide the following inputs: + +1. Link to the OSF repo containing the dictionary. Currently, only links to an OSF repo are allowed. The Excel dictionary used for the release is in the OSF.io "dev-release" folder. For example, "ODM_dev-dictionary-2.0.0.xlsx." +2. The OSF personal access token to use. This token is required for system access to the repository and performing necessary operations. + +Both of these inputs are stored as github secrets: `OSF_REPO_LINK` and `OSF_TOKEN`. + +### RA-2: Creating the Release Files + +The creation of release files is the first step in the release process. These files are generated from the Excel dictionary provided by the user. 
The "files" tab in the document contains all the metadata needed for this step. The structure of the "files" tab is documented in the "files sheet" section.
+
+### RA-3: Deploying the Files to GitHub
+
+Files designated for GitHub deployment are uploaded to the PHES-ODM repo.
+
+New files should be created and placed in their correct locations. A new branch named "release-{version}" is created from "main," and the files are added to this branch. A commit is made with the new files, labeled "[BOT] release-{version}." A pull request (PR) is created from the new branch into "main," titled "[BOT] Release {version}."
+
+### RA-4: Deploying the Files to OSF
+
+Files designated for OSF deployment are uploaded to the OSF repository.
+
+All files are created and placed in their correct locations, and the previous version of the files is archived inside the "Archived releases" folder, in a subdirectory named "release_{version}".
+
+## 7. Reference
+
+This section contains reference material used throughout the document, including sheet data types and details about the "files" sheet in the dictionary.
+
+### Sheet Data Types
+
+- **string**
+- **templateString**: A string with placeholders
+ for data to be filled in
+ by a program.
+- **categorical**: A column with a limited set of allowed values.
+- **list**: A column that encodes multiple values separated by a semicolon.
+- **nullable**: Allows a column to have null values.
+
+### Template Variables
+
+- **version**: The current release version, obtained from the "version" column in the "summary" sheet in the dictionary.
+
+### Files Sheet
+
+This section documents details about the different columns in the "files" sheet in the dictionary. This sheet contains metadata used to build and deploy release files.
+
+- **ID**: The unique identifier for the file.
+- **label**: A human-readable description for the file.
+- **name**: The name of the file in the release. It can contain templateString.
For example "ODM_{version}.xlsx".
+- **type**: The file type (categorical: excel or csv). Currently only these file types are supported.
+- **part**: The name of the part that identifies what sheet(s) from the dictionary should be included in the file. In cases where this part is a set, an excel file is created with all parts belonging to that set as separate sheets.
+- **addHeaders**: Contents of an optional header row to add as the first line in the file. This currently only applies to csv files.
+- **destinations**: Where the file will be uploaded (categorical: osf, github). Only these destinations are supported.
+- **osfLocation**: The path for the file on OSF. This is only required if the file is being uploaded to OSF. The path should be relative to the root of the "Current Release" folder.
+- **githubLocation**: The path for the file on GitHub. This is only required if the file is being uploaded to GitHub. The path should be relative to the root of the repository.
+
+For further details on the ODM dictionary please refer to the ODM documentation found [here](https://docs.phes-odm.org/).
\ No newline at end of file
diff --git a/release-automation/user-guide/common-bugs.qmd b/release-automation/user-guide/common-bugs.qmd
new file mode 100644
index 0000000..214ac43
--- /dev/null
+++ b/release-automation/user-guide/common-bugs.qmd
@@ -0,0 +1,34 @@
+# Common Bugs and Where to Find Them
+
+## Introduction
+
+The PHES-ODM automatic deployment has files sheet validation as well as file creation. This document is intended to help you identify and fix common bugs that may occur during the deployment process.
+
+The warning messages are stored within a log file which is located in the R-package directory. The log file is named "log" and is updated every time the deployment script is run.
+
+## Common Bugs
+
+### 1. 'Multiple dictionaries found, only one dictionary should be stored.'
+This is encountered when multiple files matching the dictionary file regex are found in the dictionary directory. The dictionary directory is located on OSF; however, a local one can be passed when debugging. The current regex is "ODM_dictionary_(\\d.*?).xlsx".
+To resolve this issue check that only one dictionary is present within the passed directory path, or within the "dev-release" directory on OSF.
+
+### 2. 'No valid files were detected. Make sure the dictionary file is named correctly.'
+This is encountered when no files matching the dictionary file regex are found in the dictionary directory. The dictionary directory is located on OSF; however, a local one can be passed when debugging. The current regex is "ODM_dictionary_(\\d.*?).xlsx".
+
+### 3. 'Dictionary file name version does not reflect version in summary sheet'
+The version number in the dictionary file name is the number after the last underscore. The version number in the summary sheet is the number in the "Version" column. To resolve this issue check that the version number in the dictionary file name matches the version number in the summary sheet.
+
+### 4. 'File ID: {fileID} has an invalid destination set, and will not be exported.'
+This is encountered when the destination column in the files sheet is not set to either of the valid destinations ("ODM" or "github"). Using the provided fileID (the unique identifier used in the files sheet), check that the destination column is set to either "ODM" or "github" for the supplied file. Note that the process will continue and this sheet will not be exported.
+
+### 5. '{single_part} does not have a matching sheet but is part of {set_name} set, which was selected to be exported. This sheet cannot be exported.'
+This is encountered when a sheet that is part of a set is not present in the dictionary file. To resolve this issue check that a sheet named {single_part} is present in the dictionary file.
+ Alternatively, it could be removed from the {set_name} set in the sets sheet. Note that the process will continue and this sheet will not be exported; however, the other sheets of the set will.
+
+### 6. '{single_part} is missing from the parts sheet but is present in the {set_name} set, therefore it cant be exported.'
+This is encountered when a {single_part} part listed in the sets sheet under a selected {set_name} set is not present in the parts sheet. To resolve this issue check that {single_part} is present in the parts sheet. Alternatively, it could be removed from the {set_name} set in the sets sheet. Note that the process will continue and this sheet will not be exported; however, the other sheets of the set will.
+
+### 7. '{partID} is recorded for csv but is found in sets. csv does not support exporting a set.'
+This is encountered when a {partID} part is found in the sets sheet. Passing a set to the partID column in the files sheet causes that entire set to be exported. This, however, is only possible for excel files. To resolve this issue consider either passing a non-set part in the partID column or setting the file type to excel.
+
+### 8. '{partID} has an unrecognized fileType of {file_type} and can't be exported.'
+This is encountered when a {partID} part has an unrecognized fileType. To resolve this issue check that the fileType column in the files sheet is set to either "excel" or "csv".
\ No newline at end of file