diff --git a/modules/data.land/.Rbuildignore b/modules/data.land/.Rbuildignore index 42a2bb949ab..1479c91c930 100644 --- a/modules/data.land/.Rbuildignore +++ b/modules/data.land/.Rbuildignore @@ -1,2 +1,3 @@ contrib data-raw +^data-raw$ diff --git a/modules/data.land/NAMESPACE b/modules/data.land/NAMESPACE index 3c28da884b7..7717ea544fb 100644 --- a/modules/data.land/NAMESPACE +++ b/modules/data.land/NAMESPACE @@ -27,12 +27,12 @@ export(format_identifier) export(from.Tag) export(from.TreeCode) export(gSSURGO.Query) -export(get.attributes) export(get.soil) export(get_resource_map) export(get_veg_module) export(ic_process) export(id_resolveable) +export(landiq2std) export(load_veg) export(matchInventoryRings) export(match_pft) @@ -48,6 +48,7 @@ export(prepare_pools) export(put_veg_module) export(sample_ic) export(sclass) +export(shp2gpkg) export(shp2kml) export(soil.units) export(soil2netcdf) @@ -60,7 +61,25 @@ export(to.TreeCode) export(write_ic) export(write_veg) importFrom(dplyr,"%>%") +importFrom(dplyr,mutate) importFrom(magrittr,"%>%") importFrom(ncdf4,ncvar_get) importFrom(rlang,"%||%") importFrom(rlang,.data) +importFrom(sf,st_as_sf) +importFrom(sf,st_as_sfc) +importFrom(sf,st_bbox) +importFrom(sf,st_crop) +importFrom(sf,st_crs) +importFrom(sf,st_geometry_type) +importFrom(sf,st_is_valid) +importFrom(sf,st_make_valid) +importFrom(sf,st_read) +importFrom(sf,st_transform) +importFrom(sf,st_write) +importFrom(terra,crop) +importFrom(terra,crs) +importFrom(terra,ext) +importFrom(terra,extract) +importFrom(terra,rast) +importFrom(terra,vect) diff --git a/modules/data.land/R/data.R b/modules/data.land/R/data.R index 78fc30551e4..bae0b9f84fa 100644 --- a/modules/data.land/R/data.R +++ b/modules/data.land/R/data.R @@ -78,3 +78,16 @@ #' contains an identical texture.csv, also with no obvious source label. #' See also comments in soil_utils.R "soil_class" + +#' Crop-PFT mapping +#' +#' A lookup table to assign each LandIQ `crop` into one of three PFT +#' categories: "woody perennial crop", "non crop", or "herbaceous crop". +#' +#' @format A tibble with 2 columns: +#' \describe{ +#' \item{crop}{character; the exact LandIQ crop name} +#' \item{pft}{character; one of "woody perennial crop", "herbaceous crop", "non-crop"} +#' } +#' @source data-raw/landiq_pft_map.R +"landiq_pft_map" \ No newline at end of file diff --git a/modules/data.land/R/gis.functions.R b/modules/data.land/R/gis.functions.R index 0fa83941f88..4c616655190 100644 --- a/modules/data.land/R/gis.functions.R +++ b/modules/data.land/R/gis.functions.R @@ -1,7 +1,7 @@ #------------------------------------------------------------------------------- # Copyright (c) 2012 University of Illinois, NCSA. # All rights reserved. This program and the accompanying materials -# are made available under the terms of the +# are made available under the terms of the # University of Illinois/NCSA Open Source License # which accompanies this distribution, and is available at # http://opensource.ncsa.illinois.edu/license.html @@ -10,38 +10,37 @@ #--------------------------------------------------------------------------------------------------# ##' ##' Convert ESRI shapefile (*.shp) to keyhole markup language (KML) file format -##' +##' ##' @title Convert shapefile to KML -##' +##' ##' @param dir Directory of GIS shapefiles to convert to kml/kmz ##' @param ext File extension for files to convert to kml/kmz. Defaults to ESRI shapefile, ##' '.shp'. [Place holder for other potential vector files to conver to kml] ##' @param kmz TRUE/FALSE. Option to write out file as a compressed kml. Requires zip utility -##' @param proj4 OPTIONAL. Define output proj4 projection string. If set, input vector will be +##' @param proj4 OPTIONAL. Define output proj4 projection string. If set, input vector will be ##' reprojected to desired projection. Not yet implemented. ##' @param color OPTIONAL. Fill color for output kml/kmz file ##' @param NameField OPTIONAL. Define names for individual features in KML/KMZ file ##' @param out.dir OPTIONAL. Output directory for converted files ##' ##' @export -##' +##' ##' @examples ##' \dontrun{ ##' dir <- Sys.glob(file.path(R.home(), 'library', 'PEcAn.data.land','data')) ##' out.dir <- path.expand('~/temp') ##' shp2kml(dir,'.shp',kmz=FALSE,NameField='STATE',out.dir=out.dir) ##' system(paste('rm -r ',out.dir)) -##'} -##' +##' } +##' ##' @author Shawn P. Serbin shp2kml <- function(dir, ext, kmz = FALSE, proj4 = NULL, color = NULL, NameField = NULL, out.dir = NULL) { - # TODO: Enable compression of KML files using zip/gzip utility. Not quite figured this out yet # TODO: Allow assignment of output projection info by entering proj4 string # TODO: Allow for customization of output fill colors and line size # TODO: Allow for selection of taget attribute in output kml/kmz file(s) # TODO: Allow for setting out labels - + if (!is.null(out.dir)) { if (!file.exists(out.dir)) { dir.create(out.dir, recursive = TRUE) @@ -50,99 +49,143 @@ shp2kml <- function(dir, ext, kmz = FALSE, proj4 = NULL, color = NULL, NameField } else { output <- dir } - + # Get list of shapefiles in directory files <- list.files(path = dir, pattern = "*.shp", full.names = FALSE) remove <- grep("*xml", files) if (length(remove) > 0) { files <- files[-remove] } - + # loop here for (i in files) { print("") print(paste0("Converting : ** ", i, " ** to KML/KMZ file")) print("") print("") - + # Read in shapefile(s) & get coordinates/projection info shp.file <- # readShapeSpatial(file.path(dir,i),verbose=TRUE) coordinates(test) <- ~X+Y - + layers <- sf::st_layers(file.path(dir, i)) # shp.file <- readOGR(file.path(dir,i),layer=layers) # no need to read in file - + # Display vector info to the console print("") print(paste0("Input layers: ", layers$name)) print(paste0("Input projection info: ", layers$crs[[1]]$input)) print("") - + # Write out kml/kmz using plotKML package if (is.null(color)){ color <- 'grey70' } - + if (kmz == TRUE) { # NOT YET FULLY IMPLEMENTED in.file <- file.path(dir, i, fsep = .Platform$file.sep) out.file <- file.path(output, unlist(strsplit(i, "\\.")), fsep = .Platform$file.sep) - OGRstring <- paste0("ogr2ogr -progress -f KML", " ", - paste0(out.file, ".kmz"), - " ", in.file, " ", "-dsco NameField=", NameField) - system(OGRstring) # Run KML conversion - + OGRstring <- paste0( + "ogr2ogr -progress -f KML", " ", + paste0(out.file, ".kmz"), + " ", in.file, " ", "-dsco NameField=", NameField + ) + system(OGRstring) # Run KML conversion + # ADD COMPRESSION STEP HERE!!! - } else { # kml(shp.file,file=paste(output,'test.kml'),colour = 'grey70', alpha = 0.75, width=2, # balloon=FALSE) # writeOGR(shp.file['STATE'],'test2.kml',layer='statep010',NameField='STATE',driver='KML') - + # Using ogr2ogr external system utility. Works much better than R packages. - in.file <- file.path(dir, i, fsep = .Platform$file.sep) - out.file <- file.path(output, unlist(strsplit(i, "\\.")), fsep = .Platform$file.sep) - OGRstring <- paste0("ogr2ogr -progress -f KML", " ", paste0(out.file, ".kml"), - " ", in.file, " ", "-dsco NameField=", NameField) - system(OGRstring) # Run KML conversion + in.file <- file.path(dir, i, fsep = .Platform$file.sep) + out.file <- file.path(output, unlist(strsplit(i, "\\.")), fsep = .Platform$file.sep) + OGRstring <- paste0( + "ogr2ogr -progress -f KML", " ", paste0(out.file, ".kml"), + " ", in.file, " ", "-dsco NameField=", NameField + ) + system(OGRstring) # Run KML conversion } - } # End of loop + } # End of loop } # shp2kml -#--------------------------------------------------------------------------------------------------# -##' -##' Function to extract attribute information from vector or raster data layer. -##' -##' @title Retrieve attribute information from a vector or raster layer -##' @name get.attributes -##' -##' @param file vector or raster layer -##' @param coords vector containin xmin,ymin,xmax,ymax defing the bounding box for subset -##' -##' @export -##' -##' @examples -##' \dontrun{ -##' file <- Sys.glob(file.path(R.home(), 'library', 'PEcAn.data.land','data','*.kml')) -##' out <- get.attributes(file=file,coords=c(-95,42,-84,47)) -##' print(out) -##' } -##' -##' @author Shawn P. Serbin -get.attributes <- function(file, coords) { - # ogr tools do not seem to function properly in R. Need to figure out a work around reading in - # kml files drops important fields inside the layers. - - #library(fields) - #require(rgdal) - # note that OGR support is now provided by the sf and terra packages among others - - # print('NOT IMPLEMENTED YET') subset_layer(file,coords) -} # get.attributes +#' Retrieve Attribute Information from Vector or Raster Layers +#' +#' This function extracts attribute information from a vector or raster file within a specified bounding box defined by coordinates. +#' +#' @param file Character. Path to the vector (e.g., Shapefile, GeoPackage) or raster (e.g., GeoTIFF) file. +#' @param coords Numeric vector. A vector containing `xmin`, `ymin`, `xmax`, `ymax` defining the bounding box for subsetting. +#' +#' @return +#' - For vector files: An `sf` object containing the subset of features within the bounding box. +#' - For raster files: A `SpatRaster` object cropped to the bounding box. +#' +#' @importFrom sf st_read st_bbox st_crop st_as_sfc +#' @importFrom terra rast crop ext +#' +#' @aliases get_attributes, get.attributes +#' @author Shawn P. Serbin +#' @author David LeBauer +#' +#' @examples +#' \dontrun{ +#' # Example with a vector file (Shapefile) +#' vector_file <- "path/to/vector.shp" +#' bbox_coords <- c(-95, 42, -84, 47) # xmin, ymin, xmax, ymax +#' subset_vector <- get_attributes(file = vector_file, coords = bbox_coords) +#' print(subset_vector) +#' +#' # Example with a raster file (GeoTIFF) +#' raster_file <- "path/to/raster.tif" +#' subset_raster <- get_attributes(file = raster_file, coords = bbox_coords) +#' print(subset_raster) +#' } +get_attributes <- function(file, coords) { + # Validate 'coords' input + if (!is.numeric(coords) || length(coords) != 4) { + stop("`coords` must be a numeric vector of length 4: c(xmin, ymin, xmax, ymax).") + } + + # Determine file type based on extension + file_ext <- tools::file_ext(file) + + if (file_ext %in% c("shp", "gpkg", "geojson", "kml", "gml", "geo")) { + # Handle vector data + vector_sf <- sf::st_read(file, quiet = TRUE) + + # Create bounding box as sf object + bbox_sf <- sf::st_as_sfc(sf::st_bbox(c( + xmin = coords[1], + ymin = coords[2], + xmax = coords[3], + ymax = coords[4] + ), crs = sf::st_crs(vector_sf))) + + # Subset vector data within bounding box + subset_vector <- sf::st_crop(vector_sf, bbox_sf) + + return(subset_vector) + } else if (file_ext %in% c("tif", "tiff", "img", "grd")) { + # Handle raster data + raster_data <- terra::rast(file) + + # Define bounding box extent + bbox_ext <- terra::ext(coords[1], coords[3], coords[2], coords[4]) + + # Crop raster to bounding box + subset_raster <- terra::crop(raster_data, bbox_ext) + + return(subset_raster) + } else { + stop("Unsupported file type. Please provide a vector (e.g., Shapefile, GeoPackage) or raster (e.g., GeoTIFF) file.") + } +} #--------------------------------------------------------------------------------------------------# ##' ##' Function to subset and clip a GIS vector or raster layer by a bounding box ##' or clip/subset layer (e.g. shapefile/KML) -##' +##' ##' @param file input file to be subset ##' @param coords vector with xmin,ymin,xmax,ymax defing the bounding box for subset ##' @param sub.layer Vector layer defining the subset region @@ -162,12 +205,11 @@ get.attributes <- function(file, coords) { ##' subset_layer(file=file,coords=c(-95,42,-84,47),out.dir=out.dir) ##' system(paste('rm -r',out.dir,sep='')) ##' } -##' +##' ##' @export subset_layer -##' +##' ##' @author Shawn P. Serbin subset_layer <- function(file, coords = NULL, sub.layer = NULL, clip = FALSE, out.dir = NULL, out.name = NULL) { - # Setup output directory for subset layer if (is.null(out.dir)) { out.dir <- dirname(file) @@ -179,32 +221,143 @@ subset_layer <- function(file, coords = NULL, sub.layer = NULL, clip = FALSE, ou if (!file.exists(out.dir)) { dir.create(out.dir, recursive = TRUE) } - + # Setup output file name for subset layer if (is.null(out.name)) { - out.name <- paste0(unlist(strsplit(basename(file), "\\."))[1], - ".sub.", unlist(strsplit(basename(file), "\\."))[2]) + out.name <- paste0( + unlist(strsplit(basename(file), "\\."))[1], + ".sub.", unlist(strsplit(basename(file), "\\."))[2] + ) } else { out.name <- out.name } - + print(paste0("Subsetting layer: ", out.name)) output <- file.path(out.dir, out.name, fsep = .Platform$file.sep) - + if (unlist(strsplit(basename(file), "\\."))[2] == "kml") { format <- "-f KML" } else { format <- paste0("-f ", "'ESRI Shapefile'") } - + if (clip) { - OGRstring <- paste0("ogr2ogr -spat", " ", coords[1], " ", coords[2], " ", coords[3], " ", coords[4], - " ", format, " ", output, " ", file, " ", "-clipsrc", " ", "spat_extent") + OGRstring <- paste0( + "ogr2ogr -spat", " ", coords[1], " ", coords[2], " ", coords[3], " ", coords[4], + " ", format, " ", output, " ", file, " ", "-clipsrc", " ", "spat_extent" + ) } else { - OGRstring <- paste0("ogr2ogr -spat", " ", coords[1], " ", coords[2], " ", coords[3], " ", coords[4], - " ", format, " ", output, " ", file) + OGRstring <- paste0( + "ogr2ogr -spat", " ", coords[1], " ", coords[2], " ", coords[3], " ", coords[4], + " ", format, " ", output, " ", file + ) } - + # Run subset command system(OGRstring) } # subset_layer +#' Extract Raster Values at Point or Polygon Locations +#' +#' This function extracts raster values from a specified raster file at the locations of points or polygons provided either as a data frame with latitude and longitude or as an `sf` object with point or polygon geometries. +#' +#' @param raster_path Character. Path to the raster file. +#' @param spatial_data Input data. Either: +#' - A data frame containing `lat` and `lon` columns (for points), or +#' - An `sf` object with point or polygon geometries. +#' @param value_colname Character. Name of the new column to store extracted raster values. Default is `"raster_value"`. +#' @param scaling_factor Numeric. A factor to scale the extracted raster values. Default is `1` (no scaling). +#' +#' @return An `sf` object with an additional column containing the extracted (and scaled) raster values. +#' +#' @importFrom sf st_as_sf st_geometry_type st_crs st_transform st_read +#' @importFrom terra rast crs extract vect +#' @importFrom dplyr mutate +#' @author David LeBauer +#' @examples +#' \dontrun{ +#' # Example with data frame (points) +#' points_df <- data.frame( +#' id = 1:3, +#' lon = c(-120.5, -121.3, -122.1), +#' lat = c(35.3, 36.7, 34.8) +#' ) +#' raster_values_df <- extract_raster_values( +#' raster_path = "path/to/raster.tif", +#' spatial_data = points_df, +#' value_colname = "extracted_val", +#' scaling_factor = 10 +#' ) +#' +#' # Example with sf object (polygons) +#' polygons_sf <- sf::st_read("path/to/polygons.shp") +#' raster_values_polygons <- extract_raster_values( +#' raster_path = "path/to/raster.tif", +#' spatial_data = polygons_sf, +#' value_colname = "mean_raster_val", +#' scaling_factor = 1 +#' ) +#' } +extract_raster_values <- function(raster_path, spatial_data, value_colname = "raster_value", scaling_factor = 1) { + # Convert input to sf object and determine geometry type + if (is.data.frame(spatial_data)) { + if (!all(c("lat", "lon") %in% names(spatial_data))) { + stop("The data frame must contain 'lat' and 'lon' columns.") + } + # Convert to sf object with POINT geometries + spatial_sf <- sf::st_as_sf(spatial_data, coords = c("lon", "lat"), crs = 4326) + geom_type <- "POINT" + } else if (inherits(spatial_data, "sf")) { + # Check geometry type + geom_types <- unique(sf::st_geometry_type(spatial_data)) + if (all(geom_types %in% c("POINT", "MULTIPOINT"))) { + geom_type <- "POINT" + spatial_sf <- spatial_data + } else if (all(geom_types %in% c("POLYGON", "MULTIPOLYGON"))) { + geom_type <- "POLYGON" + spatial_sf <- spatial_data + } else { + stop("The `sf` object must contain either only POINT/MULTIPOINT or only POLYGON/MULTIPOLYGON geometries.") + } + } else { + stop("`spatial_data` must be either a data frame with 'lat' and 'lon' columns or an `sf` object with point or polygon geometries.") + } + + # Read the raster file using terra + raster_data <- terra::rast(raster_path) + + # Check and align CRS + raster_crs <- terra::crs(raster_data, describe = TRUE)$code + spatial_crs <- sf::st_crs(spatial_sf)$epsg + + if (!is.na(raster_crs) && !is.na(spatial_crs) && raster_crs != spatial_crs) { + spatial_sf <- sf::st_transform(spatial_sf, crs = raster_data) + } + + # Extract raster values based on geometry type + if (geom_type == "POINT") { + extracted_data <- terra::extract(raster_data, terra::vect(spatial_sf)) + + # Validate extraction result + if (ncol(extracted_data) < 2) { + stop("Extraction failed. Please check the raster and point locations.") + } + + extracted_col <- extracted_data[[2]] + } else if (geom_type == "POLYGON") { + # For polygons, calculate summary statistics (e.g., mean) + extracted_data <- terra::extract(raster_data, terra::vect(spatial_sf), fun = mean, na.rm = TRUE) + + # Validate extraction result + if (ncol(extracted_data) < 2) { + stop("Extraction failed. Please check the raster and polygon locations.") + } + + extracted_col <- extracted_data[[2]] + } + + # Add extracted (and scaled) raster values to the sf object and return + spatial_sf <- spatial_sf |> + dplyr::mutate(!!value_colname := extracted_col / scaling_factor) + + return(spatial_sf) +} diff --git a/modules/data.land/R/landiq2std.R b/modules/data.land/R/landiq2std.R new file mode 100644 index 00000000000..781498fccc0 --- /dev/null +++ b/modules/data.land/R/landiq2std.R @@ -0,0 +1,145 @@ +#' Convert a LandIQ Shapefile into a Standardized Format consisting of +#' a GeoPackage file to store geospatial information and an associated +#' CSV file with attributes +#' +#' This function reads a LandIQ crop map shapefile downloaded from +#' https://data.cnra.ca.gov/dataset/statewide-crop-mapping and processes the data into +#' a standardized GeoPackage and CSV format. +#' +#' @param input_file Character. Path to the input Shapefile +#' (GeoPackage created by shp2gpkg is also valid) +#' @param output_gpkg Character. Path to the output GeoPackage +#' @param output_csv Character. Path to the output CSV. +#' @param overwrite Logical. If TRUE, overwrites existing files. +#' +#' @return Invisibly returns a list with paths to the output files. +#' @details +#' This function: +#' - Reads a Shapefile using the `sf` package. +#' - Calculates centroids to extract latitude and longitude. +#' - Converts `Acres` column to hectares (`ha`). +#' - Extracts crop information and assigns a PFT (Plant Functional Type). +#' - Outputs a standardized GeoPackage (geometry in California Albers, +#' EPSG:3310) with columns `id`, `geometry`, `lat`, `lon`, and `area_ha`. +#' - Outputs a CSV with columns `year`, `crop`, `pft`, `source`, and `notes`. +#' +#' Note: TODO provide crop-->PFT mapping as an external file using either +#' defaults stored with the PEcAn.data.land package or an optional custom +#' mapping +#' +#' @examples +#' # Specify input and output file paths +#' input_file <- "path/to/landiq.shp" +#' output_gpkg <- "path/to/your_output.gpkg" +#' output_csv <- "path/to/your_output.csv" +#' +#' # Run the conversion function +#' landiq2std(input_file, output_gpkg, output_csv) +#' +#' @export +landiq2std <- function(input_file, output_gpkg, output_csv, overwrite = TRUE) { + # Check input file format + # If shapefile, convert to GeoPackage + if (grepl(pattern = "\\.shp$", input_file)) { + # read in, repair geometries, write out repaired geopackage + tempfile <- tempfile(fileext = ".gpkg") + shp2gpkg(input_file, tempfile, overwrite = TRUE) + input_file <- tempfile # now gpkg is input file + on.exit(unlink(input_file), add = TRUE) + } + + # Read the Shapefile + landiq_polygons <- sf::st_read(input_file, quiet = FALSE) + + # Determine crop year from column name + crop_col <- grep("Crop[0-9]{4}", colnames(landiq_polygons), value = TRUE) + year <- gsub("Crop", "", crop_col) + # Check required columns + required_cols <- c("Acres", crop_col, "Source", "Comments", "County", "geom") + missing_cols <- setdiff(required_cols, colnames(landiq_polygons)) + if (length(missing_cols) > 0) { + PEcAn.logger::logger.error("Input file is missing the following columns: ", paste(missing_cols, collapse = ", ")) + } + + landiq_polygons_updated <- landiq_polygons |> + sf::st_transform(4326) |> + dplyr::mutate( + coords = sf::st_coordinates(sf::st_centroid(geom)) + ) |> + dplyr::mutate( + year = year, + lon = coords[, "X"], + lat = coords[, "Y"], + area_ha = PEcAn.utils::ud_convert(Acres, "acre", "ha") + ) |> + dplyr::select(-coords) |> + # digest step used to create unique site_ids requires rowwise + dplyr::rowwise() |> + mutate( + site_id = digest::digest(geom, algo = "xxhash64") + ) |> + dplyr::rename(county = County) + + # Process data for GeoPackage in California Albers (EPSG:3310) + gpkg_data <- landiq_polygons_updated |> + dplyr::select(site_id, geom, lat, lon, area_ha, county) |> + sf::st_transform(3310) # EPSG:3310 = NAD83 / California Albers + + # Process data for CSV + csv_data <- landiq_polygons_updated |> + tidyr::as_tibble() |> + dplyr::mutate( + crop = .data[[crop_col]] + ) |> + # join to external lookup table for pft + dplyr::left_join(landiq_pft_map, by = "crop") |> + # default any missing pft to "annual crop" + dplyr::mutate( + pft = dplyr::coalesce(pft, "annual crop") + ) |> + dplyr::rename( + source = Source, + notes = Comments + ) |> + dplyr::select(site_id, lat, lon, year, crop, pft, source, notes) + + # Warn about crops without a PFT + unassigned_pft <- csv_data |> + dplyr::filter(grepl("no PFT for", pft)) |> + dplyr::distinct(crop, pft) + if (nrow(unassigned_pft) > 0) { + PEcAn.logger::logger.warn( + "The following crops do not have a PFT assigned:", + paste(unassigned_pft$crop, collapse = ", ") + ) + } + + # Write outputs + if(!overwrite) { + if (file.exists(output_gpkg)) { + PEcAn.logger::logger.error("Output GeoPackage already exists. Set overwrite = TRUE to overwrite.") + } + if (file.exists(output_csv)) { + PEcAn.logger::logger.error("Output CSV already exists. Set overwrite = TRUE to overwrite.") + } + } else { + if (file.exists(output_gpkg)) { + PEcAn.logger::logger.info("Overwriting existing file", output_gpkg) + file.remove(output_gpkg, showWarnings = FALSE) + } + if (file.exists(output_csv)) { + PEcAn.logger::logger.info("Overwriting existing file", output_csv) + file.remove(output_csv, showWarnings = FALSE) + } + } + + sf::st_write(gpkg_data, + output_gpkg, + layer = "sites", # analogous to BETYdb table name + quiet = FALSE + ) + readr::write_csv(csv_data, output_csv) + + # Return success status + invisible(TRUE) +} diff --git a/modules/data.land/R/shp2gpkg.R b/modules/data.land/R/shp2gpkg.R new file mode 100644 index 00000000000..a13d0dc0de6 --- /dev/null +++ b/modules/data.land/R/shp2gpkg.R @@ -0,0 +1,84 @@ +#' Convert Shapefile to GeoPackage +#' +#' This function converts a Shapefile to a GeoPackage using the `sf` package. +#' +#' @param input_shp Character. Path to the input Shapefile (e.g., `"data/myfile.shp"`). +#' @param output_gpkg Character. Path to the output GeoPackage (e.g., `"output/myfile.gpkg"`). +#' @param layer_name Character. Optional. Name of the layer in the GeoPackage. Defaults to the base name of the Shapefile. +#' @param overwrite Logical. Whether to overwrite an existing layer in the GeoPackage. Defaults to `TRUE`. +#' +#' @return Invisibly returns the path to the GeoPackage (`output_gpkg`) upon successful conversion. +#' @details +#' This function reads a Shapefile, converts it to an `sf` object, and writes it to a GeoPackage. +#' The `sf` package handles the conversion and ensures spatial data integrity. +#' +#' @examples +#' # Convert 'roads.shp' to 'roads.gpkg' with the default layer name +#' shp2gpkg("data/roads.shp", "output/roads.gpkg") +#' +#' # Convert with a custom layer name +#' shp2gpkg("data/roads.shp", "output/roads.gpkg", layer_name = "custom_layer") +#' +#' # Prevent overwriting existing layers +#' shp2gpkg("data/roads.shp", "output/roads.gpkg", overwrite = FALSE) +#' +#' @importFrom sf st_read st_write st_is_valid st_make_valid +#' @export +shp2gpkg <- function(input_shp, output_gpkg, layer_name = NULL, overwrite = TRUE) { + # Load the Shapefile + shapefile <- sf::st_read(input_shp, quiet = TRUE) + + # Check validity of geometries + is_geometry_valid <- sf::st_is_valid(shapefile) + total_geometries <- length(is_geometry_valid) + n_invalid <- sum(!is_geometry_valid) + percent_invalid <- (n_invalid / total_geometries) * 100 + + # Log invalid geometry statistics + + PEcAn.logger::logger.info("Total records in shapefile: ", total_geometries) + + if (n_invalid > 0) { + # Report invalid geometry statistics + PEcAn.logger::logger.info( + n_invalid, + "(", round(percent_invalid, 2), "%)", + "of geometries are invalid" + ) + PEcAn.logger::logger.info(". Attempting to repair invalid geometries") + + # Attempt to repair invalid geometries + repaired_shapefile <- sf::st_make_valid(shapefile) + is_repair_successful <- sf::st_is_valid(repaired_shapefile) + n_repaired <- n_invalid - sum(!is_repair_successful) + n_remaining_invalid <- sum(!is_repair_successful) + + # Report repair results + message("Repaired ", n_repaired, " geometries successfully.") + if (n_remaining_invalid > 0) { + PEcAn.logger::logger.warn("Could not repair ", n_remaining_invalid, " geometries. They will be excluded.") + } else { + PEcAn.logger::logger.info("All geometries were repaired successfully.") + } + + # Retain only valid geometries after repair + shapefile <- repaired_shapefile[is_repair_successful, ] + } + + # Determine layer name from input file if not provided + if (is.null(layer_name)) { + layer_name <- tools::file_path_sans_ext(basename(input_shp)) + } + + # Write to GeoPackage + sf::st_write( + shapefile, + output_gpkg, + layer = layer_name, + delete_layer = overwrite, + quiet = TRUE + ) + + # Invisibly return the file path + invisible(output_gpkg) +} diff --git a/modules/data.land/data-raw/landiq_pft_map.R b/modules/data.land/data-raw/landiq_pft_map.R new file mode 100644 index 00000000000..52badfa1fee --- /dev/null +++ b/modules/data.land/data-raw/landiq_pft_map.R @@ -0,0 +1,37 @@ +non_crop <- c( + "Flowers, Nursery and Christmas Tree Farms", "Greenhouse", + "Idle", "Managed Wetland", "Urban" +) + +woody_crop <- c( + "Almonds", "Apples", "Avocados", "Bush Berries", "Cherries", + "Citrus", "Dates", "Grapes", "Kiwis", "Miscellaneous Deciduous", + "Miscellaneous Subtropical Fruits", "Olives", "Peaches/Nectarines", + "Pears", "Pistachios", "Plums, Prunes and Apricots", "Pomegranates", + "Walnuts", "Young Perennials" +) + +herbaceous_crop <- c( + "Alfalfa and Alfalfa Mixtures", "Beans (Dry)", "Carrots", "Cole Crops", + "Corn, Sorghum and Sudan", "Cotton", "Lettuce/Leafy Greens", + "Melons, Squash and Cucumbers", "Miscellaneous Field Crops", + "Miscellaneous Grain and Hay", "Miscellaneous Grasses", "Miscellaneous Truck Crops", + "Mixed Pasture", "Onions and Garlic", "Peppers", "Potatoes and Sweet Potatoes", + "Rice", "Safflower", "Strawberries", "Sunflowers", "Tomatoes", + "Wheat", "Wild Rice" +) + +landiq_pft_map <- dplyr::bind_rows( + dplyr::tibble( + crop = woody_crops, + pft = "woody perennial crop" + ), + dplyr::tibble( + crop = non_crop, + pft = "non-crop" + ), + dplyr::tibble( + crop = herbaceous_crop, + pft = "herbaceous crop" + ) +) diff --git a/modules/data.land/data/landiq_pft_map.rda b/modules/data.land/data/landiq_pft_map.rda new file mode 100644 index 00000000000..857642769b1 Binary files /dev/null and b/modules/data.land/data/landiq_pft_map.rda differ diff --git a/modules/data.land/man/extract_raster_values.Rd b/modules/data.land/man/extract_raster_values.Rd new file mode 100644 index 00000000000..ac1a82d2d6c --- /dev/null +++ b/modules/data.land/man/extract_raster_values.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gis.functions.R +\name{extract_raster_values} +\alias{extract_raster_values} +\title{Extract Raster Values at Point or Polygon Locations} +\usage{ +extract_raster_values( + raster_path, + spatial_data, + value_colname = "raster_value", + scaling_factor = 1 +) +} +\arguments{ +\item{raster_path}{Character. Path to the raster file.} + +\item{spatial_data}{Input data. Either: +- A data frame containing `lat` and `lon` columns (for points), or +- An `sf` object with point or polygon geometries.} + +\item{value_colname}{Character. Name of the new column to store extracted raster values. Default is `"raster_value"`.} + +\item{scaling_factor}{Numeric. A factor to scale the extracted raster values. Default is `1` (no scaling).} +} +\value{ +An `sf` object with an additional column containing the extracted (and scaled) raster values. +} +\description{ +This function extracts raster values from a specified raster file at the locations of points or polygons provided either as a data frame with latitude and longitude or as an `sf` object with point or polygon geometries. +} +\examples{ +\dontrun{ +# Example with data frame (points) +points_df <- data.frame( + id = 1:3, + lon = c(-120.5, -121.3, -122.1), + lat = c(35.3, 36.7, 34.8) +) +raster_values_df <- extract_raster_values( + raster_path = "path/to/raster.tif", + spatial_data = points_df, + value_colname = "extracted_val", + scaling_factor = 10 +) + +# Example with sf object (polygons) +polygons_sf <- sf::st_read("path/to/polygons.shp") +raster_values_polygons <- extract_raster_values( + raster_path = "path/to/raster.tif", + spatial_data = polygons_sf, + value_colname = "mean_raster_val", + scaling_factor = 1 +) +} +} +\author{ +David LeBauer +} diff --git a/modules/data.land/man/get.attributes.Rd b/modules/data.land/man/get.attributes.Rd deleted file mode 100644 index 9a92eccf83c..00000000000 --- a/modules/data.land/man/get.attributes.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gis.functions.R -\name{get.attributes} -\alias{get.attributes} -\title{Retrieve attribute information from a vector or raster layer} -\usage{ -get.attributes(file, coords) -} -\arguments{ -\item{file}{vector or raster layer} - -\item{coords}{vector containin xmin,ymin,xmax,ymax defing the bounding box for subset} -} -\description{ -Function to extract attribute information from vector or raster data layer. -} -\examples{ -\dontrun{ -file <- Sys.glob(file.path(R.home(), 'library', 'PEcAn.data.land','data','*.kml')) -out <- get.attributes(file=file,coords=c(-95,42,-84,47)) -print(out) -} - -} -\author{ -Shawn P. Serbin -} diff --git a/modules/data.land/man/get_attributes.Rd b/modules/data.land/man/get_attributes.Rd new file mode 100644 index 00000000000..6811197b45f --- /dev/null +++ b/modules/data.land/man/get_attributes.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gis.functions.R +\name{get_attributes} +\alias{get_attributes} +\alias{get_attributes,} +\alias{get.attributes} +\title{Retrieve Attribute Information from Vector or Raster Layers} +\usage{ +get_attributes(file, coords) +} +\arguments{ +\item{file}{Character. Path to the vector (e.g., Shapefile, GeoPackage) or raster (e.g., GeoTIFF) file.} + +\item{coords}{Numeric vector. A vector containing `xmin`, `ymin`, `xmax`, `ymax` defining the bounding box for subsetting.} +} +\value{ +- For vector files: An `sf` object containing the subset of features within the bounding box. +- For raster files: A `SpatRaster` object cropped to the bounding box. +} +\description{ +This function extracts attribute information from a vector or raster file within a specified bounding box defined by coordinates. +} +\examples{ +\dontrun{ +# Example with a vector file (Shapefile) +vector_file <- "path/to/vector.shp" +bbox_coords <- c(-95, 42, -84, 47) # xmin, ymin, xmax, ymax +subset_vector <- get_attributes(file = vector_file, coords = bbox_coords) +print(subset_vector) + +# Example with a raster file (GeoTIFF) +raster_file <- "path/to/raster.tif" +subset_raster <- get_attributes(file = raster_file, coords = bbox_coords) +print(subset_raster) +} +} +\author{ +Shawn P. Serbin + +David LeBauer +} diff --git a/modules/data.land/man/landiq2std.Rd b/modules/data.land/man/landiq2std.Rd new file mode 100644 index 00000000000..d3ce140afc3 --- /dev/null +++ b/modules/data.land/man/landiq2std.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/landiq2std.R +\name{landiq2std} +\alias{landiq2std} +\title{Convert a LandIQ Shapefile into a Standardized Format consisting of +a GeoPackage file to store geospatial information and an associated +CSV file with attributes} +\usage{ +landiq2std(input_file, output_gpkg, output_csv) +} +\arguments{ +\item{input_file}{Character. Path to the input Shapefile +(GeoPackage created by shp2gpkg is also valid)} + +\item{output_gpkg}{Character. Path to the output GeoPackage} + +\item{output_csv}{Character. Path to the output CSV.} +} +\value{ +Invisibly returns a list with paths to the output files. +} +\description{ +This function reads a LandIQ crop map shapefile downloaded from +https://data.cnra.ca.gov/dataset/statewide-crop-mapping and processes the data into +a standardized GeoPackage and CSV format. +} +\details{ +This function: +- Reads a Shapefile using the `sf` package. +- Calculates centroids to extract latitude and longitude. +- Converts `Acres` column to hectares (`ha`). +- Extracts crop information and assigns a PFT (Plant Functional Type). +- Outputs a standardized GeoPackage with columns `id`, `geometry`, `lat`, `lon`, and `area_ha`. +- Outputs a CSV with columns `year`, `crop`, `pft`, `source`, and `notes`. + +Note: TODO provide crop-->PFT mapping as an external file using either +defaults stored with the PEcAn.data.land package or an optional custom +mapping +} +\examples{ +# Specify input and output file paths +input_file <- "path/to/landiq.shp" +output_gpkg <- "path/to/your_output.gpkg" +output_csv <- "path/to/your_output.csv" + +# Run the conversion function +landiq2std(input_file, output_gpkg, output_csv) + +} diff --git a/modules/data.land/man/shp2gpkg.Rd b/modules/data.land/man/shp2gpkg.Rd new file mode 100644 index 00000000000..0154625c012 --- /dev/null +++ b/modules/data.land/man/shp2gpkg.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/shp2gpkg.R +\name{shp2gpkg} +\alias{shp2gpkg} +\title{Convert Shapefile to GeoPackage} +\usage{ +shp2gpkg(input_shp, output_gpkg, layer_name = NULL, overwrite = TRUE) +} +\arguments{ +\item{input_shp}{Character. Path to the input Shapefile (e.g., `"data/myfile.shp"`).} + +\item{output_gpkg}{Character. Path to the output GeoPackage (e.g., `"output/myfile.gpkg"`).} + +\item{layer_name}{Character. Optional. Name of the layer in the GeoPackage. Defaults to the base name of the Shapefile.} + +\item{overwrite}{Logical. Whether to overwrite an existing layer in the GeoPackage. Defaults to `TRUE`.} +} +\value{ +Invisibly returns the path to the GeoPackage (`output_gpkg`) upon successful conversion. +} +\description{ +This function converts a Shapefile to a GeoPackage using the `sf` package. +} +\details{ +This function reads a Shapefile, converts it to an `sf` object, and writes it to a GeoPackage. +The `sf` package handles the conversion and ensures spatial data integrity. +} +\examples{ +# Convert 'roads.shp' to 'roads.gpkg' with the default layer name +shp2gpkg("data/roads.shp", "output/roads.gpkg") + +# Convert with a custom layer name +shp2gpkg("data/roads.shp", "output/roads.gpkg", layer_name = "custom_layer") + +# Prevent overwriting existing layers +shp2gpkg("data/roads.shp", "output/roads.gpkg", overwrite = FALSE) + +} diff --git a/modules/data.land/man/shp2kml.Rd b/modules/data.land/man/shp2kml.Rd index afae5b612e8..748a64d69db 100644 --- a/modules/data.land/man/shp2kml.Rd +++ b/modules/data.land/man/shp2kml.Rd @@ -22,7 +22,7 @@ shp2kml( \item{kmz}{TRUE/FALSE. Option to write out file as a compressed kml. Requires zip utility} -\item{proj4}{OPTIONAL. Define output proj4 projection string. If set, input vector will be +\item{proj4}{OPTIONAL. Define output proj4 projection string. If set, input vector will be reprojected to desired projection. Not yet implemented.} \item{color}{OPTIONAL. Fill color for output kml/kmz file}