# R/ShapeToMask.R
#
# Defines functions .modifyLonsTo180 .transformToNc .areacov .polygonize .shapetomask ShapeToMask
#
# Documented in ShapeToMask

#' Convert Shapefile to Mask Array
#' 
#' This function reads a shapefile (.shp) containing information about polygonal 
#' regions. It then transfers the shapefile data into an array and subsets the 
#' output based on requested region names or IDs. The accepted shapefile 
#' databases are 'NUTS', 'LAU', and 'GADM', each with its own unique format. 
#' However, the function can use other shapefile databases by specifying the 
#' name of the ID column with the parameter 'id_shape_col'. 
#' 
#' To ensure accurate comparison with the shapefile, the function loads a 
#' reference dataset that provides longitude and latitude information. By 
#' intersecting each subset of the shapefile with the reference coordinates, the 
#' function selects only the desired regions. The final step involves creating a 
#' mask array. Depending on the chosen option, the mask array is either returned 
#' as the function's output or saved into a NetCDF format in the specified 
#' directory.
#' 
#' Note: Modules GDAL, PROJ and GEOS are required.
#'  
#' @param shp_file A character string indicating the shp file path.
#' @param ref_grid A character string indicating the path to the reference 
#'   data. Either (1) a netCDF file or (2) a list of lon and lat to provide the 
#'   reference grid points. It is NULL by default.
#' @param compute_area_coverage A logical value indicating the method to find 
#'   the intersection of the reference grid and the shapefile. When it is TRUE, 
#'   the method used is the calculation of the area coverage fraction of 
#'   intersection. If it is FALSE, the method used is searching if the centroid 
#'   of the grid cell falls inside the shapefile or not. It is FALSE by default.
#' @param shp_system A character string containing the Shapefile System Database 
#'   Name used to subset the shapefile into regions by using parameters 'reg_ids' 
#'   or 'reg_names'. The accepted systems are: 'NUTS', 'LAU', and 'GADM'. When it 
#'   is used, you must specify either 'reg_ids' or 'reg_names'; if you don't need 
#'   to subset different regions, set it to NULL. It is set to "NUTS" by default
#'   (optional). 
#' @param reg_ids A character string indicating the unique ID in shapefile. 
#'   It is NULL by default (optional).
#' @param reg_names A named list of character string vectors indicating the 
#'   country and the region name. The name of the list stands for the country 
#'   name code and the vector character strings indicate the region name for 
#'   each country. It is NULL by default (optional).
#' @param reg_level An integer number from 1 to 3 indicating the 'NUTS' dataset 
#'   level. For other datasets this parameter is not used. One mask can only have 
#'   a unique level. It is set to 3 by default (optional).
#' @param lat_dim A character string indicating the latitudinal dimension. If it 
#'   is NULL, the latitudinal name will be searched using an internal function 
#'   with the following possible names: 'lat', 'latitude', 'y', 'j' and 
#'   'nav_lat'. It is set to NULL by default.
#' @param lon_dim A character string indicating the longitudinal dimension. If it 
#'   is NULL, the longitudinal name will be searched using an internal function 
#'   with the following possible names: 'lon', 'longitude', 'x', 'i' and 
#'   'nav_lon'. It is set to NULL by default.
#' @param region A logical value indicating if we want a dimension for the 
#'   regions in the resulting mask array. It is FALSE by default.
#' @param check_valid A logical value that when it is TRUE it uses the function 
#'   'sf::st_make_valid' applied to the shapefile and to the coordinates.
#' @param find_min_dist A logical value indicating if we want to look for the 
#'   nearest coordinate between the shapefile region and the reference grid when 
#'   there is no intersection between the shapefile and the reference grid. It is 
#'   FALSE by default.
#' @param max_dist A numeric value indicating the maximum distance is accepted 
#'   to the closest gridpoint when there is no intersection between the shapefile 
#'   and the reference grid. 
#' @param ncores The number of parallel processes to spawn for the use for 
#'   parallel computation in multiple cores.
#' @param fileout A character string of the path to save the NetCDF mask. If not 
#'   specified (default), only the mask array will be returned. 
#' @param units A character string indicating whether the grid of your GIS 
#'   files is in degrees or meters. The accepted values are 'degrees' and 
#'   'meters'. It is set to 'degrees' by default.
#' @param name_shape_col A character string indicating in the shape file which is the name of the column 
#'  with the values of the names of the different polygons. It is NULL by default.
#' @param id_shape_col A character string indicating in the shape file which is the name of the column 
#'  with the values of the IDs of the different polygons. It is NULL by default.
#' @param ... Arguments passed on to 's2_options' in function 'st_intersection'.  
#'   See 's2 package'.
#'  
#' @return A multidimensional array containing a mask array with longitude and 
#' latitude dimensions. If 'region' is TRUE, there will be a dimension for 
#' the region. 
#'  
#' @examples 
#' \dontrun{
#' # Example using an external shapefile not distributed with the package
#' shp_file <- paste0('/esarchive/shapefiles/NUTS3/NUTS_RG_60M_2021_4326.shp/', 
#'                    'NUTS_RG_60M_2021_4326.shp')
#' ref_grid <- list(lon = seq(10, 40, 0.5), lat = seq(40, 85, 0.5))
#' NUTS_name <- list(FI = c('Lappi', 'Kainuu'), SI = c('Pomurska', 'Podravska'))
#' mask <- ShapeToMask(shp_file = shp_file, ref_grid = ref_grid, 
#'                      reg_names = NUTS_name)
#' }
#' @import easyNCDF
#' @import sf
#' @import foreach
#' @import jsonlite
#' @importFrom dplyr mutate arrange pull group_by summarise
#' @importFrom doParallel registerDoParallel
#' @export
ShapeToMask <- function(shp_file, ref_grid, compute_area_coverage = FALSE, 
                        shp_system = "NUTS", reg_names = NULL, reg_ids = NULL, 
                        reg_level = 3, 
                        lat_dim = NULL, lon_dim = NULL, 
                        region = FALSE, check_valid = FALSE, 
                        find_min_dist = FALSE, max_dist = 50, ncores = NULL, 
                        fileout = NULL, units = 'degrees', id_shape_col = NULL,
                        name_shape_col = NULL, ...) {

  # TODO: Suppress warnings?
  # TODO: Add saving option?
  # sf_use_s2(FALSE)

  # Initial checks
  # shp_file: mandatory path to the .shp file
  if (is.null(shp_file)) {
    stop("Parameter 'shp_file' cannot be NULL.")
  }
  if (!is.character(shp_file)) {
    stop("Parameter 'shp_file' must be a character string.")
  }
  # lon_dim, lat_dim: optional dimension names, resolved below when NULL
  if (!is.null(lon_dim)) {
    if (!is.character(lon_dim)) {
      stop("Parameter 'lon_dim' must be a character string.")
    }
  }
  if (!is.null(lat_dim)) {
    if (!is.character(lat_dim)) {
      stop("Parameter 'lat_dim' must be a character string.")
    }
  }
  if (!is.null(id_shape_col)) {
    if (!is.character(id_shape_col)) {
      stop("Parameter 'id_shape_col' must be a character string.")
    }
  }
  if (!is.null(name_shape_col)) {
    if (!is.character(name_shape_col)) {
      stop("Parameter 'name_shape_col' must be a character string.")
    }
  }
  if (!is.null(units)) {
    if (!is.character(units)) {
      stop("Parameter 'units' must be a character string.")
    }
  }
  # ref_grid: either a path to a netCDF file or a list with lon and lat
  if (is.null(ref_grid)) {
    stop("Parameter 'ref_grid' cannot be NULL.")
  }
  if (!is.character(ref_grid) && !is.list(ref_grid)) {
    stop("Parameter 'ref_grid' must be either a netCDF file or a list of lon and lat.")
  }
  if (is.character(ref_grid)) {
    if (!file.exists(ref_grid)) {
      stop("ref_grid file does not exist.")
    } 
  }
  if (is.list(ref_grid)) {
    if (length(ref_grid) != 2) {
      stop("If 'ref_grid' is a list, it must have two items for longitude and latitude.")
    }
    if (is.null(lat_dim) || is.null(lon_dim)) {
      # Guess the lon/lat names among the commonly used ones
      lon_known_names <- c(.KnownLonNames(), 'lons')
      lat_known_names <- c(.KnownLatNames(), 'lats')
      lon_dim <- lon_known_names[which(lon_known_names %in% names(ref_grid))]
      lat_dim <- lat_known_names[which(lat_known_names %in% names(ref_grid))]

      if (identical(lon_dim, character(0)) || identical(lat_dim, character(0))) {
        stop("longitude and latitude names are not recognized in 'ref_grid'. ",  
             "Please specify 'lon_dim' and 'lat_dim'.")
      }
    }
  }

  # shp_system, reg_ids, reg_names, id_shape_col
  if (!is.null(shp_system)) {
    if (!is.character(shp_system)) {
      stop("Parameter 'shp_system' must be a character string.")
    }
    if (all(is.null(reg_ids), is.null(reg_names))) {
      stop("If 'shp_system' is used, you must provide either parameter ",
           "'reg_ids' or 'reg_names'.")
    } 
  } else if (!is.null(id_shape_col)) {
    if (is.null(reg_ids)) {
      stop("If 'id_shape_col' is used, parameter 'reg_ids' must be provided.")
    }
    if (!is.character(id_shape_col)) {
      stop("Parameter 'id_shape_col' must be a character string.")
    }
  }
  if (all(!is.null(reg_ids), !is.null(reg_names))) {
    warning("Only use 'reg_ids' to get the shape region. 'reg_names' is not used.")
  }
  # reg_level: only used for the 'NUTS' system
  if (!is.null(reg_level)) {
    if (!is.numeric(reg_level)) {
      stop("Parameter 'reg_level' must be numeric.")
    }
  }
  # region
  if (!is.logical(region)) {
    stop("Parameter 'region' must be a logical value.")
  }
  # check_valid
  if (!is.logical(check_valid)) {
    stop("Parameter 'check_valid' must be a logical value.")
  }
  # find_min_dist
  if (!is.logical(find_min_dist)) {
    stop("Parameter 'find_min_dist' must be a logical value.")
  }
  # max_dist
  if (!is.null(max_dist)) {
    if (!is.numeric(max_dist)) {
      stop("Parameter 'max_dist' must be a numeric.")
    }
  }
  # ncores: fewer than 2 cores means sequential execution
  if (!is.null(ncores)) {
    if (!is.numeric(ncores)) {
      stop("Parameter 'ncores' must be numeric.")
    }
    ncores <- round(ncores)
    if (ncores < 2) {
      ncores <- NULL
    }
  }

  # Step 1: Load the shapefile
  shp <- sf::st_read(shp_file)  # class sf

  # Reproject to a CRS consistent with the requested units:
  # EPSG:4326 (lon/lat degrees) or EPSG:3857 (meters)
  if (units == 'degrees') {
    shp <- sf::st_transform(shp, 4326)
  } else if (units == 'meters') {
    shp <- sf::st_transform(shp, 3857)
  }
  
  shp_crs <- sf::st_crs(shp)

  # Declare the column names used inside subset() below to avoid
  # "no visible binding" notes from R CMD check
  NUTS_ID <- ADM1_PCODE <- ISO <- NULL
  CNTR_CODE <- NUTS_NAME <- ADM0_EN <- ADM1_EN <- Name <- LEVL_CODE <- NULL

  if (!is.null(reg_ids)) {
    ## Method 1: Directly use IDs
    if (!is.null(id_shape_col)) {
      if (id_shape_col %in% names(shp)) {
        shp <- subset(shp, get(id_shape_col) %in% reg_ids)
      } else {
        stop("Shape system name not found in shapefile names.")
      }
    } else if (shp_system == "NUTS") {
      shp <- subset(shp, NUTS_ID %in% reg_ids)
      id_shape_col <- "NUTS_ID"
    } else if (shp_system == "ADM") {
      shp <- subset(shp, ADM1_PCODE %in% reg_ids)
      id_shape_col <- "ADM1_PCODE"
    } else if (shp_system == "GADM") {
      shp <- subset(shp, ISO %in% reg_ids)
      id_shape_col <- "ISO"
    } else {
      stop("shp_system ", shp_system, " is not defined yet.")
    }
  
  } else if (!is.null(reg_names)) {
    id_shape_col <- NULL
    ## Method 2: Use country code & region name
    for (cntr_i in seq_along(reg_names)) {
      if (shp_system == "NUTS") {
        tmp <- subset(shp, CNTR_CODE == names(reg_names)[cntr_i])
        tmp <- subset(tmp, NUTS_NAME %in% reg_names[[cntr_i]])
        id_shape_col <- 'NUTS_ID'
      } else if (shp_system == "ADM") {
        tmp <- subset(shp, ADM0_EN == names(reg_names)[cntr_i])
        tmp <- subset(tmp, ADM1_EN %in% reg_names[[cntr_i]])
        id_shape_col <- 'ADM1_EN'
      } else if (shp_system == "GADM") {
        # NOTE(review): for GADM the whole 'reg_names' list is matched against
        # 'Name' at once, not per country -- confirm this is intended
        tmp <- subset(shp, Name %in% reg_names)
        id_shape_col <- 'Name'
      }
      if (cntr_i == 1) {
        shp_tmp <- tmp
      } else {
        shp_tmp <- rbind(shp_tmp, tmp)
      }
    }
    if (shp_system == "NUTS") {
      # One mask can only have a unique NUTS level
      shp <- subset(shp_tmp, LEVL_CODE == reg_level)
    } else if (shp_system == "ADM" || shp_system == "GADM") {
      shp <- shp_tmp
    }
  }
  
  # Step (2.1): Use the reference file to get lat and lon
  if (is.character(ref_grid) && all(tools::file_ext(ref_grid) == 'nc')) {
    ## Method 1: ref_grid is a netCDF file
    if (is.null(lat_dim) || is.null(lon_dim)) {
      var_names <- easyNCDF::NcReadVarNames(ref_grid)
      lat_dim <- var_names[which(var_names %in% .KnownLatNames())]
      lon_dim <- var_names[which(var_names %in% .KnownLonNames())]
    }
    lat <- easyNCDF::NcToArray(ref_grid, vars_to_read = lat_dim)[1, ]
    lon <- easyNCDF::NcToArray(ref_grid, vars_to_read = lon_dim)[1, ]
  } else if (is.list(ref_grid)) {
    ## Method 2: ref_grid is a list of lon and lat
    lat <- ref_grid[[lat_dim]]
    lon <- ref_grid[[lon_dim]]
  }

  # Set degrees longitudes to -180 to 180 and keep the reordering index
  lon       <- ifelse(lon > 180, lon - 360, lon)
  order_idx <- order(lon)
  lon       <- lon[order_idx]

  lat <- sort(lat, decreasing = FALSE)
  # Step (2.2): Create the grid geometry that will be intersected with
  # each shapefile region
  if (compute_area_coverage) {    
    locations <- array(1:(length(lon)*length(lat)), c(lon = length(lon), 
                                                      lat = length(lat)))
    # Build data dataframe: one row per grid cell, 'dat' keeps the cell index
    lonlat_df <- data.frame(lon = rep(as.vector(lon), length(lat)),
                            lat = sort(rep(as.vector(lat), length(lon)), decreasing = TRUE))

    data_df <- lonlat_df %>%
      mutate(dat = as.vector(locations))

    # NOTE: if target_proj = "ESRI:54030", Nord3v2 has different behavior from hub and ws!!
    data_df <- st_as_sf(data_df, coords = c("lon", "lat"), crs = shp_crs)
    data_df <- st_transform(data_df, crs = shp_crs)
    data_df <- data_df %>%
      mutate(long = st_coordinates(data_df)[, 1],
             lat = st_coordinates(data_df)[, 2])
    # One square polygon per grid cell
    xy.sfc <- .polygonize(lonlat_df = lonlat_df, data_df = data_df, 
                         lon = lon, lat = lat, target_proj = shp_crs,
                         original_proj = shp_crs)
    # Area coverage always produces one mask slice per region
    region <- TRUE
  } else {
    ref.df <- data.frame(data = 0,
                         lon = rep(lon, times = length(lat)),
                         lat = rep(lat, each = length(lon)))

    ref.df <- .modifyLonsTo180(units, ref.df, order_idx)
    coord <- as.matrix(data.frame(x = ref.df$lon, y = ref.df$lat))
    
    xy.sfg <- sf::st_multipoint(coord)
    xy.sfc <- sf::st_sfc(xy.sfg)
    # Assign crs of original shapefile
    st_crs(xy.sfc) <- sf::st_crs(shp)

    # Check valid
    if (check_valid) {
      xy.sfc <- st_make_valid(xy.sfc)
      shp <- st_make_valid(shp)
    }
  }

  ## Step (3): Loop through each shp region to create the mask
  if (region) {
    # Stack each per-region [lon, lat] slice along a third dimension
    cfun <- function(a, b) {
      if (length(dim(a)) == 2) {
        dims <- c(dim(a), 2)
      } else {
        dims <- c(dim(a)[1:2], dim(a)[length(dim(a))]+1)
      }
      return(array(c(a,b), dim = dims))
    }
  } else {
    # Merge the per-region masks into a single [lon, lat] array, keeping
    # the non-zero cells of the newest slice
    cfun <- function(a, b) {
      a[which(b != 0)] <- b[which(b != 0)]
      return(a)
    }
  }
  shp_i <- NULL
  ## TODO: Can we replace DoParallel with multiApply?
  if (is.null(ncores)) {
    mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun') %do%
              .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, 
                           xy.sfc = xy.sfc, compute_area_coverage = compute_area_coverage, 
                           find_min_dist = find_min_dist, 
                           id_shape_col = id_shape_col, 
                           max_dist = max_dist, region = region, shp_system = shp_system, ...)
  } else {
    registerDoParallel(ncores)
    mask <- foreach(shp_i = 1:nrow(shp), .combine = 'cfun', .packages = c('sf', 'dplyr')) %dopar%
              .shapetomask(shp = shp, n = shp_i, lon = lon, lat = lat, 
                           xy.sfc = xy.sfc, compute_area_coverage = compute_area_coverage, 
                           find_min_dist = find_min_dist, 
                           id_shape_col = id_shape_col, 
                           max_dist = max_dist, region = region, shp_system = shp_system, ...)
    registerDoSEQ()
  }
  if (region) {
    # With a single region the combine function is never called, so the
    # third dimension has to be added here
    if (length(dim(mask)) == 2) dim(mask) <- c(dim(mask), 1)
    names(dim(mask)) <- c(lon_dim, lat_dim, "reg_id")
  } else {
    names(dim(mask)) <- c(lon_dim, lat_dim)
  }

  # Step 4: Add attributes
  attr(mask, lon_dim) <- lon
  attr(mask, lat_dim) <- lat
  # NOTE(review): 'id_shape_col' may still be NULL here (e.g. reg_names with
  # an unknown shp_system) and shp[[NULL]] would fail -- confirm upstream
  # validation always sets it
  attr(mask, "reg_id") <- shp[[id_shape_col]]
  ## Return all the info from shp
  attr(mask, "shapefile") <- attributes(shp)

  # Step 5: Save to NetCDF
  if (!is.null(fileout)) {
    lat_list <- attr(mask, 'lat')
    lon_list <- attr(mask, 'lon')
    reg_id_list <- attr(mask, 'reg_id')

    .transformToNc(mask, lat_list, lon_list, reg_id_list, fileout, units, lat_dim, lon_dim)
  }
  return(mask)
}

# Build the mask contribution of one shapefile region (row 'n' of 'shp').
#
# Arguments:
#   shp: 'sf' object with all the selected regions.
#   n: row index of the region to process.
#   lon, lat: reference-grid coordinate vectors (already sorted).
#   xy.sfc: the reference grid as sf geometry; a polygon set when
#     'compute_area_coverage' is TRUE, a multipoint otherwise.
#   compute_area_coverage: TRUE fills cells with the area-coverage fraction;
#     FALSE marks the cells whose point intersects the region.
#   find_min_dist: when there is no intersection, fall back to the grid
#     point closest to the region centroid (subject to 'max_dist').
#   id_shape_col: ID column name (only referenced in commented-out warnings).
#   max_dist: maximum accepted distance to the closest grid point.
#   region: FALSE writes the region index 'n' in the cells; TRUE writes 1
#     (the caller stacks one slice per region along a third dimension).
#   shp_system: shapefile database name; used to decide the final flip.
#   ...: passed on to sf::st_intersection.
#
# Returns a numeric matrix with dimensions [lon, lat].
.shapetomask <- function(shp, n, lon, lat, xy.sfc, compute_area_coverage = FALSE, 
                         find_min_dist = FALSE, 
                         id_shape_col = NULL, max_dist = 50, 
                         region = FALSE, shp_system = 'NUTS', ...) {
  shpi <- shp[n, ]
  if (compute_area_coverage) {
    # Fraction of each grid-cell polygon covered by the region, reordered
    # back to the original cell order through the 'value' column
    mask <- xy.sfc %>%
      dplyr::mutate(int = .areacov(geometry, shpi)) %>%
      dplyr::arrange(value) %>%
      dplyr::pull(int)
    dim(mask) <- c(length(lon), length(lat)) 
  } else {
    mask <- array(0, dim = c(length(lon), length(lat)))
    # NOTE: Don't know it's a problem in st_intersection or st_coordinates, tmp_coords may 
    #       not be identical as lon/lat. E.g., (29, 65) may be (29 - -3.552714e-15, 65).
    shp_pol <- sf::st_intersection(x = xy.sfc, y = shpi, ...)
    tmp_coords <- sf::st_coordinates(shp_pol)[, 1:2]
    # Normalize tmp_coords: NULL dim when empty, 1-row matrix when a single
    # point was returned as a bare vector
    if (length(tmp_coords) == 0) {
      dim(tmp_coords) <- NULL
    } else if (is.null(dim(tmp_coords))) {
      tmp_coords <- array(tmp_coords, dim = c(1, length(tmp_coords)))
    }
    if (!is.null(dim(tmp_coords))) {
      
      # polygon_intersection: mark the nearest grid cell for every
      # intersection point
      for (ii in 1:nrow(tmp_coords)) {
        # pt_x <- which(lon == tmp_coords[ii, 1])
        # pt_y <- which.min(abs(lat - tmp_coords[ii, 2]))
        if (!region) {
          # min(abs(lon - tmp_coords[ii, 1]))
          # min(abs(lat - tmp_coords[ii, 2]))
          mask[which.min(abs(lon - tmp_coords[ii, 1])),
               which.min(abs(lat - tmp_coords[ii, 2]))] <- n # here the region index n is assigned to each cell of the shape
        } else {
          mask[which.min(abs(lon - tmp_coords[ii, 1])),
               which.min(abs(lat - tmp_coords[ii, 2]))] <- 1 # binary value when each region gets its own mask slice
        }
      }
    } else if (find_min_dist) {
      # No intersection at all: look for the grid point nearest to the
      # region centroid and accept it if close enough
      x.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,1]
      y.centroid.shpi <- sf::st_coordinates(sf::st_centroid(shpi))[,2]
      dist <- sqrt((xy.sfc[,1] - x.centroid.shpi)**2 + (xy.sfc[,2] - y.centroid.shpi)**2)
      tmp_coords <- array(xy.sfc[which(dist == min(dist, na.rm = TRUE)),], dim = c(1,2))
      colnames(tmp_coords) <- c('X', 'Y')
      # Accept only if within max_dist and the point coincides (to 2
      # decimals) with an actual grid coordinate
      if (max(dist) <= max_dist & (any(round(lat,2) == round(tmp_coords[1,2],2)) & 
                                   any(round(lon,2) == round(tmp_coords[1,1],2))) ) {
        if (length(dim(mask)) == 2) {
          mask[which.min(abs(lon - tmp_coords[, 1])),
               which.min(abs(lat - tmp_coords[, 2]))] <- n
        } else {
          mask[which.min(abs(lon - tmp_coords[, 1])),
               which.min(abs(lat - tmp_coords[, 2]))] <- 1
        }
        # warning(paste0('The reference grid has no intersection with region ', 
        #         ifelse(is.character(id_shape_col), shpi[[id_shape_col]], paste0('n° ', n)), 
        #         ' from the shapefile; the provided grid cell is at a distance of ', dist[which(dist == min(dist, na.rm = TRUE))], 
        #         ' to the centroid of the region (units are: ° or meters depending on the crs of the shapefile).'))
      } else {
        # warning(paste0('The reference grid has no intersection with region ', 
        #         ifelse(is.character(id_shape_col), shpi[[id_shape_col]], paste0('n° ', n))))
      }
    }
  }
  
  # Flip the matrix when the area_coverage is TRUE
  if (compute_area_coverage && grepl("NUTS", shp_system)) {
    mask <- mask[, ncol(mask):1]   # Flip in the horizontal axis
    # mask <- mask[nrow(mask):1, ] # Flip in vertical axis
  }
  
  return(mask)
}

# Function to create polygons from VizRobinson code.
# Build one square polygon per reference-grid cell, centred on each lon/lat
# point of 'lonlat_df', and return them as an sf object with one row per
# cell ('id', 'value' = cell index from data_df$dat, 'geometry').
# NOTE: The original grid must be a regular grid with equal spacing.
.polygonize <- function(lonlat_df, data_df, lon, lat, target_proj, 
                       original_proj) {
  
  # Calculate polygon points from regular lat/lon: half the grid spacing on
  # each side of the cell centre, 4 corners per cell
  d_lon <- abs(lon[2] - lon[1]) / 2
  d_lat <- abs(lat[2] - lat[1]) / 2
  lon_poly <- lat_poly <- rep(NA, 4 * dim(lonlat_df)[1])
  for (ii in 1:dim(lonlat_df)[1]) {
    lon_poly[(ii*4-3):(ii*4)] <- c(lonlat_df$lon[ii] - d_lon, lonlat_df$lon[ii] + d_lon,
                                   lonlat_df$lon[ii] + d_lon, lonlat_df$lon[ii] - d_lon)
    lat_poly[(ii*4-3):(ii*4)] <- c(lonlat_df$lat[ii] - d_lat, lonlat_df$lat[ii] - d_lat,
                                   lonlat_df$lat[ii] + d_lat, lonlat_df$lat[ii] + d_lat)
  }
  
  # To prevent out-of-global lon
  #  lon_poly[which(lon_poly >= 180)] <- 179.9
  #  lon_poly[which(lon_poly < -180)] <- -180
  # To prevent out-of-global lat: clamp corners to [-90, 90]
  lat_poly[which(lat_poly > 90)] <- 90
  lat_poly[which(lat_poly < -90)] <- -90
  
  lonlat_df <- data.frame(lon = lon_poly, lat = lat_poly)
  # Transfer lon/lat to projection
  proj_lonlat <- st_as_sf(lonlat_df, coords = c("lon", "lat"), crs = original_proj)
  # NOTE: if target_proj = "ESRI:54030", on Nord3v2, st_transform has lon and lat swapped!
  proj_lonlat <- st_transform(proj_lonlat, crs = target_proj)
  lonlat_df_proj <- st_coordinates(proj_lonlat)
  
  # Use id to create groups for each polygon (4 consecutive corner points
  # share one id)
  ids <- factor(paste0("id_", 1:dim(data_df)[1]))
  values <- data.frame(id = ids,
                       value = data_df$dat)
  positions <- data.frame(id = rep(ids, each = 4),
                          x = lonlat_df_proj[, 1],
                          y = lonlat_df_proj[, 2])
  datapoly <- merge(values, positions, by = "id")
  datapoly <- st_as_sf(datapoly, coords = c("x", "y"), crs = target_proj)
  
  # Combine the 4 corner points of each id into a POLYGON; carry the cell
  # index along in 'value'
  datapoly <- datapoly %>%
    dplyr::group_by(.data$id) %>%
    dplyr::summarise(geometry = st_combine(geometry)) %>%
    # dplyr::summarise() %>%  # NOTE: VERY SLOW if plot global
    dplyr::mutate(value = values[order(values$id), ]$value) %>%
    st_cast("POLYGON") %>%
    st_convex_hull()  # maintain outer polygon (no bowtie shape)
  
  return(datapoly)
}

# Compute, for every polygon in 'grid', the fraction of its area covered by
# the shapefile geometry 'shp'. Returns a numeric vector aligned with 'grid'
# (0 for cells without intersection), rounded to 6 decimal digits.
.areacov <- function(grid, shp) {
  intersected <- sf::st_intersection(x = grid, y = shp)
  # First column of the 'idx' attribute maps each intersection piece back
  # to the index of its source grid cell
  grid_idx <- attributes(intersected)$idx[, 1]
  coverage <- array(0, length(grid))
  for (k in seq_along(grid_idx)) {
    cell <- grid_idx[k]
    cell_area <- sf::st_area(grid[cell])
    overlap_area <- sf::st_area(intersected[k])
    coverage[cell] <- round(as.numeric(overlap_area / cell_area), digits = 6)
  }
  return(coverage)
}

# Save the mask array and its coordinate variables to a NetCDF file with
# easyNCDF::ArrayToNc.
#
# Arguments:
#   values: the mask array (lon/lat dimensions, optionally 'reg_id').
#   lat, lon: coordinate vectors stored as NetCDF variables.
#   reg_id: region identifiers stored as a NetCDF variable.
#   out_file: path of the NetCDF file to create.
#   crs: 'degrees' or 'meters'; used as the units of lat/lon and mapped to
#     the global 'crs' attribute (EPSG4326 / EPSG3857).
#   lat_dim, lon_dim: names of the lat/lon dimensions in 'values'; if NULL,
#     a list of common names is tried.
.transformToNc <- function(values, lat=NULL, lon=NULL, reg_id=NULL, out_file, crs='degrees', lat_dim, lon_dim) {
  # Candidate names of the lon/lat dimensions of 'values'
  if (is.null(lon_dim)) {
    possible_lon_names <- c("lon", "longitude", "x", "i", "nav_lon")
  } else {
    possible_lon_names <- c(lon_dim)
  }
  
  if (is.null(lat_dim)) {
    possible_lat_names <- c("lat", "latitude" , "y", "j", "nav_lat")  
  } else {
    possible_lat_names <- c(lat_dim)
  }
  
  # Rename the latitude and longitude dimensions to exactly 'lat'/'lon'.
  # NOTE: exact (case-insensitive) matching is used instead of gsub with an
  # alternation pattern; single-letter candidates such as "x" or "i" would
  # otherwise match inside unrelated dimension names (e.g. "reg_id" would
  # become "reg_lond").
  dim_names <- names(dim(values))
  dim_names[tolower(dim_names) %in% tolower(possible_lon_names)] <- "lon"
  dim_names[tolower(dim_names) %in% tolower(possible_lat_names)] <- "lat"
  names(dim(values)) <- dim_names
  
  # Add coordinates as named 1-D variables with easyNCDF metadata
  if (!is.null(lon) && length(lon) > 0) {
    dim(lon)               <- length(lon)
    metadata               <- list(lon = list(units = crs))
    attr(lon, 'variables') <- metadata
    names(dim(lon))        <- 'lon' # right lon name
  }
  
  if (!is.null(lat) && length(lat) > 0) {
    dim(lat)               <- length(lat)
    metadata               <- list(lat = list(units = crs))
    attr(lat, 'variables') <- metadata
    names(dim(lat))        <- 'lat' # right lat name
  }
  if (!is.null(reg_id) && length(reg_id) > 0) {
    dim(reg_id)               <- length(reg_id)
    metadata               <- list(reg_id = list())
    attr(reg_id, 'variables') <- metadata
    names(dim(reg_id))        <- 'reg_id' 
  }
  
  # Translate the units into the EPSG code stored in the file
  if (crs == 'degrees') {
    crs <- 'EPSG4326'
  } else {
    crs <- 'EPSG3857'
  }
  
  # Add the attributes to the file
  attrs <- list(global_attrs = list(crs = crs))
  attributes(values) <- c(attributes(values), attrs)
  
  # Export to NetCDF
  # TODO: add region name reg_name (reg_id)
  ArrayToNc(list(var = values, lon, lat, reg_id = reg_id), out_file)
  
}

# Reorder the first column of 'df' (the data column of the reference-grid
# data frame) following 'order_idx', the permutation that was applied to the
# longitudes after converting them to the [-180, 180] range. Only applies
# when the grid units are degrees; for any other units 'df' is returned
# unchanged.
#
# NOTE(review): at the only call site 'order_idx' has length(lon) elements
# while 'df' has length(lon) * length(lat) rows, so the reordered vector is
# recycled by the data.frame column assignment (the lengths divide evenly).
.modifyLonsTo180 <- function(units, df, order_idx) {
  # Removed a leftover debug message() and dead locals (a lon-column lookup
  # whose result was never used) from the original implementation.
  if (units == 'degrees') {
    first_col <- colnames(df)[1]
    df[first_col] <- df[[first_col]][order_idx]
  }
  
  return(df)
}

# Try the esviz package in your browser
#
# Any scripts or data that you put into this service are public.
#
# esviz documentation built on Feb. 4, 2026, 5:13 p.m.