R/jbd_coordinates_transposed.R

Defines functions jbd_coordinates_transposed

Documented in jbd_coordinates_transposed

#' Identify transposed geographic coordinates
#'
#' This function flags and corrects records when latitude and longitude appear
#' to be transposed. 
#' This function will preferably use the countryCode column generated by 
#' [bdc::bdc_country_standardized()].
#'
#' @family prefilter
#' @param data A data frame or tibble. Containing a unique identifier for each record,
#' geographical coordinates, and country names. Coordinates must be expressed
#' in decimal degrees and WGS84.
#' @param idcol A character string. The column name with a unique record identifier.
#' Default = "database_id".
#' @param sci_names A character string. The column name with species' scientific
#' names. Default = "scientificName".
#' @param lat A character string. The column name with latitudes. Coordinates must
#' be expressed in decimal degrees and WGS84. Default = "decimalLatitude".
#' @param lon A character string. The column name with longitudes. Coordinates must be
#' expressed in decimal degrees and WGS84. Default = "decimalLongitude".
#' @param country A character string. The column name with the country
#' assignment of each occurrence record. Default = "country".
#' @param countryCode A character string. The column name containing an ISO-2 country code for 
#' each record.
#' @param border_buffer Numeric. Must have value greater than or equal to 0. 
#' A distance in decimal degrees used to
#' created a buffer around each country. Records within a given country and at
#' a specified distance from the border will be not be corrected.
#' Default = 0.2 (~22 km at the equator).
#' @param save_outputs Logical. Indicates if a table containing transposed coordinates should be 
#' saved for further inspection. Default = FALSE.
#' @param fileName A character string. The out file's name.
#' @param path A character string. A path as a character vector for where to create the directories
#' and save the figures. If 
#' no path is provided (the default), the directories will be created using [here::here()].
#' @param scale Passed to rnaturalearth's ne_countries().
#' Scale of map to return, one of 110, 50, 10 or 'small', 'medium', 'large'. Default = "large".
#' @param mc.cores Numeric. If > 1, the jbd_correct_coordinates function will run in parallel
#' using mclapply using the number of cores specified. If = 1 then it will be run using a serial
#' loop. NOTE: Windows machines must use a value of 1 (see ?parallel::mclapply). Additionally,
#' be aware that each thread can use large chunks of memory.
#'  Default = 1.#'
#' @details This test identifies transposed coordinates based on mismatches between the 
#' country provided for a record and the record's latitude and longitude coordinates. Transposed
#' coordinates often fall outside of the indicated country (i.e., in other
#' countries or in the sea). Different coordinate transformations are
#' performed to correct country/coordinates mismatches. Importantly, verbatim
#' coordinates are replaced by the corrected ones in the returned database. A
#' database containing verbatim and corrected coordinates is created in
#' "Output/Check/01_coordinates_transposed.csv" if save_outputs == TRUE. The
#' columns "country" and "countryCode" can be retrieved by using the function
#' [bdc::bdc_country_standardized].
#'
#' @return A tibble containing the column "coordinates_transposed" which indicates if 
#' verbatim coordinates were not transposed (TRUE). Otherwise
#' records are flagged as (FALSE) and, in this case, verbatim coordinates are
#' replaced by corrected coordinates.
#'
#' @importFrom readr write_excel_csv
#' @importFrom dplyr tibble rename mutate select contains pull
#' @importFrom here here
#'
#' @export
#' 
#' @importFrom dplyr %>%
#'
#' @examples
#' \donttest{
#' if(requireNamespace("rnaturalearthdata")){
#' database_id <- c(1, 2, 3, 4)
#' scientificName <- c(
#'   "Rhinella major", "Scinax ruber",
#'   "Siparuna guianensis", "Psychotria vellosiana"
#' )
#' decimalLatitude <- c(63.43333, -14.43333, -41.90000, -46.69778)
#' decimalLongitude <- c(-17.90000, -67.91667, -13.25000, -13.82444)
#' country <- c("BOLIVIA", "bolivia", "Brasil", "Brazil")
#'
#' x <- data.frame(
#'   database_id, scientificName, decimalLatitude,
#'   decimalLongitude, country
#' )
#'
#' # Get country codes
#' x <- bdc::bdc_country_standardized(data = x, country = "country")
#'
#' jbd_coordinates_transposed(
#'   data = x,
#'   idcol = "database_id",
#'   sci_names = "scientificName",
#'   lat = "decimalLatitude",
#'   lon = "decimalLongitude",
#'   country = "country_suggested",
#'   countryCode = "countryCode",
#'   border_buffer = 0.2,
#'   save_outputs = FALSE,
#'   scale = "medium"
#' ) 
#' }
#' } # END if require
#'
jbd_coordinates_transposed <- function(data,
           idcol = "database_id",
           sci_names = "scientificName",
           lat = "decimalLatitude",
           lon = "decimalLongitude",
           country = "country",
           countryCode = "countryCode",
           border_buffer = 0.2,
           save_outputs = FALSE,
           fileName = NULL,
           scale = "large",
           path = NULL,
           mc.cores = 1) {
    decimalLatitude <- decimalLongitude <- database_id <- scientificName <- NULL
    
    requireNamespace("dplyr")

    suppressWarnings({
      check_require_cran("rnaturalearth")
      check_require_cran("readr")
    })
    
    # Ensure that working directories are maintain on exit from function
    oldwd <- getwd()           # code line i 
    on.exit(setwd(oldwd))        # code line i+1 
    
    # Copy original wd
    OGwd <- getwd()
    
    sf::sf_use_s2(TRUE)
    
    data <- dplyr::tibble(data)
    minimum_colnames <-
      c(idcol, sci_names, lat, lon, country, countryCode)
    
    if (length(minimum_colnames) < 6) {
      stop("Fill all function arguments: idcol, sci_names, lon, lat, and
         country")
    }
    
    if (!all(minimum_colnames %in% colnames(data))) {
      stop(
        "These columns names were not found in your database: ",
        paste(minimum_colnames[!minimum_colnames %in% colnames(data)],
              collapse = ", "),
        call. = FALSE
      )
    }

    # Temporarily change names of the collumn .summary to avoid error of duplicated coordinates
    if(".summary" %in% names(data)){
      w <- which(names(data) == ".summary")
      names(data)[w] <- "temp_summary"
    }
    
    # Standardizing columns names
    data <-
      data %>%
      dplyr::rename(
        database_id = {{ idcol }},
        decimalLatitude = {{ lat }},
        decimalLongitude = {{ lon }},
        scientificName = {{ sci_names }},
        countryCode = {{ countryCode }}
      )

    # converts coordinates columns to numeric
    data <-
      data %>%
      dplyr::mutate(
        decimalLatitude = as.numeric(decimalLatitude),
        decimalLongitude = as.numeric(decimalLongitude)
      )

    worldmap <- jbd_get_world_map(scale = scale)  # get world map and country iso

    # Correct latitude and longitude transposed
    message("Correcting latitude and longitude transposed\n")
    corrected_coordinates <-
      jbd_correct_coordinates(
        data = data,
        x = "decimalLongitude",
        y = "decimalLatitude",
        sp = "scientificName",
        idcol = idcol,
        cntr_iso2 = "countryCode",
        world_poly = worldmap,
        world_poly_iso = "iso2c",
        border_buffer = border_buffer,
        mc.cores = mc.cores
      )
    
    if (!is.null(corrected_coordinates)) {
      
      # Exports a table with verbatim and transposed xy
      corrected_coordinates <-
        corrected_coordinates %>%
        dplyr::select(database_id, scientificName, dplyr::contains("decimal"))
      
      if (save_outputs) {
        jbd_create_dir(path = path)
        setwd(path)
        corrected_coordinates %>%
          readr::write_excel_csv(
            paste(path, fileName, sep = "/"),
                           append = TRUE)
        message(
          paste(
            "\nCheck database containing coordinates corrected in:\nOutput/Check/01_coordinates_transposed.csv",
            "\n", "This will be appended to any existing rows from THIS run."
          )
        )
      }
      
      # finding the position of records with lon/lat modified
      w <-
        which(data %>% dplyr::pull(database_id) %in% (corrected_coordinates %>% dplyr::pull(database_id)))
      
      data[w, "decimalLatitude"] <-
        corrected_coordinates[, "decimalLatitude_modified"]
      
      data[w, "decimalLongitude"] <-
        corrected_coordinates[, "decimalLongitude_modified"]
      
      # Flags transposed coordinates
      data$coordinates_transposed <- TRUE
      data[w, "coordinates_transposed"] <- FALSE
      
      # Return collumn .summary
      if("temp_summary" %in% names(data)){
        w <- which(names(data) == "temp_summary")
        names(data)[w] <- ".summary"
      }
      
      message(
        paste(
          "\njbd_coordinates_transposed:\nCorrected",
          format(sum(data$coordinates_transposed == FALSE, na.rm = TRUE), big.mark = ","),
          "records.\nOne columns were added to the database.\n"
        )
      )
      
      return(data)
    } else{
      
      # Return collumn .summary
      if("temp_summary" %in% names(data)){
        w <- which(names(data) == "temp_summary")
        names(data)[w] <- ".summary"
      }
      
      message("No latitude and longitude were transposed\n")
      return(data)
    }
    setwd(OGwd)
  }

Try the BeeBDC package in your browser

Any scripts or data that you put into this service are public.

BeeBDC documentation built on Nov. 4, 2024, 9:06 a.m.