R/quality_checks.R

Defines functions temporal_resolution earliest_date spatial_resolution taxo_level

Documented in earliest_date spatial_resolution taxo_level temporal_resolution

#' Clean data based on lower taxon level
#'
#' Clean data based on lower taxon level
#'
#' @section samplePassData:
#' When resolution is Species, Subspecies and Species will pass.
#'
#' @section sampleFailData:
#' When resolution is Species, Family or Genus or any lower ranks will fail.
#'
#' @section targetDWCField:
#' taxonRank
#'
#' @section checkCategory:
#' taxonomic
#'
#' @param bddata Bio diversity data in a data frame
#' @param res The low rank of species required
#' 
#' @examples
#' 
#' if(interactive()){
#' 
#' library(rgbif)
#' occdat <- occ_data(
#'   country = 'AU', # Country code for australia
#'   classKey = 359, # Class code for mammalia
#'   limit = 50 # Get only 50 records
#' )
#' myData <- occdat$data
#' 
#' responses <- taxo_level(myData, 'SPECIES')
#' 
#' }
#'
#' @export
taxo_level <- function(bddata, res = "SPECIES") {
    assertive::assert_is_data.frame(bddata)
    assertive::assert_has_cols(bddata)
    
    ranks <-
        c("CLASS",
          "ORDER",
          "FAMILY",
          "GENUS",
          "SPECIES",
          "SUBSPECIES")
    res <- toupper(res)
    
    if (!(res %in% ranks)) {
        warning("Rank Value unknown. It should be FAMILY, GENUS, SPECIES or SUBSPECIES")
        return(bddata)
    }
    
    idx <- which(ranks == res)
    if (idx > 0 && ("taxonRank" %in% colnames(bddata))) {
        message(paste("taxoLevel:", "\n Removing records above :", res, "\n"))
        bddata[, "bdclean.taxo_level"] <- FALSE
        for (i in idx:length(ranks)) {
            bddata[which(bddata[, "taxonRank"] == ranks[i]), "bdclean.taxo_level"] <-
                TRUE
        }
    }
    
    return(bddata)
}

#' Clean data based on spatial resolution
#'
#' Clean data based on spatial resolution
#'
#' @section samplePassData:
#' When resolution is 100 meters, Coordinate Uncertainties below 100 meters will pass.
#'
#' @section sampleFailData:
#' When resolution is 100 meters, Coordinate Uncertainties above 100 meters will fail.
#'
#' @section targetDWCField:
#' coordinateUncertaintyInMeters
#'
#' @section checkCategory:
#' spatial
#'
#' @param bddata Bio diversity data in a data frame
#' @param res The highest coordinate uncertainty required
#' 
#' @examples
#' 
#' if(interactive()){
#' 
#' library(rgbif)
#' occdat <- occ_data(
#'   country = 'AU', # Country code for australia
#'   classKey = 359, # Class code for mammalia
#'   limit = 50 # Get only 50 records
#' )
#' myData <- occdat$data
#' 
#' responses <- spatial_resolution(myData, 1500)
#' 
#' }
#'
#' @export
spatial_resolution <- function(bddata, res = 100) {
    assertive::assert_is_data.frame(bddata)
    assertive::assert_has_cols(bddata)
    
    message(paste(
        "spatialResolution:",
        "\n Removing records above :",
        res,
        "\n"
    ))
    res <- as.numeric(res)
    if (res > 0 && ("coordinateUncertaintyInMeters" %in% colnames(bddata))) {
        bddata[, "bdclean.spatial_resolution"] <- FALSE
        bddata[which(bddata[, "coordinateUncertaintyInMeters"] < res), "bdclean.spatial_resolution"] <-
            TRUE
    }
    return(bddata)
}

#' Clean data based on earliest date.
#'
#' Clean data based on earliest date.
#'
#' @section samplePassData:
#' When resolution is 20-Jan-2005, records recorded after the date will pass.
#'
#' @section sampleFailData:
#' When resolution is 20-Jan-2005, records recorded before the date will fail.
#'
#' @section targetDWCField:
#' eventDate
#'
#' @section checkCategory:
#' temporal
#'
#' @param bddata Bio diversity data in a data frame
#' @param res The earliest data required
#' 
#' @examples
#' 
#' if(interactive()){
#' 
#' library(rgbif)
#' occdat <- occ_data(
#'   country = 'AU', # Country code for australia
#'   classKey = 359, # Class code for mammalia
#'   limit = 50 # Get only 50 records
#' )
#' myData <- occdat$data
#' responses <- earliest_date(myData, '2000-01-01')
#' 
#' }
#'
#' @export
earliest_date <- function(bddata, res = "1700-01-01") {
    assertive::assert_is_data.frame(bddata)
    assertive::assert_has_cols(bddata)
    
    message(paste("earliestDate:", "\n Removing records above :", res, "\n"))
    dates <- strsplit(res, " ")[[1]]
    bddata <- as.data.frame(bddata)
    ed <- try(as.Date(dates[1], format = "%Y-%m-%d"))
    if (class(ed) == "try-error" || is.na(ed) || !("eventDate" %in% colnames(bddata))) {
        warning("That date wasn't correct!")
        return(bddata)
    }
    bddata[, "bdclean.earliest_date"] <- FALSE
    bddata[which(as.Date(bddata[, "eventDate"]) > ed), "bdclean.earliest_date"] <-
        TRUE
    return(bddata)
}

#' Clean data based on temporal resolution
#'
#' Clean data based on temporal resolution
#'
#' @section samplePassData:
#' When resolution is day, records with day specified will pass.
#'
#' @section sampleFailData:
#' When resolution is month, records with NA/empty month specified will fail.
#'
#' @section targetDWCField:
#' day, month, year
#'
#' @section checkCategory:
#' temporal
#'
#' @param bddata Bio diversity data in a data frame
#' @param res restriction of records with/without data, month, year fields
#' 
#' @examples
#' 
#' if(interactive()){
#' 
#' library(rgbif)
#' occdat <- occ_data(
#'   country = 'AU', # Country code for australia
#'   classKey = 359, # Class code for mammalia
#'   limit = 50 # Get only 50 records
#' )
#' myData <- occdat$data
#' responses <- temporal_resolution(myData, 'Day')
#' 
#' }
#'
#' @export
temporal_resolution <- function(bddata, res = "Day") {
    assertive::assert_is_data.frame(bddata)
    assertive::assert_has_cols(bddata)
    
    message(paste(
        "temporalResolution:",
        "\n Removing records above :",
        res,
        "\n"
    ))
    bddata <- as.data.frame(bddata)
    
    if (res == "Day" && ("day" %in% colnames(bddata))) {
        bddata[, "bdclean.temporal_resolution"] <- FALSE
        bddata[which(!is.na(bddata$day)), "bdclean.temporal_resolution"] <-
            TRUE
    }
    if (res == "Month" && ("month" %in% colnames(bddata))) {
        bddata[, "bdclean.temporal_resolution"] <- FALSE
        bddata[which(!is.na(bddata$month)), "bdclean.temporal_resolution"] <-
            TRUE
    }
    if (res == "Year" && ("year" %in% colnames(bddata))) {
        bddata[, "bdclean.temporal_resolution"] <- FALSE
        bddata[which(!is.na(bddata$year)), "bdclean.temporal_resolution"] <-
            TRUE
    }
    return(bddata)
}
vijaybarve/bdclean documentation built on Oct. 8, 2021, 9:18 p.m.