#' Converts date columns formatted as characters to be of type datetime
#'
## Copyright(c) 2017-2020 R. Mark Sharp
## This file is part of nprcgenekeepr
#' Part of Pedigree Curation
#'
## ## rmsutilityr get_and_or_list
## ## rmsutilityr is_valid_date_str
#'
#' @return A dataframe with an updated table with date columns converted from
#' \code{character} data type to \code{Date} data type. Values that do not
#' conform to the format %Y%m%d are set to NA. NA values are left as NA.
#'
#' @examples
#' \donttest{
#' library(lubridate)
#' set_seed(10)
#' someBirthDates <- paste0(sample(seq(0, 15, by = 3), 10,
#' replace = TRUE) + 2000, "-",
#' sample(1:12, 10, replace = TRUE), "-",
#' sample(1:28, 10, replace = TRUE))
#' someBadBirthDates <- paste0(sample(1:12, 10, replace = TRUE), "-",
#' sample(1:28, 10, replace = TRUE), "-",
#' sample(seq(0, 15, by = 3), 10,
#' replace = TRUE) + 2000)
#' someDeathDates <- sample(someBirthDates, length(someBirthDates),
#' replace = FALSE)
#' someDepartureDates <- sample(someBirthDates, length(someBirthDates),
#' replace = FALSE)
#' ped1 <- data.frame(birth = someBadBirthDates, death = someDeathDates,
#' departure = someDepartureDates)
#' someDates <- ymd(someBirthDates)
#' ped2 <- data.frame(birth = someDates, death = someDeathDates,
#' departure = someDepartureDates)
#' ped3 <- data.frame(birth = someBirthDates, death = someDeathDates,
#' departure = someDepartureDates)
#' someNADeathDates <- someDeathDates
#' someNADeathDates[c(1, 3, 5)] <- ""
#' someNABirthDates <- someDates
#' someNABirthDates[c(2, 4, 6)] <- NA
#' ped4 <- data.frame(birth = someNABirthDates, death = someNADeathDates,
#' departure = someDepartureDates)
#'
#' ## convertDate identifies bad dates
#' result = tryCatch({
#' convertDate(ped1)
#' }, warning = function(w) {
#' print("Warning in date")
#' }, error = function(e) {
#' print("Error in date")
#' })
#'
#' ## convertDate with error flag returns error list and not an error
#' convertDate(ped1, reportErrors = TRUE)
#'
#' ## convertDate recognizes good dates
#' all(is.Date(convertDate(ped2)$birth))
#' all(is.Date(convertDate(ped3)$birth))
#'
#' ## convertDate handles NA and empty character string values correctly
#' convertDate(ped4)
#' }
#'
#' @param ped a dataframe of pedigree information that may contain birth,
#' death, departure, or exit dates. The fields are optional, but will be used
#' if present.(optional fields: birth, death, departure, and exit).
#' @param time.origin date object used by \code{as.Date} to set \code{origin}.
#' @param reportErrors logical value if TRUE will scan the entire file and
#' make a list of all errors found. The errors will be returned in a
#' list of list where each sublist is a type of error found.
#' @importFrom stringi stri_trim_both stri_c
#' @export
convertDate <- function(ped, time.origin = as.Date("1970-01-01"),
reportErrors = FALSE) {
## Ignore records added because of unknown parents
if (any("recordStatus" %in% names(ped))) {
addedPed <- ped[ped$recordStatus == "added", ]
ped <- ped[ped$recordStatus == "original", ]
if (nrow(ped) == 0)
return(rbind(ped, addedPed))
}
headers <- tolower(names(ped))
headers <- headers[headers %in% getDateColNames()]
format <- "%Y-%m-%d"
invalid_date_rows <- NULL
for (header in headers) {
dates <- ped[[header]]
if (any(class(dates) %in% c("factor","logical", "integer"))) {
dates <- as.character(dates)
}
if (class(dates) == "Date") {
dates <- removeEarlyDates(dates, 1000)
originalNAs <- is.na(dates)
dates <- dates[!originalNAs]
} else if (class(dates) == "character") {
dates[stri_trim_both(dates) == ""] <- NA
ped[[header]] <- dates
originalNAs <- is.na(dates)
dates <- dates[!originalNAs]
if (length(dates) > 0) {
dates <- insertSeparators(dates)
dates <- as.Date(dates, format = format, origin = time.origin,
optional = TRUE)
dates <- removeEarlyDates(dates, 1000)
}
} else {
stop(stri_c("class(dates) is not 'character', 'factor', 'integer', or ",
"'Date' it is == ", class(dates)))
}
if (any(is.na(dates))) {
goodAndBadDates <- ifelse(is.na(dates), "bad", "good")
originalDates <- as.character(ped[[header]])
originalDates[originalNAs] <- "good"
originalDates[!originalNAs] <- goodAndBadDates
if (reportErrors) {
invalid_date_rows <- c(invalid_date_rows,
seq_along(originalDates)[originalDates == "bad"])
next
}
rowNums <- get_and_or_list(
seq_along(originalDates)[originalDates == "bad"], "and")
stop(paste0("Column '", header, "' has invalid dates on row(s) ",
rowNums, "."))
}
ped[!originalNAs, header] <- dates
ped[originalNAs, header] <- NA # For those NAs from dates <= 1000 CE
ped[[header]] <- as.Date(as.integer(ped[[header]]), origin = time.origin)
}
if (reportErrors) {
if (!is.null(invalid_date_rows))
invalid_date_rows <- as.character(sort(invalid_date_rows))
return(invalid_date_rows)
} else {
## Add back records of unknown parents
if (any("recordStatus" %in% names(ped)))
ped <- rbind(ped, addedPed)
return(ped)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.