#' testCharacterDate
#'
#' This function is intended to test a characters date that was
#' recently converted from a text file.
#'
#' It can extract features about the date using regular expressions
#'
#' This function assumes the dateSeparator is "-", ".", or "/"
#'
#' format = "american" ... date order is (1) mm (2) day (3) year
#' format = "british" ... date order is (1) day (2) mm (3) year
#' format = "international" ... date order is (1) year (2) mm (3) day
#'
#' This function outputs a list of named parameters
#'
#' dateSeparator - character used to separate dates
#' dateTimeSeparator - chacters used to separate date and time
#' american - is this american formatted date
#' british - is this a British formatted date
#' international - is this an internationl formatted date
#' fullYear - is a full year given
#' partialYear - is a partial year given
#' withTime - is the time given
#' withoutTime - is time not given
#' formatStyle - American, British, International, or Unknown
#' yearFormat - Full or Partial
#' timePresent - is the time given
#'
#' @param characterDate the date and or time an event occured
#' @param testUpperLimit number of tests to perform to validate
#' @param testPercentPositive percent positive to confirm one date type
#' @keywords time
#' @examples
#'
#' testCharacterDate("2018-01-05 13:45:01")
#' testCharacterDate("2018/04/30 15:07:41")
#' testCharacterDate("2018/04/30 15:07")
#'
#'
#' # This date outputs an unknown format style because the
#' # American and British styles can become confused.
#' testCharacterDate("01-01-2010 12:00:00")
#'
#' @export
testCharacterDate <- function(characterDate,
testUpperLimit = 100,
testPercentPositive = 50) {
out <- list(dateSeparator = NULL,
dateTimeSeparator = NULL,
american = FALSE,
british = FALSE,
international = FALSE,
fullYear = FALSE,
partialYear = FALSE,
withTime = FALSE,
withoutTime = FALSE,
formatStyle = NULL,
yearFormat = NULL,
timePresent = NULL,
secondsPresent = NULL
)
if(length(characterDate) < testUpperLimit){
testUpperLimit <- length(characterDate)
}
if (testUpperLimit < 1){testUpperLimit <- 1}
if (testPercentPositive > 100){testPercentPositive <- 100}
if (testPercentPositive < 0){testPercentPositive <- 0}
# identify dateSeparators
out$dateSeparator <- if(grepl("[/]", characterDate[1])){
"/"
}else if(grepl("[/.]", characterDate[1])){
"."
}else if(grepl("[-]", characterDate[1])){
"-"
}else{
" "
}
# Regular Expression Parts
sepRegEx <- "[\\/\\.\\-]"
ddRegEx <- "[0-3]?[0-9]"
mmRegEx <- "[0-1]?[0-9]"
yyRegEx <- "[0-9][0-9]"
YYRegEx <- "[0-2][0-9][0-9][0-9]"
HHMMSSRegEx <- "[0-2]?[0-9]:[0-6]?[0-9]:[0-6]?[0-9]"
HHMMRegEx <- "[0-2]?[0-9]:[0-6]?[0-9]"
HHMMSSorNoSSRegEx <- "[0-2]?[0-9]:[0-6]?[0-9]:?[0-6]?[0-9]?"
# Regular Expression Order
dateTypesRegEx <- list(partialYear =
list(american = c(mmRegEx, ddRegEx, yyRegEx),
british = c(ddRegEx, mmRegEx, yyRegEx),
international = c(yyRegEx, mmRegEx, ddRegEx)),
fullYear =
list(american = c(mmRegEx, ddRegEx, YYRegEx),
british = c(ddRegEx, mmRegEx, YYRegEx),
international = c(YYRegEx, mmRegEx, ddRegEx)))
for(yearType in c("partialYear", "fullYear")){
for(formatType in c("american", "british", "international")){
DateRegEx <- paste0(dateTypesRegEx[[yearType]][[formatType]][1], sepRegEx,
dateTypesRegEx[[yearType]][[formatType]][2], sepRegEx,
dateTypesRegEx[[yearType]][[formatType]][3])
# test for no time
testVector <- as.logical(sapply(characterDate[1:testUpperLimit],
function(x){grepl(paste0("^", DateRegEx, "$"), x)}))
if(sum(testVector) / length(testVector) * 100 > testPercentPositive){
out[[yearType]] <- TRUE
out[[formatType]] <- TRUE
out$withoutTime <- TRUE
out$secondsPresent <- FALSE
} else {
# test for time present with seconds
testVector <- as.logical(
sapply(characterDate[1:testUpperLimit],
function(x){grepl(
paste0("^", DateRegEx, "(\\D)", HHMMSSRegEx, "$"), x)}))
if(sum(testVector) / length(testVector) * 100 > testPercentPositive){
out[[yearType]] <- TRUE
out[[formatType]] <- TRUE
out$withTime <- TRUE
out$secondsPresent <- TRUE
out$dateTimeSeparator <- gsub(
paste0("^", DateRegEx, "(\\D)", HHMMSSorNoSSRegEx, "$"), "\\1",
characterDate[min(which(testVector == TRUE))])
} else {
# test for time present with no seconds
testVector <- as.logical(
sapply(characterDate[1:testUpperLimit],
function(x){grepl(
paste0("^", DateRegEx, "\\D", HHMMRegEx, "$"), x)}))
if(sum(testVector) / length(testVector) * 100 > testPercentPositive){
out[[yearType]] <- TRUE
out[[formatType]] <- TRUE
out$withTime <- TRUE
out$secondsPresent <- FALSE
out$dateTimeSeparator <- gsub(
paste0("^", DateRegEx,"(\\D)", HHMMSSorNoSSRegEx,
"$"), "\\1", characterDate[min(which(testVector == TRUE))])
}
}
}
}
}
if(sum(c(out$american, out$british, out$international)) == 1){
if(out$american){
out$formatStyle <- "American"
} else if (out$british){
out$formatStyle <- "British"
} else if (out$international){
out$formatStyle <- "International"
} else {
out$formatStyle <- "Unknown"
}
}else{
out$formatStyle <- "Unknown"
}
if(sum(c(out$fullYear, out$partialYear)) == 1){
if(out$fullYear){
out$yearFormat <- "Full"
} else if(out$partialYear) {
out$yearFormat <- "Partial"
} else {
out$yearFormat <- "Unknown"
}
}else if(sum(c(out$fullYear, out$partialYear)) > 1){
warning("Format type unknown because multiple formats could be valid.")
out$yearFormat <- "Unknown"
}else{
out$yearFormat <- "Unknown"
}
if(out$withTime && !out$withoutTime){
out$timePresent <- TRUE
}else if(!out$withTime && out$withoutTime){
out$timePresent <- FALSE
}else{
out$timePresent <- "unknown"
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.