#' Resolves messy dates into a single value
#'
#' This collection of S3 methods 'resolve' messy dates into a single date
#' according to some explicit bias,
#' such as returning the minimum or maximum date,
#' the mean, median, or modal date,
#' or a random date from among the possible resolutions for each messy date.
#' If the date is not 'messy' (i.e. has no annotations)
#' then just that precise date is returned.
#' This can be useful for various descriptive or inferential projects.
#' @param ... a mdate object
#' @param na.rm Should NAs be removed? True by default.
#' @importFrom stringr str_detect
#' @return A single scalar or vector of dates
#' @examples
#' d <- as_messydate(c("2008-03-25", "?2012-02-27", "2001-01?", "2001~",
#' "2001-01-01..2001-02-02", "{2001-01-01,2001-02-02}",
#' "{2001-01,2001-02-02}", "2008-XX-31"))
#' d
#' min(d)
#' max(d)
#' mean(d)
#' median(d)
#' modal(d)
#' random(d)
#' @name resolve
NULL
#> NULL
#' @rdname resolve
#' @export
min.mdate <- function(..., na.rm = TRUE) {
x <- list(...)
y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)),
recursive = FALSE)
y <- unlist(lapply(y, function(x) min(x, na.rm = na.rm)),
recursive = FALSE)
y
}
#' @rdname resolve
#' @export
max.mdate <- function(..., na.rm = TRUE) {
x <- list(...)
y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)),
recursive = FALSE)
y <- unlist(lapply(y, function(x) max(x, na.rm = na.rm)),
recursive = FALSE)
y
}
#' @rdname resolve
#' @importFrom stats median
#' @export
median.mdate <- function(..., na.rm = TRUE) {
x <- list(...)
y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)),
recursive = FALSE)
y <- unlist(lapply(y, function(z) {
if (length(z) %% 2 == 0) {
z <- unlist(z[-1])
z <- as.character(median(z, na.rm = na.rm))
z
}
else{
z <- as.character(median(z, na.rm = na.rm))
z
}
}), recursive = FALSE)
y
}
#' @rdname resolve
#' @param trim the fraction (0 to 0.5) of observations to be trimmed
#' from each end of x before the mean is computed.
#' Values of trim outside that range are taken as the nearest endpoint.
#' @importFrom lubridate as_date
#' @export
mean.mdate <- function(..., trim = 0, na.rm = TRUE) {
x <- list(...)
y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)),
recursive = FALSE)
y <- unlist(lapply(y, function(x) {
if (length(x) > 1 & stringr::str_detect(x[1], "^-", negate = TRUE)) {
x <- as.character(mean(as.Date(x), trim = 0, na.rm = TRUE))
}
if (length(x) > 1 & stringr::str_detect(x[1], "^-")) {
x <- paste0("-", as.character(mean(lubridate::as_date(x),
trim = 0, na.rm = TRUE)))
x <- zero_padding(x)
}
x
}), recursive = FALSE)
y
}
#' @rdname resolve
#' @export
modal <- function(..., na.rm = FALSE) UseMethod("modal")
#' @rdname resolve
#' @export
modal.mdate <- function(..., na.rm = TRUE) {
x <- list(...)
y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)),
recursive = FALSE)
getmode <- function(v) {
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
y <- unlist(lapply(y, function(x) {
if (length(x) > 1) x <- as.character(getmode(x))
x
}), recursive = FALSE)
y
}
#' @rdname resolve
#' @param size a non-negative integer giving the number of items to choose.
#' @param replace should sampling be with replacement?
#' @param prob a vector of probability weights
#' for obtaining the elements of the vector being sampled.
#' @export
random <- function(..., size,
replace = FALSE,
prob = NULL) UseMethod("random")
#' @rdname resolve
#' @export
random.mdate <- function(...,
size,
replace = FALSE,
prob = NULL) {
x <- list(...)
y <- unlist(lapply(x, function(y) ifelse(!is_precise(y), expand(y), y)),
recursive = FALSE)
y <- unlist(lapply(y, function(x) {
if (length(x) > 1) x <- as.character(sample(x, size = 1))
x
}), recursive = FALSE)
y
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.