#' Overwrites the get.incidence function for obkData objects to support doBy
#'
#' Creates different time series based on the list of factor variables.
#' This function should eventually migrate back into the OutbreakTools package.
#'
#' @param x The object (typically obkdata) to compute the incidence for.
#' @param ... Whatever
#'
#' @author Michael Höhle
#' @export
setGeneric("get.incidence2", function(x, ...) standardGeneric("get.incidence2"))
#' Modified obkData method such that doBy statements are possible
#'
#' @param x See obkData::get.incidence
#' @param data See obkData::get.incidence
#' @param where See obkData::get.incidence
#' @param val.min See obkData::get.incidence
#' @param val.max See obkData::get.incidence
#' @param val.kept See obkData::get.incidence
#' @param regexp See obkData::get.incidence
#' @param from See obkData::get.incidence
#' @param to See obkData::get.incidence
#' @param interval See obkData::get.incidence
#' @param add.zero See obkData::get.incidence
#' @param doBy A character list of factor variables in the individuals slot to compute the ts for.
#' @param ... See obkData::get.incidence
#' @export
setMethod("get.incidence2", "obkData", function(x, data, where=NULL, val.min=NULL, val.max=NULL, val.kept=NULL, regexp=NULL,
from=NULL, to=NULL, interval=1, add.zero=TRUE, doBy=NULL, ...){
## HANDLE ARGUMENTS ##
if(is.null(val.min)) val.min <- -Inf
if(is.null(val.max)) val.max <- Inf
## GET DATA ##
df <- get.data(x, data=data, where=where, showSource=TRUE)
if(is.null(df)) stop(paste("Data",data,"cannot be found in this obkData object"))
## call specific procedures if applicable ##
if(inherits(df, c("obkSequences", "obkContacts"))) {
return(get.incidence(df, from=from, to=to,
interval=interval, add.zero=add.zero))
}
## OTHERWISE: DATA ASSUMED TAKEN FROM RECORDS ##
## if data=='records', keep the first data.frame of the list ##
if(is.list(df) && !is.data.frame(df) && is.data.frame(df[[1]])) df <- df[[1]]
## get dates ##
if(!"date" %in% names(df)) stop("no date in the data")
dates <- df$date
## get optional values associated to the dates ##
## keep 'data' if it is there
if(data %in% names(df)){
values <- df[[data]]
} else { ## else keep first optional field
temp <- !names(df) %in% c("individualID","date") # fields being not "individualID" or "date"
if(any(temp)) {
values <- df[,min(which(temp))]
} else {
values <- NULL
}
}
## EXTRACT RELEVANT DATES ##
if(!is.null(values)){
toKeep <- rep(TRUE, length(values))
## if 'values' is numeric ##
if(is.numeric(values)){
toKeep <- toKeep & (values>=val.min & values<=val.max)
}
## if val.kept is provided ##
if(!is.null(val.kept)) {
toKeep <- toKeep & (values %in% val.kept)
}
## if regexp is provided ##
if(!is.null(regexp)) {
temp <- rep(FALSE, length(values))
temp[grep(regexp, values, ...)] <- TRUE
toKeep <- toKeep & temp
}
dates <- dates[toKeep]
}
##If there are no dates we are done.
if(length(dates)==0) return(NULL)
##Prepare the return list
res <- list()
#If there is no from-to specification make
#sure it's not data subset dependend, but is the
#same for each subset.
if (is.null(from) & is.null(to)) {
from <- min(dates)
to <- max(dates)
}
##Loop over all variables in doBy
if (!is.null(doBy)) {
for (i in seq_len(length(doBy))) {
# browser()
theData <- get.data(x, data=doBy[[i]], showSource=TRUE)
if (is.null(theData)) stop(paste0("Data for ",doBy[[i]]," cannot be found in this obkData object."))
if (!is.factor(theData[,doBy[[i]]])) stop("The variable ",doBy[[i]]," is not a factor.")
res[[doBy[i]]] <- tapply(dates, INDEX=theData[,doBy[[i]]], FUN=get.incidence, from=from, to=to, interval=interval, add.zero=add.zero,simplify=FALSE)
}
} else {
res <- list(get.incidence(dates, from=from, to=to, interval=interval, add.zero=add.zero))
}
## RETURN OUTPUT ##
return(res)
}) # end obkData method
#' Helper function to format a get.incidence list of data.frames
#' to a multivariate xts object
#'
#' @param incList List of lists containing the data.frames from get.incidence2
#' @return An xts object corresponding to the flattened incList
#' @export
inc2xts <- function(incList) {
#This was really the most efficient way of collapsing it.
df <- sapply(unlist(incList,recursive=FALSE), "[[", 2)
#Create xts object
xts <- xts::xts(x=df, order.by=incList[[1]][[1]]$date)
return(xts)
}
#' Simple Date based aggregation based on the data.table function.
#'
#' The function allows to split by a single factor variable. More factors
#' as well as factor interactions are currently not supported.
#'
#' @details The formula has to be of the type LHSVar ~ RHSVar, where LHSVar is
#' a column of class Dates and RHSVar is a column containing a factor variable.
#' Currently, the formula is unrwapped manually. A better solution is to use
#' formula syntax. Note: A multivariate time series for all levels of RHSVar
#' (including NA if present) is generated.
#'
#' @param formula Formula of type LHSVar ~ RHSVar, where LHSVar is
#' a column of class Dates and RHSVar is a column containing a factor variable
#' @param dateProjFun Date projection function, i.e. how are dates projected down (i.e. monday, start of month, etc.). Result must be a Date again!
#' @param dRange Vector of length two giving the start and stop dates, the actual range will be range(dateProjFun(dRange)).
#' @param data data.frame containing the LHSVar RHSVar variables.
#' @return xts time series
#' @import data.table
#' @export
linelist2xts <- function(formula, dateProjFun=identity, dRange=NULL, data) {
#Dirty unwrapping of the formula. ToDo: Use boxplot way.
if (formula[[1]] != "~") stop("Not a valid formula.")
LHSVar <- as.character(formula[[2]])
if (length(formula[[3]]==1)) {
RHSVar <- as.character(formula[[3]])
} else {
stop("Only formulas of the form LHSVar ~ RHSVar are handled. No higher terms are allowed.")
}
#Checks. If the RHS is "1" then we employ some tricks.
if (!class(data[,LHSVar]) == "Date") stop("LHSVar is not a vector of dates.")
if (RHSVar == "1") {
data[,RHSVar] <- factor( rep("all", nrow(data), levels=c("all")))
}
if (!is.factor(data[,RHSVar])) stop("RHSVar is not a vector of dates.")
#Convert data.frame to a data.table
dt <- data.table(data)
#Select which date column to do the aggregation by and make a column data
dt$mydate <- dt[[LHSVar]]
dt$split <- dt[[RHSVar]]
#Note: Levels does not capture NA's. (if there are any)
#RHSLevels <- levels(dt[[RHSVar]])
RHSLevels <- names(table(dt[[RHSVar]],useNA="ifany"))
## data.table of all dates, to fill in incince
if (is.null(dRange)) {
dRange <- range(dt[[LHSVar]])
}
#Make a data.table with zeroes spanning the necessary range
dRangeSeq <- unique(dateProjFun(seq.Date(dRange[1], dRange[2], by = 1)))
emptySeries <- data.table(mydate = dRangeSeq)
#Allocate empty result object and build it with cbind
res <- xts::xts(x=NULL, order.by=dRangeSeq)
for (i in seq_len(length(RHSLevels))) {
#Aggregate for each factor level. Note: Instead of ifelse, is it possible to just write
#a variant of (dt$split == RHSLevels[i]) and have this work for the level NA as well?
splitBool <- ifelse(rep(is.na(RHSLevels[i]),nrow(dt)), is.na(dt$split), dt$split == RHSLevels[i])
tsWithGaps <- dt[!is.na(mydate) & splitBool, list(freq = .N), by = list(mydate=dateProjFun(mydate))]
#browser()
#Merge with the empty series
ts <- merge(tsWithGaps, emptySeries, by = "mydate", all.y = TRUE)
#Convert NA's to zeroes
ts[is.na(freq), freq := 0]
#Append result
res <- cbind(res, xts::xts(x=ts[["freq"]],order.by=dRangeSeq))
}
#Nice col names
colnames(res) <- RHSLevels
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.