##' Calculate incidence from flusurvey data
##'
##' This calculates incidence according to a given column in the supplied data.
##' @param data a data table, usually generated by \code{\link{merge_data}}
##' @param incidence.columns one or more columns which specificy incidence; default: "ili"
##' @param aggregation the timescale of aggregation, by default "week"
##' @param denominator what to use as denominator, active members or all reports in a week
##' @param min.N the minimum denominator, by default 1
##' @param by one or more variables by which to group
##' @import data.table
##' @importFrom lubridate floor_date
##' @return a data.table with the incidence
##' @author seb
##' @export
get_incidence <- function(data, incidence.columns = "ili", aggregation = c("week", "day", "month", "year"), denominator = c("active.members", "reports"), min.N = 1, by = NULL)
{
aggregation <- match.arg(aggregation)
denominator <- match.arg(denominator)
dt <- copy(data)
bouts <- bouts_of_illness(data)
## required date columns
columns <- c("symptoms.start.date", "date", "min.date", "max.date")
names(columns) <- columns
## create new date columns if aggregation is not by day
if (aggregation != "day")
{
columns <- sub("date$", paste(aggregation, "date", sep = "."), columns)
for (col_id in seq_along(columns))
{
col <- columns[col_id]
if (aggregation == "year") {
dt[, paste(col) :=
as.Date(paste(substr(season, 1, 4), 11, 1, sep = "-" ))]
bouts[, paste(col) :=
as.Date(paste(substr(season, 1, 4), 11, 1, sep = "-" ))]
} else {
dt[, paste(col) := floor_date(get(names(col)), unit=aggregation)]
bouts[, paste(col) := floor_date(get(names(col)), unit=aggregation)]
}
}
}
id_column <- grep("_id$", colnames(dt), value = TRUE)
## calculate incidence based on symptom start date
incidence <- list()
for (incidence_column in incidence.columns)
{
id_incidence <-
bouts[get(incidence_column) == "t", list(bouts = .N),
by = c(columns[["symptoms.start.date"]], id_column, "season", by)]
incidence[[incidence_column]] <-
id_incidence[, list(new.cases = .N),
by = c(columns[["symptoms.start.date"]], "season", by)]
incidence[[incidence_column]][, type := incidence_column]
}
incidence <- rbindlist(incidence)
setnames(incidence, columns[["symptoms.start.date"]], aggregation)
incidence <-
dcast(incidence,
as.formula(paste0(paste(c(aggregation, by, "season"), collapse="+"),
"~ type")),
value.var="new.cases")
for (incidence_column in intersect(incidence.columns, colnames(incidence)))
{
incidence[is.na(get(incidence_column)), paste(incidence_column) := 0]
}
## work out denominator of active members
if (denominator == "active.members")
{
active_reports <- lapply(unique(incidence[, get(aggregation)]), function(x)
{
id_active <- dt[get(columns[["min.date"]]) <= x &
get(columns[["max.date"]]) >= x,
list(reports = .N),
by = c(id_column, "season", by)]
id_active <- id_active[, .N, by = c("season", by)]
id_active[, paste(aggregation) := x]
id_active
})
active_reports <- rbindlist(active_reports)
incidence <-
merge(incidence, active_reports,
by = c(aggregation, "season", by), all.y=TRUE)
} else
{
id_reports <- dt[, list(reports = .N), by = c(columns[["date"]], id_column, by)]
reports <- id_reports[, .N, by = c(columns[["date"]], by)]
setnames(reports, columns[["date"]], aggregation)
incidence <- merge(incidence, reports, by = c(aggregation, by))
}
incidence <- incidence[N >= min.N]
setkeyv(incidence, aggregation)
return(incidence)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.