Nothing
#' Estimates the effective reproductive number
#'
#' The function \code{estimateR} uses the relative transmission probabilities to estimate
#' the individual-level, time-level, and average effective reproductive numbers
#' for an outbreak.
#'
#' The effective reproductive number is the average number of cases an infectious case
#' will produce in a population of both susceptible and non-susceptibe individuals.
#' The rational behind this reproductive number estimation is Wallinga and Teunis (2004)
#' where the individual-level reproductive number is estimated by summing
#' the relative probability that the individual infected any other individual.
#'
#' If \eqn{p_{ij}} equals the relative probability that case \eqn{i} was infected by
#' case \eqn{j}, then the individual-level reproductive number (\eqn{R_j}) is calculated by:
#'
#' \deqn{R_j = \sum_{m \ne j} {p_{mj}}}
#'
#' The time-level reproductive number is then estimated by averaging the individual-level
#' reproductive numbers for all individuals observed in the time frame (can specify days,
#' weeks, months, years).
#'
#' Finally, the time-level reproductive numbers are averaged to
#' estimate the average effective reproductive number within \code{rangeForAvg}.
#' To get the best estimate of the average effective reproductive number, one should
#' only consider the stable portion of the outbreak (exclude the beginning and end).
#'
#' If \code{bootSamples > 0}, bootstrap confidence intervals will be estimated for
#' both the time-level and average reproductive numbers using parametric bootstrapping.
#'
#'
#' @param df The name of the dateset with transmission probabilities (column \code{pVar}),
#' individual IDs (columns \code{<indIDVar>.1} and \code{<indIDVar>.2}), and the dates of
#' observation (columns \code{<dateVar>.1} and \code{<dateVar>.2}).
#' @param indIDVar The name (in quotes) of the individual ID columns
#' (data frame \code{df} must have variables called \code{<indIDVar>.1}
#' and \code{<indIDVar>.2}).
#' @param dateVar The name (in quotes) of the columns with the dates that the individuals are
#' observed (data frame \code{df} must have variables called \code{<dateVar>.1} and
#' \code{<dateVar>.2}) which must be date or date-time (POSIXt) objects.
#' @param pVar The column name (in quotes) of the transmission probabilities.
#' @param timeFrame The time frame used to calculate Rt
#' (one of \code{"days", "months", "weeks", "years"}).
#' @param rangeForAvg A vector with the start and ending time period to be used to calculate
#' the average effective reproductive number.
#' @param bootSamples The number of bootstrap samples; if 0, then no confidence intervals
#' are calculated.
#' @param alpha The alpha level for the confidence intervals.
#' @param progressBar A logical indicating if a progress bar should be printed (default is TRUE).
#'
#' @return A list with five elements:
#'\enumerate{
#' \item \code{RiDf} - a data frame with the individual-level reproductive numbers. Column names:
#' \itemize{
#' \item \code{<indIDVar>} - the individual ID with name specified.
#' \item \code{<dateVar>} - the date the individual was observed with name specified.
#' \item \code{Ri} - the individual-level reproductive number.
#' \item \code{nInfectees} - the number of possible infectees for this individual.
#' }
#' \item \code{RtDf} - a data frame with the time-level reproductive numbers. Column names:
#' \itemize{
#' \item \code{time} - the time frame corresponding to the reproductive number estimate
#' (day for "days" and "weeks", month for "months", year for "years").
#' \item \code{timeRank} - the rank of the time frame.
#' \item \code{Rt} - the time-level reproductive number for this time frame.
#' \item \code{ciLower} - lower bound of confidence interval for Rt
#' (only if bootSamples > 0).
#' \item \code{ciUpper} - upper bound of confidence interval for Rt
#' (only if bootSamples > 0).
#' }
#' \item \code{RtAvgDf} - a data frame with the average effective reproductive. Column names:
#' \itemize{
#' \item \code{RtAvg} - the average time-level reproductive number between the range
#' specified in \code{rangeForAvg}.
#' \item \code{ciLower} - lower bound of confidence interval for Rt
#' (only if bootSamples > 0).
#' \item \code{ciUpper} - upper bound of confidence interval for Rt
#' (only if bootSamples > 0).
#' }
#' \item \code{timeFrame} - a vector with the timeFrame input
#' \item \code{rangeForAvg} - a vector with the rangeForAvg input
#' }
#'
#'
#' @examples
#'
#' ## Use the nbResults data frame included in the package which has the results
#' ## of the nbProbabilities() function on a TB-like outbreak.
#'
#' ## Getting initial estimates of the reproductive number
#' # (without specifying rangeForAvg and without confidence intervals)
#' rInitial <- estimateR(nbResults, dateVar = "infectionDate",
#' indIDVar = "individualID", pVar = "pScaled",
#' timeFrame = "months")
#'
#' ## Finding the stable portion of the outbreak for rangeForAvg using plot of Rt
#' cut1 <- 25
#' cut2 <- 125
#'
#' # Optional plot to determine the cutpoints above
#' # ggplot(data = rInitial$RtDf, aes(x = timeRank, y = Rt)) +
#' # geom_point() +
#' # geom_line() +
#' # geom_hline(data = rInitial$RtAvgDf, aes(yintercept = RtAvg), size = 0.7) +
#' # geom_vline(aes(xintercept = cut1), linetype = 2, size = 0.7) +
#' # geom_vline(aes(xintercept = cut2), linetype = 2, size = 0.7)
#'
#' ## Finding the final reproductive number estimates with confidence intervals
#' # NOTE should run with bootSamples > 2.
#' rFinal <- estimateR(nbResults, dateVar = "infectionDate",
#' indIDVar = "individualID", pVar = "pScaled",
#' timeFrame = "months", rangeForAvg = c(cut1, cut2),
#' bootSamples = 2, alpha = 0.05)
#'
#' rFinal$RtAvgDf
#'
#' @seealso \code{\link{nbProbabilities}} \code{\link{estimateRi}}
#' \code{\link{estimateRt}} \code{\link{estimateRtAvg}}
#'
#'
#' @references
#' Wallinga J, Teunis P. Different epidemic curves for severe acute respiratory
#' syndrome reveal similar impacts of control measures.
#' \emph{American Journal of Epidemiology}. 2004 Sep 15;160(6):509-16.
#'
#' @export
estimateR <- function(df, indIDVar, dateVar, pVar,
timeFrame = c("days", "months", "weeks", "years"),
rangeForAvg = NULL, bootSamples = 0, alpha = 0.05,
progressBar = TRUE){
df <- as.data.frame(df)
#Creating variables with the individual ID and date variables
indIDVar1 <- paste0(indIDVar, ".1")
indIDVar2 <- paste0(indIDVar, ".2")
dateVar1 <- paste0(dateVar, ".1")
dateVar2 <- paste0(dateVar, ".2")
#Checking that the named variables are in the data frame
if(!indIDVar1 %in% names(df)){
stop(paste0(indIDVar1, " is not in the data frame."))
}
if(!indIDVar2 %in% names(df)){
stop(paste0(indIDVar2, " is not in the data frame."))
}
if(!dateVar1 %in% names(df)){
stop(paste0(dateVar1, " is not in the data frame."))
}
if(!dateVar2 %in% names(df)){
stop(paste0(dateVar2, " is not in the data frame."))
}
if(!pVar %in% names(df)){
stop(paste0(pVar, " is not in the data frame."))
}
#Checking that the date variables are in a date form
if(lubridate::is.Date(df[, dateVar1]) == FALSE &
lubridate::is.POSIXt(df[, dateVar1]) == FALSE){
stop(paste0(dateVar1, " must be either a date or a date-time (POSIXt) object."))
}
if(lubridate::is.Date(df[, dateVar2]) == FALSE &
lubridate::is.POSIXt(df[, dateVar2]) == FALSE){
stop(paste0(dateVar2, " must be either a date or a date-time (POSIXt) object."))
}
#Making sure timeFrame is one of the options
if(!timeFrame %in% c("days", "months", "weeks", "years")){
stop(paste0("timeFrame must be one of: ",
paste0( c("days", "months", "weeks", "years"), collapse = ", ")))
}
#Calculating the individual-level reproductive number
riEst <- estimateRi(df, pVar = pVar, indIDVar = indIDVar, dateVar = dateVar)
#Calculating the time-level reproductibe number
rtEst <- estimateRt(riEst, dateVar = dateVar, timeFrame = timeFrame)
#Calculating the average effective reproductive number
rtAvgEst <- estimateRtAvg(rtEst, rangeForAvg)
if(bootSamples == 0){
return(list("RiDf" = riEst, "RtDf" = rtEst, "RtAvgDf" = rtAvgEst,
"timeFrame" = timeFrame, "rangeForAvg" = rangeForAvg))}
if (bootSamples > 0){
## Finding the CI for Rt ##
#Use manual list instead if replicate for progress bar
if(progressBar == TRUE){
pb <- utils::txtProgressBar(min = 0, max = bootSamples, style = 3)
}
bootRt <- data.frame(c("timeRank" = integer(), "Rt" = numeric(),
"time" = character()), "rep" = integer(),
stringsAsFactors = FALSE)
for(i in 1:bootSamples){
oneRep <- estimateRt(simulateRi(df, riEst, pVar = pVar, indIDVar = indIDVar),
dateVar = dateVar, timeFrame = timeFrame)
oneRep$rep <- i
bootRt <- rbind(bootRt, oneRep)
if(progressBar == TRUE){
utils::setTxtProgressBar(pb, i)
}
}
#Finding the quantiles of the distribution of Rt
cilb <- stats::aggregate(bootRt$Rt, by = list(bootRt$timeRank),
stats::quantile, probs = 1-alpha/2)
names(cilb) <- c("timeRank", "lb")
ciub <- stats::aggregate(bootRt$Rt, by = list(bootRt$timeRank),
stats::quantile, probs = alpha/2)
names(ciub) <- c("timeRank", "ub")
#Combing the quantiles with Rt data
ciRt1 <- merge(rtEst, cilb, by = "timeRank", all = TRUE)
ciRt2 <- merge(ciRt1, ciub, by = "timeRank", all = TRUE)
#Calculating the CI bounds
ciRt2$ciLower <- ifelse(ciRt2$Rt - (ciRt2$lb - ciRt2$Rt) > 0,
ciRt2$Rt - (ciRt2$lb - ciRt2$Rt), 0)
ciRt2$ciUpper <- ciRt2$Rt - (ciRt2$ub - ciRt2$Rt)
ciDataRt <- ciRt2[, !names(ciRt2) %in% c("lb", "ub")]
#Reordering columns
ciDataRt <- ciDataRt[, c("time", "timeRank", "Rt", "ciLower", "ciUpper")]
## Finding the CI for RtAvg ##
#Calculating RtAvg for each bootstrap Rt sample
bootRtAvgL <- by(bootRt, INDICES = list(bootRt$rep),
FUN = estimateRtAvg, rangeForAvg = rangeForAvg)
bootRtAvg <- do.call(c, bootRtAvgL)
#Calculating the CI values
ciLower <- rtAvgEst - (stats::quantile(bootRtAvg, 1-alpha/2) - rtAvgEst)
ciUpper <- rtAvgEst - (stats::quantile(bootRtAvg, alpha/2) - rtAvgEst)
ciDataRtAvg <- cbind.data.frame(rtAvgEst, ciLower, ciUpper, row.names = NULL)
names(ciDataRtAvg) <- c("RtAvg", "ciLower", "ciUpper")
return(list("RiDf" = riEst, "RtDf" = ciDataRt, "RtAvgDf" = ciDataRtAvg,
"timeFrame" = timeFrame, "rangeForAvg" = rangeForAvg))
}
}
#' Estimates individual-level reproductive numbers
#'
#' The function \code{estimateRi} uses relative transmission probabilities to estimate the
#' individual-level reproductive number.
#'
#' This function is meant to be called by \code{\link{estimateR}}
#' which estimates the individual-level, time-level, and average reproductive numbers,
#' but it can also be run directly.
#'
#' @param df The name of the dateset with transmission probabilities
#' @param indIDVar The variable name (in quotes) of the individual ID varaibles
#' (data frame \code{df} must have variables called \code{<indIDVar>.1}
#' and \code{<indIDVar>.2}).
#' @param dateVar The variable name (in quotes) of the dates that the individuals
#' are observed (data frame \code{df} must have variables called \code{<dateVar>.1}
#' and \code{<dateVar>.2}).
#' @param pVar The variable name (in quotes) of the transmission probabilities.
#'
#' @return A data frame with the individual-level reproductive numbers. Column names:
#' \itemize{
#' \item \code{<indIDVar>} - the individual ID with name specified.
#' \item \code{<dateVar>} - the date the individual was observed with name specified.
#' \item \code{Ri} - the individual-level reproductive number.
#' \item \code{nInfectees} - the number of possible infectees for this individual.
#' }
#'
#' @seealso \code{\link{estimateR}} \code{\link{estimateRt}} \code{\link{estimateRtAvg}}
#'
#' @export
estimateRi <- function(df, indIDVar, dateVar, pVar){
df <- as.data.frame(df)
#Creating variables with the individual indID and date variables
indIDVar1 <- paste0(indIDVar, ".1")
indIDVar2 <- paste0(indIDVar, ".2")
dateVar1 <- paste0(dateVar, ".1")
dateVar2 <- paste0(dateVar, ".2")
#### Calculating individual-level reproductive number ####
#Summing the scaled probabilities to get individual-level reproductive numer
sumP <- stats::aggregate(df[, pVar], by = list(df[, indIDVar1]), sum)
names(sumP) <- c(indIDVar, "Ri")
#Finding the number of possible infectors per infectee
nInf <- as.data.frame(table(df[, indIDVar1]))
names(nInf) <- c(indIDVar, "nInfectees")
#Extracting the date of infection for each infectee
dateInfo <- df[!duplicated(df[, indIDVar1]), c(indIDVar1, dateVar1)]
names(dateInfo) <- c(indIDVar, dateVar)
#Combining these variables to create Ri data frame
ri1 <- merge(dateInfo, sumP, by = indIDVar, all = TRUE)
ri <- merge(ri1, nInf, by = indIDVar, all = TRUE)
#If nInfectees is missing and RI is missing, this is the latest case
#so the values should be 0
ri[is.na(ri$Ri), "Ri"] <- 0
ri[is.na(ri$nInfectees), "nInfectees"] <- 0
return(ri)
}
#' Estimates time-level reproductive numbers
#'
#' The function \code{estimateRt} estimates the time-level effective reproductive number
#' from individual-level reproductive numbers.
#'
#' This function is meant to be called by \code{\link{estimateR}}
#' which estimates the individual-level and time-level, and average reproductive numbers,
#' but it can also be run directly.
#'
#' @param riData The name of the dateset with individual-level reproductive numbers.
#' @param dateVar The variable name (in quotes) of the dates that the individuals are observed
#' (data frame \code{riData} must have a variable called \code{<dateVar>}).
#' @param timeFrame The time frame used to calculate Rt
#' (one of \code{"days", "months", "weeks", "years"}).
#'
#' @return A data frame with the time-level reproductive numbers. Column names:
#' \itemize{
#' \item \code{time} - the time frame corresponding to the reproductive number estimate
#' (day for "days" and "weeks", month for "months", year for "years").
#' \item \code{timeRank} - the rank of the time frame.
#' \item \code{Rt} - the time-level reproductive number for this time frame.
#' }
#'
#' @seealso \code{\link{estimateR}} \code{\link{estimateRi}} \code{\link{estimateRtAvg}}
#'
#' @export
estimateRt <- function(riData, dateVar, timeFrame = c("days", "weeks", "months", "years")){
#### Creating ranks for the time interval ####
#Finding the full range of time frames
day <- 60*60*24
times <- seq(trunc(min(riData[, dateVar]), unit = "day"),
trunc(max(riData[, dateVar]), unit = "day"), by = timeFrame)
#Defining the format for the time frame
if(timeFrame == "days"){
timeFormat <- "%Y-%m-%d"
}else if(timeFrame == "months"){
timeFormat <- "%Y-%m"
}else if(timeFrame == "years"){
timeFormat <- "%Y"
}
if(timeFrame %in% c("days", "months", "years")){
timeDf <- cbind.data.frame(time = format(times, timeFormat),
timeRank = 1:length(times), stringsAsFactors = FALSE)
riData$time <- format(riData[, dateVar], timeFormat)
riDataFinal <- merge(riData, timeDf, by = "time", all.x = TRUE)
}
#Need to deal with weeks differently to figure out the proper time frame
if(timeFrame == "weeks"){
#Creating a data frame of week labels and their ranks
timeDf <- cbind.data.frame(time = format(times, "%Y-%m-%d"),
timeRank = 1:length(times), stringsAsFactors = FALSE)
#Ranking the days
days <- seq(min(riData[, dateVar]), max(riData[, dateVar]), by = "days")
dayDf <- cbind.data.frame(day = format(days, "%Y-%m-%d"),
dayRank = 1:length(days), stringsAsFactors = FALSE)
#Finding the week rank based on the day rank / 7 and then merging back
#with the time data frame to get the week label
riData$day <- format(riData[, dateVar], "%Y-%m-%d")
riData2 <- merge(riData, dayDf, by = "day", all.x = TRUE)
riData2$timeRank <- ceiling(riData2$dayRank / 7)
riData3 <- merge(riData2, timeDf, by = "timeRank")
riDataFinal <- riData3[, !names(riData3) %in% c("day", "dayRank")]
}
## Calculating Rt ##
#Finding the mean Ri value by time period (Rt)
meanRi <- stats::aggregate(riDataFinal$Ri, by = list(riDataFinal$timeRank), mean)
names(meanRi) <- c("timeRank", "Rt")
#Extracting the time for each Rt value
timeInfo <- riDataFinal[!duplicated(riDataFinal$timeRank), c("timeRank", "time")]
#Combining these data frames to create Rt data frame
rt <- merge(meanRi, timeInfo, by = "timeRank", all = TRUE)
#Ordering columns
rt <- rt[, c("time", "timeRank", "Rt")]
return(rt)
}
#' Estimates the average effective reproductive number
#'
#' Averages the time-level reproductive numbers within a certain range to estimate the overall
#' reproductive number for an oubreak.
#'
#' This function is meant to be called by \code{\link{estimateR}}
#' which estimates the individual-level and time-level, and average reproductive numbers,
#' but it can also be run directly.
#'
#' @param rtData The name of the dateset with time-level reproductive numbers.
#' @param rangeForAvg A vector with the start and ending time period to be used to calculate the
#' average effective reproductive number.
#'
#' @return A data frame with the average effective reproductive. Column names:
#' \itemize{
#' \item \code{RtAvg} - the average time-level reproductive number between the range
#' specified in \code{rangeForAvg}.
#' }
#'
#' @seealso \code{\link{estimateR}} \code{\link{estimateRi}} \code{\link{estimateRt}}
#'
#' @export
estimateRtAvg <- function(rtData, rangeForAvg = NULL){
#Printing a note if you do not specify a range for the average
if(is.null(rangeForAvg)){
#Finding the mean of the time-level reproductive numbers
RtAvg <- mean(rtData$Rt, na.rm = TRUE)
message("Please choose the stable portion of the outbreak to calculate the average Rt")
}else{
#Restricting to the time-level reproductive numbers within the rangeForAverage.
rtCut <- rtData[rtData$timeRank > rangeForAvg[1] &
rtData$timeRank < rangeForAvg[2], ]
RtAvg <- mean(rtCut$Rt, na.rm = TRUE)
}
return(as.data.frame(RtAvg))
}
#Function that simulates Ri from the probabilities
simulateRi <- function(df, riEst, pVar, indIDVar){
#Creating variables with the individual indID and date variables
indIDVar1 <- paste0(indIDVar, ".1")
indIDVar2 <- paste0(indIDVar, ".2")
#Making a matrix of probabilities for speedy calculation
dfM <- unclass(stats::xtabs(df[, pVar] ~ df[, indIDVar1] + df[, indIDVar2]))
Ri <- apply(dfM, 1, function(x) poisbinom::rpoisbinom(n = 1, pp = x))
riNew <- cbind.data.frame(names(Ri), Ri, stringsAsFactors = "FALSE")
names(riNew) <- c(indIDVar, "Ri")
#Merging the new Ri value with the individual information
riEst$indIDChar <- as.character(riEst[, indIDVar])
riEst2 <- riEst[!names(riEst) %in% c("Ri")]
riNew2 <- merge(riEst2, riNew, by = c("indIDChar" = indIDVar))
riNew2[is.na(riNew2$Ri), "Ri"] <- 0
riNew3 <- riNew2[, !names(riNew2) %in% c("indIDChar")]
return(riNew3)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.