R/risk_ratio_confidence.R

Defines functions risk.ratio.confidence

Documented in risk.ratio.confidence

#' @title Confidence intervals for the Risk Ratio Integral
#'   
#' @description Function that calculates confidence interval for the integral 
#'   \deqn{ \int RR(x;\theta)f(x)dx } where \eqn{f(x)} is the density function
#'   of the exposure X, \eqn{ RR(x;\theta)} the relative risk of the exposure
#'   with associated parameter \eqn{\theta}.
#'   
#' @param X         Random sample (\code{data.frame}) which includes exposure 
#'   and covariates.
#'   
#' @param thetahat  Estimator (\code{vector}) of \code{theta} for the Relative 
#'   Risk function.
#'   
#' @param thetavar   Estimator of variance of \code{thetahat}
#'   
#' @param rr        \code{function} for Relative Risk which uses parameter 
#'   \code{theta}. The order of the parameters shound be \code{rr(X, theta)}.
#'   
#'   **Optional**
#'   
#' @param weights   Normalized survey \code{weights} for the sample \code{X}.
#'   
#' @param nsim      Number of simulations.
#'   
#' @param confidence Confidence level \% (default: \code{95}).
#'   
#' @param check_thetas Checks that \code{theta} parameters are correctly
#'   inputed.
#'   
#' @param force.min Boolean indicating whether to force the \code{rr} to have a 
#'   minimum value of \code{1} instead of \code{0} (not recommended).
#'   
#' @note The \code{force.min} option forces the relative risk \code{rr} to have
#'   a minimum of \code{1} and thus an \code{rr < 1} is NOT possible. This is
#'   only for when absolute certainty is had that \code{rr > 1} and should be
#'   used under careful consideration. The confidence interval to acheive such
#'   an \code{rr} is based on the paper by Do Le Minh and Y. .s. Sherif
#'   
#' @seealso \link{risk.ratio.approximate.confidence} for a method when only
#'   \code{mean(X)} and \code{var(X)} are known.
#'   
#' @references Sherif, Y. .s. (1989). The lower confidence limit for the mean of
#'   positive random variables. Microelectronics Reliability, 29(2), 151-152.
#'   
#' @author Rodrigo Zepeda-Tello \email{rzepeda17@gmail.com}
#' @author Dalia Camacho-GarcĂ­a-FormentĂ­ \email{daliaf172@gmail.com}
#'   
#' @examples 
#' \dontrun{
#' #Example 1: Exponential Relative Risk
#' #--------------------------------------------
#' set.seed(18427)
#' X        <- data.frame(rnorm(100))
#' thetahat <- 0.1
#' thetavar <- 0.2
#' rr       <- function(X, theta){exp(theta*X)}
#' risk.ratio.confidence(X, thetahat, rr, thetavar)
#' 
#' #We can force RR'.s CI to be >= 1.
#' #This should be done with extra methodological (philosophical) care as 
#' #RR>= 1 should only be assumed with absolute mathematical certainty
#' risk.ratio.confidence(X, thetahat, rr, thetavar, force.min = TRUE)
#' 
#' 
#' #Example 2: Multivariate Relative Risk
#' #--------------------------------------------
#' set.seed(18427)
#' X1        <- rnorm(1000)
#' X2        <- runif(1000)
#' X         <- data.frame(cbind(X1,X2))
#' thetahat  <- c(0.02, 0.01)
#' thetavar  <- matrix(c(0.1, 0, 0, 0.4), byrow = TRUE, nrow = 2)
#' rr        <- function(X, theta){exp(theta[1]*X[,1] + theta[2]*X[,2])}
#' risk.ratio.confidence(X, thetahat, rr, thetavar) 
#' 
#' #Example 3: Categorical Relative Risk & Exposure
#' #--------------------------------------------
#' set.seed(18427)
#' X        <- as.data.frame(sample(c("Normal","Overweight","Obese"), 100, 
#'                    replace = TRUE, prob = c(0.4, 0.1, 0.5)))
#' thetahat <- c(1, 1.2, 1.5)
#' thetavar  <- diag(c(0.1, 0.2, 0.4))
#' 
#' #Categorical relative risk function
#' rr <- function(X, theta){
#' 
#'    #Create return vector with default risk of 1
#'    r_risk <- rep(1, length(X))
#'    
#'    #Assign categorical relative risk
#'    r_risk[which(X == "Normal")]      <- thetahat[1]
#'    r_risk[which(X == "Overweight")]  <- thetahat[2]
#'    r_risk[which(X == "Obese")]       <- thetahat[3]
#'    
#'    return(r_risk)
#' }
#' 
#' risk.ratio.confidence(data.frame(X), thetahat, rr, thetavar)
#' 
#' #Example 4: Categorical Relative Risk & continuous exposure
#' #----------------------------------------------------------
#' set.seed(18427)
#' BMI      <- data.frame(rlnorm(100, 3.1, sdlog = 0.1))
#' thetahat <- c(Malnourished = 2.2, Normal = 1, Overweight = 1.8, Obese = 2.5)
#' thetavar <- diag(c(0.5, 0.1, 0.1, 0.2))
#' rr       <- function(X, theta){
#'      
#'      #Create return vector with default risk of 1
#'      r_risk <- rep(1, length(X))
#'    
#'      #Assign categorical relative risk
#'      r_risk[which(X < 20)]                             <- theta[1] #Malnourished
#'      r_risk[intersect(which(X >= 20), which(X < 25))]  <- theta[2] #Normal
#'      r_risk[intersect(which(X >= 25), which(X < 30))]  <- theta[3] #Overweight
#'      r_risk[which(X >= 30)]                            <- theta[4] #Obese
#'    
#'    return(r_risk)
#' }
#' 
#' risk.ratio.confidence(BMI, thetahat, rr, thetavar)
#' 
#' #Example 5: Bivariate exposure and rr ("classical rr")
#' #------------------------------------------------------------------
#' set.seed(18427)
#' X        <- sample(c("Exposed","Unexposed"), 1000, replace = TRUE, prob = c(0.1, 0.9))
#' thetahat <- c("Exposed" = 2.5, "Unexposed" = 1.2)  
#' thetavar <- matrix(c(0.1, 0.2, 0.2, 0.4), ncol = 2)
#' rr <- function(X, theta){
#'    
#'    #Create relative risk function
#'    r_risk <- rep(1, length(X))
#'    
#'    #Assign values of relative risk
#'    r_risk[which(X == "Unexposed")] <- theta["Unexposed"]
#'    r_risk[which(X == "Exposed")]   <- theta["Exposed"]
#'    
#'    return(r_risk)
#' }    
#' 
#' risk.ratio.confidence(data.frame(X), thetahat, rr, thetavar)
#' }
#' @importFrom MASS mvrnorm
#' @importFrom stats qnorm
#' @keywords internal
#' @export

risk.ratio.confidence <- function(X, thetahat, rr, thetavar, 
                                  weights =  rep(1/nrow(as.matrix(X)),nrow(as.matrix(X))),
                                  nsim = 1000, confidence = 95, 
                                  check_thetas = TRUE, force.min = FALSE){
  
  #Get confidence
  check.confidence(confidence)
  .alpha <- max(0, 1 - confidence/100)
  .Z     <- qnorm(1-.alpha/2)
  
  #Check the thetas
  .thetavar <- as.matrix(thetavar)
  if(check_thetas){ check.thetas(.thetavar, thetahat, NA, NA, "risk.ratio") }
  
  #Get number of simulations
  .nsim  <- max(10, ceiling(nsim))
  
  #To matrix
  .X     <- as.data.frame(X)
  
  #Calculate the conditional expected value as a function of theta
  .Risk  <- function(.theta){
    .paf <- paf(X = X, thetahat = .theta, rr = rr, weights = weights, 
                Xvar = NA, method = "empirical", check_exposure = FALSE,
                check_rr = FALSE, check_integrals = FALSE)
    return( 1/(1-.paf) )
  }
  
  #Calculate the conditional variance as a function of theta
  .Variance <- function(.theta){
    .s   <- sum(weights)
    .s2  <- sum(weights^2)
    .var <- ( .s / (.s^2 - .s2) ) * weighted.mean(as.matrix(rr(.X, .theta) - 
                                                     .Risk(.theta))^2, weights) *.s2
    return(.var)
  }
  
  #Get expected value and variance of that
  .meanvec   <- rep(NA, .nsim)
  .varvec    <- rep(NA, .nsim)
  .thetasim  <- mvrnorm(.nsim, thetahat, .thetavar, empirical = TRUE)
  for (i in 1:.nsim){
    .meanvec[i]  <- .Risk(.thetasim[i,])
    .varvec[i]   <- .Variance(.thetasim[i,])
  }
  
  #Get variance of that
  .inversevarpaf <- var(.meanvec) + mean(.varvec)
  
  #Create the confidence intervals
  .squareroot <- .Z*sqrt(.inversevarpaf)
  .ciup       <- .Risk(thetahat) + .squareroot
  .cilow      <- (.Risk(thetahat)^2)/.ciup #Force rr > 0
  
  #If minimum is forced to 1 correct CI
  if (force.min){
    .cilow      <- ((.Risk(thetahat) - 1)^2)/(.ciup-1) + 1
  }
  
  #Compute the Risk Ratio intervals
  .cirisk        <- c("Lower_CI"       = .cilow, 
                      "Point_Estimate" = .Risk(thetahat) , 
                      "Upper_CI"       = .ciup )
  
  #Return variance
  return(.cirisk)
  
}

Try the pifpaf package in your browser

Any scripts or data that you put into this service are public.

pifpaf documentation built on May 1, 2019, 9:11 p.m.