R/main.R

Defines functions NLpredict InteractionProb NLint

Documented in InteractionProb NLint NLpredict

#' Estimate nonlinear association and identify interactions
#' 
#' This function takes in the observed data (y, x, c) and estimates
#' a potentially nonlinear, interactive effect between x and y while
#' adjusting for c linearly
#'
#' @param Y              The outcome to be analyzed.
#' @param X              an n by p matrix of exposures to evaluate. These should be continuous.
#' @param C              An n by q matrix of additional covariates to adjust for. If there are no
#'                       additional covariates (i.e q=0), then C should be set to NULL when specifying
#'                       the model
#' 
#' @param nScans         The number of MCMC scans to run.
#' @param nBurn          The number of MCMC scans that will be dropped as a burn-in.
#' @param thin           This number represents how many iterations between each scan.
#' 
#' @param c              The first hyperparameter of the inverse gamma prior on the residual variance
#' @param d              The second hyperparameter of the inverse gamma prior on the residual variance
#' 
#' @param sigB           Either a numeric value to be used for the value of sigma_beta,
#'                       the variance of the slab distribution,
#'                       or "EB" is specified to indicate that it will be estimated
#'                       via empirical Bayes. We recommend "EB" unless the user has
#'                       strong prior beliefs about the magnitude of the nonzero regression
#'                       coefficients
#' @param k              The number of total components to allow in the model
#' @param ns             The degrees of freedom of the splines used to estimate nonlinear functions of the exposures
#' @param alph           The first hyperparameter for the beta prior on the probability of an exposure being included
#'                       in a given component
#' @param gamm           The second hyperparameter for the beta prior on the probability of an exposure being included
#'                       in a given component
#' @param intMax         The highest order interaction the user wants to allow in the model. This can be set to p to allow
#'                       any order interaction, though the default is 3.
#' @param threshold      thresholding parameter when finding the lower bound for the slab variance. This parameter represents
#'                       the percentage of time a null association enters the model when tau_h = 0.5. Smaller values are more
#'                       conservative and prevent false discoveries. We recommend either 0.25 or 0.1. This lower bound is only
#'                       calculated if the "EB" option is selected, as it is used to make sure the empirical Bayes variance isn't
#'                       too small 
#' @param speed          Default is set to speed=TRUE. We have two approaches to sampling from our model. The faster option is
#'                       approximate, though we have found it gives nearly identical answers in all scenarios explored. The slower
#'                       option is theoretically guaranteed to sample from the correct target distribution, but can be slower in
#'                       certain scenarios.                                                                                                                                                                                            
#'
#' @return A list containing the full posterior draws of all parameters in the model, 
#'         the waic associated with the model, the posterior inclusion probabilities (PIPs)
#'         for each exposure entering into the model, and the matrix of 2-way interaction
#'         probabilities
#'         
#'
#' @export
#' @examples
#'
#' n = 200
#' p = 10
#' pc = 1
#' 
#' sigma = matrix(0.3, p, p)
#' diag(sigma) = 1
#' X = rmvnorm(n, mean=rep(0,p), sigma = sigma)
#' 
#' C = matrix(rnorm(n*pc), nrow=n)
#' 
#' TrueH = function(X) {
#'   return(0.5*(X[,2]*X[,3]) - 0.6*(X[,4]^2 * X[,5]))
#' }
#' 
#' Y = 5 + C + TrueH(X) + rnorm(n)
#' 
#' NLmod = NLint(Y=Y, X=X, C=C)
#' 
#' ## Print posterior inclusion probabilities
#' NLmod$MainPIP
#' 
#' ## Show the two way interaction probability matrix
#' NLmod$InteractionPIP
#' 



NLint = function(Y=Y, X=X, C=C, nChains = 2, nIter = 10000, 
                 nBurn = 2000, thin = 8, c = 0.001, d = 0.001,
                 sigB="EB", k = 15, ns = 3, alph=3, gamm=dim(X)[2], 
                 threshold = 0.1, intMax=3, speed=TRUE) {
  
  n = dim(X)[1]
  p = dim(X)[2]
  
  designC = cbind(rep(1, dim(X)[1]), C)
  
  SigmaC = 1000*diag(dim(designC)[2])
  muC = rep(0, dim(designC)[2])
  
  ## Prior mean vector
  muB = rep(0, ns)
  
  ## Design matrix with splines
  Xstar = array(NA, dim=c(n,p,ns+1))
  Xstar[,,1] = 1
  for (j in 1 : p) {
    Xstar[,j,2:(ns+1)] = scale(splines::ns(X[,j], df=ns))
  }
  
  if (speed == TRUE) {
    
    ## Empirical Bayes if the user wants to use it
    if (sigB == "EB") {
      SigMin = MCMCmixtureMinSig(Y=Y, X=X, C=C, Xstar = Xstar, nPerms = 10, 
                                 nIter = 500, c = c, d = d,
                                 sigBstart=0.5, muB=muB,
                                 SigmaC=SigmaC, muC=muC, 
                                 k = k, ns = ns, threshold=threshold)
      
      print("Finding the empirical bayes estimate of the slab variance")
      
      SigEstEB = MCMCmixtureEB(Y=Y, X=X, C=C, Xstar = Xstar, nChains = nChains, nIter = nIter, 
                               nBurn = nBurn, thin = thin, c = c, d = d,
                               sigBstart=0.5, muB=muB,
                               SigmaC=SigmaC, muC=muC, SigMin = SigMin,
                               k = k, ns = ns, alph=alph, gamm=gamm, intMax=intMax)
      
      SigEst = max(SigEstEB, SigMin)
      
      print("Now fitting the posterior conditional on EB estimate of slab variance")
      
      posterior = MCMCmixture(Y=Y, X=X, C=C, Xstar = Xstar, nChains = nChains, nIter = nIter, 
                              nBurn = nBurn, thin = thin, c = c, d = d,
                              sigB=SigEst, muB=muB,
                              SigmaC=SigmaC, muC=muC, 
                              k = k, ns = ns, alph=alph, gamm=gamm, intMax=intMax) 
    } else {
      print("Fitting the posterior using user selecte value for slab variance")
      
      posterior = MCMCmixture(Y=Y, X=X, C=C, Xstar = Xstar, nChains = nChains, nIter = nIter, 
                              nBurn = nBurn, thin = thin, c = c, d = d,
                              sigB=sigB, muB=muB,
                              SigmaC=SigmaC, muC=muC, 
                              k = k, ns = ns, alph=alph, gamm=gamm, intMax=intMax) 
    }
    
    keep = nBurn + (1:floor((nIter - nBurn) / thin))*thin
    totalScans = length(keep)
    
    waic = WaicMixture(Y=Y, Xstar=Xstar, designC=designC, totalScans=totalScans, 
                       nChains=nChains, zetaPost = posterior$zeta, 
                       betaList = posterior$beta, betaCPost = posterior$betaC, 
                       sigmaPost = posterior$sigma, n=n, k=k, ns=ns)
    
    intMean = InteractionMatrix(zetaPost = posterior$zeta, totalScans = totalScans,
                                nChains = 2, p = p, k = k)
    
    inclusions = InclusionVector(zetaPost = posterior$zeta, totalScans = totalScans,
                                 nChains = 2, p = p, k = k)
    
  } else {
    
    ## Empirical Bayes if the user wants to use it
    if (sigB == "EB") {
      SigMin = MCMCmixtureMinSig_MH(Y=Y, X=X, C=C, Xstar = Xstar, nPerms = 10, 
                                 nIter = 500, c = c, d = d,
                                 sigBstart=0.5, muB=muB,
                                 SigmaC=SigmaC, muC=muC, 
                                 k = k, ns = ns, threshold=threshold)
      
      print("Finding the empirical bayes estimate of the slab variance")
      
      SigEstEB = MCMCmixtureEB_MH(Y=Y, X=X, C=C, Xstar = Xstar, nChains = nChains, nIter = nIter,
                                  nBurn = nBurn, thin = thin, c = c, d = d,
                                  sigBstart=0.5, muB=muB,
                                  SigmaC=SigmaC, muC=muC,
                                  k = k, ns = ns, alph=alph, gamm=gamm,
                                  SigMin = SigMin, probSamp1 = 0.95, intMax=intMax)
      
      SigEst = max(SigEstEB, SigMin)
      
      print("Now fitting the posterior conditional on EB estimate of slab variance")
      
      posterior = MCMCmixture_MH(Y=Y, X=X, C=C, Xstar = Xstar, nChains = nChains, nIter = nIter,
                                 nBurn = nBurn, thin = thin, c = c, d = d,
                                 sigB=SigEst, muB=muB,
                                 SigmaC=SigmaC, muC=muC, intMax=intMax,
                                 k = k, ns = ns, alph=alph, gamm=gamm, probSamp1=0.95) 

    } else {
      print("Fitting the posterior using user selecte value for slab variance")
      
      posterior = MCMCmixture_MH(Y=Y, X=X, C=C, Xstar = Xstar, nChains = nChains, nIter = nIter,
                                 nBurn = nBurn, thin = thin, c = c, d = d,
                                 sigB=sigB, muB=muB,
                                 SigmaC=SigmaC, muC=muC, intMax=intMax,
                                 k = k, ns = ns, alph=alph, gamm=gamm, probSamp1=0.95) 
    }
    
    keep = nBurn + (1:floor((nIter - nBurn) / thin))*thin
    totalScans = length(keep)
    
    waic = WaicMixture_MH(Y=Y, Xstar=Xstar, designC=designC, totalScans=totalScans, 
                       nChains=nChains, zetaPost = posterior$zeta, 
                       betaList = posterior$beta, betaCPost = posterior$betaC, 
                       sigmaPost = posterior$sigma, n=n, k=k, ns=ns)
    
    intMean = InteractionMatrix_MH(zetaPost = posterior$zeta, totalScans = totalScans,
                                nChains = 2, p = p, k = k)
    
    inclusions = InclusionVector_MH(zetaPost = posterior$zeta, totalScans = totalScans,
                                 nChains = 2, p = p, k = k)
  }
  
  
  l = list(posterior = posterior,
           waic = waic,
           InteractionPIP = intMean,
           MainPIP = inclusions,
           ns = ns,
           k = k,
           speed = speed)
  return(l)
}







#' Calculate posterior inclusion probabilities for an interaction
#' 
#' This function takes a fitted NLint object and can return the posterior
#' probability that any given set of variables are jointly interacting with each other.
#'
#' @param NLmod              The fitted NLint object
#' @param Xsub               The exposures for which you want to evaluate whether there is an interaction
#'
#' @return A number indicating the posterior probability that the exposures in Xsub are jointly interacting together.
#'         If Xsub is a scalar then the marginal posterior inclusion probability is returned, while if Xsub is a vector
#'         of length 2, the two way interaction is returned and so on.
#'         
#'
#' @export
#' @examples
#'
#' n = 200
#' p = 10
#' pc = 1
#' 
#' sigma = matrix(0.3, p, p)
#' diag(sigma) = 1
#' X = rmvnorm(n, mean=rep(0,p), sigma = sigma)
#' 
#' C = matrix(rnorm(n*pc), nrow=n)
#' 
#' TrueH = function(X) {
#'   return(0.5*(X[,2]*X[,3]) - 0.6*(X[,4]^2 * X[,5]))
#' }
#' 
#' Y = 5 + C + TrueH(X) + rnorm(n)
#' 
#' NLmod = NLint(Y=Y, X=X, C=C)
#' 
#' ## Find 2 way interaction probability between exposure 4 and 5
#' InteractionProb(NLmod=NLmod, Xsub=c(4,5))


InteractionProb = function(NLmod, Xsub) {
  zeta = NLmod$posterior$zeta
  nChains = dim(zeta)[1]
  totalScans = dim(zeta)[2]
  p = dim(zeta)[3]
  k = dim(zeta)[4]
  
  intMat = matrix(NA, totalScans, nChains)
  for (ni in 1 : totalScans) {
    for (nc in 1 : nChains) {
      int_ind = 0
      for (h in 1 : k) {
        if (all(zeta[nc,ni,Xsub,h] == rep(1, length(Xsub)))) int_ind = 1
      }
      intMat[ni,nc] = int_ind
    }
  }
  return(mean(intMat))
}









#' Predict outcome at a given set of locations
#' 
#' This function takes in new data points and estimates
#' the posterior predictive distribution of outcome at these
#' locations
#'
#' @param NLmod          A model built from the NLint function that predictions will be based on
#' @param X              The original matrix of exposures used to build NLmod
#' @param Xnew           The new set of exposures at which predictions are to be made for
#' @param Cnew           Matrix of additional covariates at which to make predictions
#' 
#'
#' @return The posterior means, 95% pointwise credible intervals, and full posterior draws
#'         for the predicted outcome
#'         
#'
#' @export
#' @examples
#'
#' n = 200
#' p = 10
#' pc = 1
#' 
#' sigma = matrix(0.3, p, p)
#' diag(sigma) = 1
#' X = rmvnorm(n, mean=rep(0,p), sigma = sigma)
#' 
#' C = matrix(rnorm(n*pc), nrow=n)
#' 
#' TrueH = function(X) {
#'   return(0.5*(X[,2]*X[,3]) - 0.6*(X[,4]^2 * X[,5]))
#' }
#' 
#' Y = 5 + C + TrueH(X) + rnorm(n)
#' 
#' NLmod = NLint(Y=Y, X=X, C=C)
#' 
#' Xnew = matrix(rnorm(200*p), 200, p)
#' Cnew = rnorm(200)
#' 
#' predictions = NLpredict(NLmod=NLmod,
#'                         X=X, Xnew=Xnew,
#'                         Cnew=Cnew)



NLpredict = function(NLmod=NLmod, X=X, Xnew=Xnew, 
                     Cnew=Cnew) {
  
  n = dim(X)[1]
  p = dim(X)[2]
  
  ns = NLmod$ns
  k = NLmod$k
  
  n2 = dim(Xnew)[1]
  designC = cbind(rep(1,n2), Cnew)
  
  Xstar = array(NA, dim=c(n,p,ns+1))
  Xstar[,,1] = 1
  for (j in 1 : p) {
    Xstar[,j,2:(ns+1)] = scale(splines::ns(X[,j], df=ns))
  }
  
  XstarNew = array(NA, dim=c(n2,p,ns+1))
  XstarNew[,,1] = 1
  for (j in 1 : p) {
    temp_ns_object = splines::ns(X[,j], df=ns)
    temp_means = apply(temp_ns_object, 2, mean)
    temp_sds = apply(temp_ns_object, 2, sd)
    XstarNew[,j,2:(ns+1)] = cbind(t((t(predict(temp_ns_object, 
                                               Xnew[,j])) - temp_means) / temp_sds))
  }
  
  if (NLmod$speed == TRUE) {
    
    predictions = PredictionsMixture(XstarOld=Xstar, XstarNew=XstarNew, 
                                     designC=designC, totalScans=dim(NLmod$posterior$zeta)[2], 
                                     nChains=dim(NLmod$posterior$zeta)[1], 
                                     zetaPost=NLmod$posterior$zeta, 
                                     betaList=NLmod$posterior$beta, 
                                     betaCPost=NLmod$posterior$betaC, k=k, ns=ns)
  } else {
    predictions = PredictionsMixture_MH(XstarOld = Xstar, XstarNew = XstarNew, designC = designC,
                                     totalScans = dim(NLmod$posterior$zeta)[2], 
                                     nChains = dim(NLmod$posterior$zeta)[1],
                                     zetaPost=NLmod$posterior$zeta, 
                                     betaList=NLmod$posterior$beta, 
                                     betaCPost = NLmod$posterior$betaC, k=k,
                                     ns=ns)
  }
  
  
  l = list(mean = apply(predictions$PredictedPost, 3, mean, na.rm=TRUE),
           CIlower = apply(predictions$PredictedPost, 3, quantile, c(.025), na.rm=TRUE),
           CIupper = apply(predictions$PredictedPost, 3, quantile, c(.975), na.rm=TRUE),
           posterior = predictions$PredictedPost)
  return(l)
}
jantonelli111/NLinteraction documentation built on Aug. 7, 2020, 10:02 p.m.