#' @include bayesModelFunctions.R
NULL
#' Estimate parameters from the covariate and outcome model
#' via Double Metropolis Hastings algorithm.
#'
#' \code{agcParam} takes a data frame containing a treatment,
#' outcome, and various covariates to be modeled via an MCMC.
#' The network of observations (rows in the data frame) should
#' be contained in the adjacency matrix (adjmat).
#'
#' B and R specify the number of iterations to be run for the
#' various Bayesian sampling procedures. Defaults are conservative.
#'
#' The seed variable requires a vector of integers to
#' run potentially multiple chains.
#'
#' @param data A data.frame containing variables to be considered
#' for the covariate model.
#'
#' @param treatment A string specifying the column in data
#' that is the treatment effect in the underlying model.
#' Must be a binary (0/1) value.
#'
#' @param outcome A string specifying the column in data
#' that is the outcome of interest in the underlying model.
#' Must be a binary (0/1) value.
#'
#' @param adjmat An adjacency matrix (0/1s) specifying
#' the network structure. The number of rows and columns should
#' match the number of rows in the data object.
#'
#' @param B The number of iterations for the MCMC outer loop.
#' Default = 25,000
#'
#' @param R The number of iterations for the Gibbs inner loop.
#' Default = 10.
#'
#' @param seed An integer value for \code{set.seed}.
#' By default, only \code{1}, which runs one chain with a seed of 1.
#'
#' @param scale.alpha A numeric vector of values for the
#' parameters of the covariate model proposal distribution
#' variance. By default, a scalar of .005 is applied to all values. If you
#' wish to replace you can supply another scalar or the appropriate length
#' vector.
#'
#' @param scale.beta A numeric vector of values for the
#' parameters of the outcome model proposal distribution
#' variance. By default, a scalar of .005 is applied to all values. If you
#' wish to replace you can supply another scalar or the appropriate length
#' vector.
#'
#' @param prior.alpha A binary (0/1) indicator for the proposal distribution
#' for the covariate model parameters. If 1, the noninformative prior described in
#' the paper will be used. If 0, an improper prior (i.e. proportional to 1) will
#' be used. Default is 1.
#'
#' @param prior.beta A binary (0/1) indicator for the proposal distribution
#' for the outcome model parameters. If 1, the noninformative prior described in
#' the paper will be used. If 0, an improper prior (i.e. proportional to 1) will
#' be used. Default is 1.
#'
#' @param additional_nu An integer (0/1) specifying whether neighbor
#' cross terms will be evaluated (i.e. non zero)
#'
#' @return An S3 object of type \code{agcParamClass} that contains essential
#' values for the covariate model.
#'
#' @importFrom stats plogis quantile rbinom rmultinom runif var
#' @importFrom utils setTxtProgressBar txtProgressBar
#' @importFrom mvtnorm dmvt
#' @import methods
#' @import Rcpp
#'
#' @examples
#'
#' rdsIn <- readRDS(paste0(system.file('extdata',package='autognet'),"/agc-example.rds"))
#' adjmat <- rdsIn[[1]]
#' data <- rdsIn[[2]]
#' treatment <- "treatment"
#' outcome <- "outcome"
#' B = 10
#' R = 5
#' seed = 1
#'
#' mod <- agcParam(data, treatment, outcome, adjmat,
#' B = B, R = R, seed = c(1))
#'
#' @export
setGeneric(name = "agcParam",
def = function(data, treatment, outcome, adjmat,
B = 25000, R = 10, seed = 1,
scale.alpha=.005,scale.beta=.005,
prior.alpha=1,prior.beta=1,
additional_nu = FALSE)
standardGeneric("agcParam"))
#' @rdname agcParam
setMethod("agcParam", signature("data.frame", "character", "character", "ANY",
"ANY", "ANY", "ANY", "ANY", "ANY", "ANY", "ANY"),
definition = function(data, treatment, outcome, adjmat,
B, R, seed,
scale.alpha,scale.beta,
prior.alpha,prior.beta, additional_nu){
"%ni%" <- Negate("%in%")
# Sanity checks for user-specified parameters
possibilities <- colnames(data)
stopifnot(outcome %in% possibilities)
stopifnot(treatment %in% possibilities)
stopifnot(length(outcome) == 1)
stopifnot(length(treatment) == 1)
stopifnot(length(seed) == 1)
stopifnot(prior.alpha %in% c(0,1))
stopifnot(prior.beta %in% c(0,1))
# Setup covariate dataframe
covariate_data_frame <- data[ , !(names(data) %in% c(treatment, outcome))]
stopifnot(dim(covariate_data_frame)[2] > 1)
# Verify binary outcome and treatment
outcome_vec <- data[,outcome]
treatment_vec <- data[,treatment]
stopifnot(all(outcome_vec %in% c(0,1)))
stopifnot(all(treatment_vec %in% c(0,1)))
# Verify dimensions of adjacency matrix
nadj1 <- dim(adjmat)[1]
nadj2 <- dim(adjmat)[2]
ndata <- dim(data)[1]
if(!all.equal(nadj1, nadj2, ndata)){
stop("Error: ensure the same number of rows/columns matches the number of rows in the supplied data frame")
}
#put in an error if lenth of scale.alpha or scale.beta is incorrect
# Preprocess covariates and process output of helper function
process_covariate <-.covariate_process(covariate_data_frame) # categorical --> binary
covariate <- process_covariate[[1]]
zero_pairs <- process_covariate[[2]]
group_lengths <- process_covariate[[3]]
group_functions <- process_covariate[[4]]
data <- cbind(outcome_vec, treatment_vec, covariate) # final dataframe
# Stopping condition
any_multinomal_variables <- !(all(group_functions==1))
if(any_multinomal_variables & additional_nu){
stop("Error: additional nu's with a multinomal variable is not a supported use case.")
}
# create a new dataframe of indep persons (i.e. no neighbors)
weights <- apply(adjmat,1,sum) # number of neighbors for everyone
data.indep <- data[weights==0,] # independent data
n.indep <- nrow(data.indep)
# create a new dataframe for non-indep persons (i.e. at least 1 neighbor)
data <- data[weights>0,] # non-indpendent data
adjmat <- adjmat[weights>0,weights>0] # non-independent adjacency
weights <- apply(adjmat,1,sum) # number of neighbors
N <- nrow(adjmat) # number of non-independent persons
adjacency <- list(NA) #adjacency list (lists ID for each persons neighbors)
for (i in 1:N){
adjacency[[i]] <- which(adjmat[i,]==1)
}
#Parameters
ncov <- ncol(data)-2 #number of covariates / confounders
nrho <- choose(ncov,2) #number of rho terms in covariate model
L <- ncov*2 + nrho + (ncov^2 - ncov)*as.numeric(additional_nu) #number of params in covariate model
P <- 2 + 2 + ncov*2 #number of params in outcome model
# Adjust the scaling of the variances
stopifnot((L == length(scale.alpha)) | length(scale.alpha) == 1)
stopifnot((P == length(scale.beta)) | length(scale.beta) == 1)
#individual terms
##interconnected units
outcome.i <- as.matrix(data[1])
trt.i <- as.matrix(data[2])
cov.i <- as.matrix(data[3:ncol(data)])
##independent units
outcome.ind <- as.matrix(data.indep[1])
trt.ind <- as.matrix(data.indep[2])
cov.ind <- as.matrix(data.indep[3:ncol(data.indep)])
#sum of neighbors terms
outcome.n <- (adjmat%*%outcome.i)/weights
trt.n <- (adjmat%*%trt.i)/weights
cov.n <- apply(cov.i,2,function(x) {(adjmat%*%x)/weights})
#sufficient statistics for covariate model
##establish which rhos we want to keep or not (i.e. categorical b)
grid <- t(combn(1:ncov, 2))
use_rho <- apply(grid, 1, function(pair){
char <- paste(pair, collapse = "_")
as.numeric(char %ni% zero_pairs)
})
if(additional_nu){
use_rho_all <- 1 # because we do not allow categorial AND additional_nu, this is trivial
} else {
use_rho_all <- c(rep(1,ncov), use_rho, rep(1,ncov))
}
##interconnected units
if(additional_nu){
# Transpose the product to get the desired order
# See as.vector(t(matrix(c(1,2,3,4,5,6,7,8,9),ncol = 3, byrow = TRUE)))
third_term <- as.vector(t(t(cov.i) %*% cov.n))
} else {
third_term <- unname(colSums(cov.i*cov.n))
}
sum.l <- c(unname(colSums(cov.i)),
unname(colSums(cov.i[,grid[,1], drop = FALSE] * cov.i[,grid[,2], drop = FALSE]))*use_rho,
third_term)
##independent units
sum.l.indep <- c(unname(colSums(cov.ind)),
unname(colSums(cov.ind[,grid[,1], drop = FALSE] * cov.ind[,grid[,2], drop = FALSE]))*use_rho)
#sufficient statistics for outcome model
##interconnected units
sum.y <- c(sum(outcome.i), #sum individuals outcome
sum(outcome.i*trt.i), #sum individuals outcome * treatment
(t(outcome.i)%*%cov.i)[1,], #sum individuals outcome * all covs
(t(outcome.i)%*%outcome.n)[1,1], #sum neighbors outcome
(t(outcome.i)%*%trt.n)[1,1], #sum neighbors treatment
(t(outcome.i)%*%cov.n)[1,]#sum covariates treatment
)
##independent units
sum.y.indep <- c(sum(outcome.ind),
sum(outcome.ind*trt.ind),
apply(cov.ind,2,function(x) {sum(outcome.ind*x)}))
# See if there are independent units?
indepedents_present <- dim(data.indep)[1] != 0
# Includes the independent individual terms from the outcome model
if(indepedents_present) design.mat.indep <- as.matrix(cbind(1,data.indep[-1]))
# Establish parameters for the MCMC
alpha <- matrix(NA,B+1,L)
beta <- matrix(NA,B+1,P)
accept_alpha <- NA
accept_beta <- NA
#for independent terms denominator
l_grid <- unname(data.matrix(expand.grid(replicate(ncov, c(0,1), simplify=FALSE))))
# Remove zero pairs
if (length(zero_pairs) != 0){
for (j in 1:length(zero_pairs)){
l_grid <- l_grid[-which(apply(l_grid[,as.numeric(strsplit(zero_pairs,"_")[[j]])],1,sum)>1),]
}
}
# Have to change the index of adjacency
for (i in 1:N){
adjacency[[i]] <- adjacency[[i]] - 1
}
# Last parameters needed for MCMC
J <- dim(covariate)[2] # number of covariates
set.seed(seed)
# Main loop for the MCMC
pb <- txtProgressBar(min = 1, max = B, style = 3)
for (b in 1:B){
#Step 0. Starting values
if (b==1) {
alpha[b,] <- MASS::mvrnorm(1,rep(0,L),1*diag(L))
beta[b,] <- MASS::mvrnorm(1,rep(0,P),1*diag(P))
}
##ALPHA##
#Step 1. Proposal
alpha.p <- MASS::mvrnorm(1,alpha[b,],scale.alpha*diag(L))*use_rho_all
#assign tau, rho, nu
tau.p <- alpha.p[1:ncov]
rho.p <- alpha.p[(ncov+1):(ncov+nrho)]
nu.p <- alpha.p[(ncov+nrho+1):L]
# Make into a matrix for nu to handle off diag terms
if(additional_nu){
nu.p.mat <- matrix(nu.p, nrow = ncov, byrow = TRUE)
} else {
nu.p.mat <- diag(nu.p)
}
# Make symmetrical matrix of the rho values
rho_mat <- matrix(0, nrow = J, ncol = J)
rho_mat[lower.tri(rho_mat, diag=FALSE)] <- rho.p; rho_mat <- rho_mat + t(rho_mat)
# First call to Cpp function
aux.p.mat <- auxVarCpp(tau.p, rho.p, nu.p.mat, N, R, J, rho_mat,
adjacency,cov.i,weights,group_lengths,group_functions,
as.numeric(additional_nu))
aux.p.cov.n <- apply(aux.p.mat,2,function(x) {(adjmat%*%x)/weights})
if(additional_nu){
third_term_sap <- as.vector(t(t(aux.p.mat) %*% aux.p.cov.n))
} else {
third_term_sap <- unname(colSums(aux.p.mat*aux.p.cov.n))
}
sum.aux.p <- c(unname(colSums(aux.p.mat)),
unname(colSums(aux.p.mat[,grid[,1, drop = FALSE], drop = FALSE] * aux.p.mat[,grid[,2, drop = FALSE], drop = FALSE]))*use_rho,
third_term_sap)
#Step 3. Calculate accept-reject ratio (everything is log-ed!)
#interconnected units
h.l1.p <- alpha.p%*%sum.l
h.l1.c <- alpha[b,]%*%sum.l
h.aux.p <- alpha.p%*%sum.aux.p
h.aux.c <- alpha[b,]%*%sum.aux.p
#priors
if(prior.alpha==1){
prior.p <- mvtnorm::dmvt(alpha.p,delta=rep(0,L),sigma=4*diag(L),df=3,log=TRUE)
prior.c <- mvtnorm::dmvt(alpha[b,],delta=rep(0,L),sigma=4*diag(L),df=3,log=TRUE)
} else {
prior.p <- 0
prior.c <- 0
}
#independent units
if(indepedents_present){
f.p.num <- alpha.p[1:(ncov+nrho)]%*%sum.l.indep
f.p.denom <- log(sum(v_get_sum(1:dim(l_grid)[1], l_grid, tau.p, rho.p, ncov)))
f.c.num <- alpha[b,1:(ncov+nrho)]%*%sum.l.indep
f.c.denom <- log(sum(v_get_sum(1:dim(l_grid)[1], l_grid, alpha[b,1:ncov], alpha[b,(ncov+1):(ncov+nrho)], ncov)))
ratio <- (prior.p - prior.c + h.l1.p + h.aux.c - h.l1.c - h.aux.p +
f.p.num - n.indep*f.p.denom - f.c.num + n.indep*f.c.denom)
} else {
ratio <- (prior.p - prior.c + h.l1.p + h.aux.c - h.l1.c - h.aux.p)
}
accept <- rbinom(1,1,min(1,exp(ratio)))
if (accept == 1){alpha[b+1,] <- alpha.p} else { alpha[b+1,] <- alpha[b,] }
accept_alpha[b] <- accept
##BETA##
#Step 1. Proposal
beta.p <- MASS::mvrnorm(1,beta[b,],scale.beta*diag(P))
#Step 2. Auxilary variable based on proposal
# Second call to Rcpp function
#aux.p <- aux.var.outcome.cl(beta.p,trt.i,cov.i,N,10,adjacency,outcome.i,weights)
aux.p <- auxVarOutcomeCpp(beta.p,trt.i[,1],cov.i,
N,R,adjacency,unname(outcome.i[,1]),weights)
sum.aux.p <- c(sum(aux.p),
sum(aux.p*trt.i),
apply(cov.i,2,function(x) {sum(aux.p*x)}),
sum(aux.p*(adjmat%*%aux.p)/weights),
t(aux.p)%*%trt.n,
apply(cov.n,2,function(x) {t(aux.p)%*%x}))
#Step 3. Calculate accept-reject ratio (everything is log-ed!)
#interconnected units
h.p <- beta.p%*%sum.y
h.c <- beta[b,]%*%sum.y
h.aux.p <- beta.p%*%sum.aux.p
h.aux.c <- beta[b,]%*%sum.aux.p
#prior
if(prior.beta==1){
prior.p.beta <- mvtnorm::dmvt(beta.p,delta=rep(0,P),sigma=4*diag(P),df=3,log=TRUE)
prior.c.beta <- mvtnorm::dmvt(beta[b,],delta=rep(0,P),sigma=4*diag(P),df=3,log=TRUE)
} else {
prior.p.beta <- 0
prior.c.beta <- 0
}
if(indepedents_present){
#independent units
f.p.num <- beta.p[1:(2+ncov)]%*%sum.y.indep
f.p.denom <- sum(log(1+ exp(design.mat.indep%*%beta.p[1:(2+ncov)])))
f.c.num <- beta[b,1:(2+ncov)]%*%sum.y.indep
f.c.denom <- sum(log(1+exp(design.mat.indep%*%beta[b,1:(2+ncov)])))
ratio <- (prior.p.beta - prior.c.beta + h.p + h.aux.c - h.c - h.aux.p + f.p.num - f.p.denom - f.c.num + f.c.denom)
} else {
ratio <- (prior.p.beta - prior.c.beta + h.p + h.aux.c - h.c - h.aux.p)
}
accept <- rbinom(1,1,min(1,exp(ratio)))
if (accept == 1){beta[b+1,] <- beta.p} else { beta[b+1,] <- beta[b,] }
accept_beta[b] <- accept
setTxtProgressBar(pb, b)
}
#save labels
if(additional_nu){
indices_go <- apply(expand.grid(1:ncov,1:ncov)[,c(2,1)], 1, paste, collapse = "_")
} else {
indices_go <- as.character(1:ncov)
}
rho.terms <- apply(grid, 1, function(pair){char <- paste(pair, collapse = "_")})
cov.lab <- c(colnames(covariate),rho.terms,paste0("neighbors_", indices_go))
outcome.lab <- c(colnames(data[,1:2]),colnames(covariate),
paste("neighbors",colnames(data[,1:2])),
paste("neighbors",colnames(covariate)))
colnames(alpha) <- cov.lab
colnames(beta) <- outcome.lab
outlist <- list(alpha,beta,accept_alpha,accept_beta,use_rho_all,group_lengths,group_functions, adjmat)
names(outlist) <- c("alpha", "beta", "accept_alpha", "accept_beta", "use_rho_all", "group_lengths", "group_functions", "adjmat")
# Name according to the chain
class(outlist) <- append(class(outlist),"agcParamClass")
return(outlist)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.