principalstratmod <- function(formula, data, trt, coords, formula_h, denom, nsamp, thin,
tuning = list(A = 0.1, psi = 0.2, theta = 1, alpha0=alpha0prop, alpha1=alpha1prop, Y=ypropsd),
prior = list(KIG = rep(.01,2), psi = rep(.01,2), theta1 = rep(.6,2), theta2 = rep(10,2)),
starting = list(B=NULL, A = NULL, psi = NULL, theta = NULL, alpha0=NULL, alpha1=NULL),
outputbinsize = NULL, outputfilename = NULL){
require(spBayes)
require(corpcor)
require(MASS)
ncov <- dim(model.matrix(formula, data))[2]
print("Note: function does not accept missing data in covariates.")
logit = function(theta, a, b){
return(log((theta-a)/(b-theta)))
}
logitInv = function(z, a, b){
return ( b-(b-a)/(1+exp(z)) );
}
###########################
##### Format the Data #####
###########################
n <- dim(data)[1]
q <- 2
#pollution model
names <- colnames(model.matrix(formula, data))
data$a <- data[,trt]
data$y <- data[,all.vars(formula)[1]]
data$ytemp <- 1
nterm <- length(labels(terms(formula)))
terms <- NULL
for(ii in 1:nterm){
if(ii == 1) terms <- labels(terms(formula))[ii]
if(ii > 1) terms <- paste(terms, "+", labels(terms(formula))[ii], sep=" ")
}
formulatemp <- as.formula(paste("ytemp ~ ", terms))
covs <- model.matrix(formulatemp, data)
#health model
names_h <- colnames(model.matrix(formula_h, data))
data$h <- data[,all.vars(formula_h)[1]]
data$ytemp <- 1
nterm <- length(labels(terms(formula_h)))
terms <- NULL
for(ii in 1:nterm){
if(ii == 1) terms <- labels(terms(formula_h))[ii]
if(ii > 1) terms <- paste(terms, "+", labels(terms(formula_h))[ii], sep=" ")
}
formulatemp <- as.formula(paste("ytemp ~ ", terms))
covs_h <- model.matrix(formulatemp, data)
makeXmat = function(Xvals){
#Xvals = cbind(rep(1,dim(Xvals)[[1]]), Xvals)
p = dim(Xvals)[[2]]
Xout = matrix(0, n*q, p*q)
Xout[seq(1,n*q,q),1:p] = Xvals
Xout[seq(2,n*q,q),(p+1):(2*p)]= Xvals
return(Xout)
}
X = makeXmat(covs)
p = ncol(X)
## Design matrix for the health model WIITHOUT the posttreatment y
X_h = covs_h
p_h = ncol(X_h) + 1 ## the +1 is to account for the posttreatment y to be added later
y0 = rep(NA,n)
y1 = rep(NA,n)
y0[data$a==0] = data$y[data$a==0]
y1[data$a==1] = data$y[data$a==1]
Y = rep(-999,n*q)
Y[seq(1,n*q,q)] = y0
Y[seq(2,n*q,q)] = y1
ismissy = (is.na(Y))
nwithmissing = sum(ismissy)
H = rep(-999,n*2)
H[seq(1,n*2,2)][data$a==0] = data$h[data$a==0]
H[seq(2,n*2,2)][data$a==1] = data$h[data$a==1]
denom=data[,denom]
a = rep(NA, n*q)
a[seq(1,n*q,q)] = data$a
a[seq(2,n*q,q)] = data$a
###########################
##### Select Priors #######
###########################
####Priors for AA' (Inverse Wishart)
#####With q=2, this is just two inverse gamma priors
KIG_a = prior$KIG
KIG_b = prior$KIG
####Priors for Psi (Inverse gamma)
psiig_a = prior$psi
psiig_b = prior$psi
####Priors for theta (uniform)
thetaunif_a = prior$theta1 #miles
thetaunif_b = prior$theta2 #miles
##################################
##### Select Starting Values #####
##################################
my0 = summary(lm(y0~data$ps))
my1 = summary(lm(y1~data$ps))
missy1 = ismissy[seq(1,n*q,q)]
missy2 = ismissy[seq(2,n*q,q)]
Y[seq(1,n*q,q)][missy1==1] = rnorm(sum(missy1==1), mean(Y[seq(1,n*q,q)], na.rm=T),
sd(Y[seq(1,n*q,q)], na.rm=T))
Y[seq(2,n*q,q)][missy2==1] = rnorm(sum(missy2==1), mean(Y[seq(2,n*q,q)], na.rm=T),
sd(Y[seq(2,n*1,q)], na.rm=T))
H[seq(1,n*2,2)][data$a==1]=rpois(sum(data$a==1), 10)
H[seq(2,n*2,2)][data$a==0]=rpois(sum(data$a==0), 10)
#Regression Coefficients
if(is.null(starting$B) == F){
B = starting$B
}
if(is.null(starting$B) == T){
B = rep(0, ncov*2)
}
#Spatial variance parameters, log transform for proposal
if(is.null(starting$A) == T){
initA1 = log(my0$sigma^2)
initA2 = log(my1$sigma^2)
}else{
initA1 = starting$A
initA2 = starting$A
}
#Vector of log of residual variances (Psi)
if(is.null(starting$psi) == T){
initpsis = log(c(my0$sigma^2, my1$sigma^2)*.1)
}else{
initpsis = log(rep(starting$psi, 2))
}
#Spatial Range parameters
if(is.null(starting$theta) == T){
inittheta = c(1,1) # original scale
inittheta = logit(inittheta,thetaunif_a, thetaunif_b) #logit scale for proposals
}else{
inittheta = logit(starting$theta,thetaunif_a, thetaunif_b)
}
#Health Model Coefficients
if(is.null(starting$alpha0) == T){
initalpha = rep(0, 2*p_h)
}else{
initalpha = c(starting$alpha0, starting$alpha1)
}
##################################
##### Select Tuning Parameters ###
##################################
####Proposal SDS for A1 and A2
A1propsds = tuning$A
A2propsds = tuning$A
####Proposal SDS for log(diag(Psi))
psipropsds = rep(tuning$psi, 2)
###Proposal SDS for logit(theta) (aka, phi)
thetapropsds = rep(tuning$theta, 2)
###Propoisal covariance matrices for alpha from poisson models
alpha0propcov = tuning$alpha0
alpha1propcov = tuning$alpha1
###Proposal SDS for missing Y
propysds = tuning$Y
#################################
##### Define Some Functions #####
#################################
loglike_sp = function(paramvals,Yvals){
llike = 0
K1 = exp(paramvals[Aindex[1]])
K2 = exp(paramvals[Aindex[2]])
K = createspatialsig(K1,K2,rho)
if (!is.na(K)[[1]]){
### Untransform theta to the original scale
thetatemp = logitInv(paramvals[thetaindex], thetaunif_a, thetaunif_b)
det = mvCovInvLogDet(coords=coords, cov.model="exponential",
V=K, Psi=diag(exp(paramvals[psiindex]), nrow=q),
theta=thetatemp, modified.pp=FALSE, SWM=TRUE)
## q inverse gamma priors for spatial variance
###Jacobian for log transformation: \sum log(sigmasq)
llike = llike+ sum(-(KIG_a+1)*paramvals[Aindex] -KIG_b/exp(paramvals[Aindex]) + paramvals[Aindex])
## q inverse gamma priors
###Jacobian for log transformation: \sum log(sigmasq)
llike = llike+ sum(-(psiig_a+1)*paramvals[psiindex] -psiig_b/exp(paramvals[psiindex]) + paramvals[psiindex])
## Uniform part for theta- this is the jacobian for the LogitInv transformation = (theta-a)(b-theta) / (b-a)
llike = llike+ sum(log(thetatemp - thetaunif_a) + log(thetaunif_b - thetatemp))
outlist = list(llike, det$C, det$C.inv, det$log.det)
names(outlist) = c("ll","C","Cinv", "logdet")
}# !isna(K)
if (is.na(K)[[1]]){
outlist = list(-Inf, 1)
names(outlist) = c("ll", "notpd")
}
return(outlist)
}
loglike_norm = function(Yvals,Bvals,Cinvval,logdetval){
##Normal Part
YXB = (Yvals-X%*%Bvals)
llike = -.5*logdetval -.5*t(YXB)%*%Cinvval%*%YXB
return(llike)
}
loglike_pois = function(Yvals,alphavals,h0vals,h1vals){
##Poisson Part
ya0 = Yvals[seq(1,n*q,q)]
ya1 = Yvals[seq(2,n*q,q)]
Xmat0 = as.matrix(cbind(X_h, ya0))
alpha0 = alphavals[1:p_h]
lambda0 = exp(Xmat0%*%alpha0 + log(denom))
Xmat1 = as.matrix(cbind(X_h, ya1))
alpha1 = alphavals[(p_h+1):length(alphavals)]
lambda1 = exp(Xmat1%*%alpha1 + log(denom))
llike = sum( h0vals*log(lambda0) - lambda0 - lfactorial(h0vals) )
llike = llike + sum( h1vals*log(lambda1) - lambda1 - lfactorial(h1vals) )
return(llike)
}
MHstep_sp = function(index, paramvals, Bvals, Yvals, Cval, Cinvval, logdetval,currentllsp, currentllnorm){
accept = 0
notpd = 1
llretsp = currentllsp
llretnorm = currentllnorm
currentll = currentllsp+currentllnorm
Cret = Cval
Cinvret = Cinvval
logdetret = logdetval
currentparams = paramvals
props = currentparams
props[index] = rnorm(length(index), paramvals[index], propsds[index])
llanddet = loglike_sp(props,Yvals)
Cprop = llanddet$C
Cinvprop = llanddet$Cinv
logdetprop = llanddet$logdet
llpropsp = llanddet$ll
if (llanddet$ll != -Inf){
notpd = 0
llpropnorm = loglike_norm(Yvals,Bvals,Cinvprop,logdetprop)
llprop = llpropsp+llpropnorm
ratio = exp(llprop-currentll)
ratio[ratio>1] = 1
if (runif(1)<=ratio){
currentparams[index] = props[index]
llretsp = llpropsp
llretnorm = llpropnorm
Cret = Cprop
Cinvret = Cinvprop
logdetret = logdetprop
accept = 1
}
}## llanddet$ll !=-Inf
outlist = list(currentparams, llretsp,llretnorm, accept, Cret, Cinvret, logdetret, notpd)
names(outlist) = c("params", "llsp","llnorm", "accepted", "C", "Cinv", "logdet", "notpd")
return(outlist)
}
MHstep_pois = function(whichalphas, Yvals, alphavals, h0vals, h1vals, currentll){
accept=0
llret=currentll
alpharet=alphavals
alphaprop=alphavals
index=1:p_h
if (whichalphas==1){
index=(p_h+1):length(alphavals)
}
alphaprop[index]=mvrnorm(1, alpharet[index], alphapropcovs[[whichalphas+1]])
llprop=loglike_pois(Yvals, alphaprop, h0vals, h1vals)
ratio=exp(llprop-currentll)
ratio[ratio>1]=1
if (runif(1)<=ratio){
alpharet=alphaprop
llret=llprop
accept=1
}
outlist=list(alpharet, llret, accept)
names(outlist)=c("alpha", "ll","accepted")
return(outlist)
}
updateBeta = function(Cinvval, Yvals){
S_beta = solve(t(X)%*%Cinvval%*%X)
Mu_beta = S_beta%*%t(X)%*%Cinvval%*%Yvals
return(mvrnorm(1,Mu_beta,S_beta))
}
createspatialsig = function(K1mat, K2mat, rho){
sig = matrix(0,q,q)
sig[1,1] = K1mat
sig[2,2] = K2mat
sig[1,2] = rho*sqrt(K1mat*K2mat)
sig[2,1] = sig[1,2]
if(!is.positive.definite(sig)){
sig = NA
}
return(sig)
}
sampleH=function(whicha, alphavals, Yvals){
ya0 = Yvals[seq(1,n*q,q)]
ya1 = Yvals[seq(2,n*q,q)]
index = 1:p_h
Xmat = cbind(X_h, ya0)
if (whicha==1){
index = (p_h+1) : length(alphavals)
Xmat = cbind(X_h, ya1)
}
alphs=alphavals[index]
lamda=exp(Xmat%*%alphs + log(denom))
h=rpois(sum(data$a==(1-whicha)), lamda[data$a==(1-whicha)])
return(h)
}
sampleY_r=function(ids, Yvals, Bvals, Cinvval, logdetval, alphavals, h0vals, h1vals, currentllpois, currentllnorm){
accept = rep(0, length(which(ids)))
currentll = currentllnorm + currentllpois
llretnorm = currentllnorm
llretpois = currentllpois
Yret = Yvals
Yprop = Yvals
Yprop[ids] = rnorm( length(which(ids)), Yvals[ids], propysds[ids])
llpropnorm = loglike_norm(Yprop, Bvals, Cinvval, logdetval)
llproppois = loglike_pois(Yprop, alphavals, h0vals, h1vals)
llprop=llpropnorm+llproppois
ratio=exp(llprop-currentll)
ratio[ratio>1]=1
if (runif(1)<=ratio){
Yret=Yprop
llretnorm=llpropnorm
llretpois=llproppois
accept= rep(1, length(which(ids)))
}
outlist=list(Yret, llretnorm,llretpois, accept)
names(outlist)=c("Y", "llnorm", "llpois","accepted")
return(outlist)
}
########################################
##### Initialize Sampling Matrices #####
########################################
params = c(initA1,initA2, initpsis, inittheta, initalpha) #Everything here is on the transformed scale for proposals
nparams = length(params)
propsds = c(A1propsds, A2propsds, psipropsds, thetapropsds)
alphapropcovs = list(alpha0propcov, alpha1propcov)
Aindex = 1:2
psiindex = 3:4
thetaindex = 5:6
alpha0index = 7:(6+p_h)
alpha1index = (max(alpha0index)+1) : (max(alpha0index)+ p_h)
accepted = rep(0, nparams)
accepted_y = rep(0, n*q)
binsize = 200
rho = 0
notpd = 0
saveindex = seq(1, nsamp, by = thin)
samples = matrix(NA, nrow=length(saveindex), ncol=nparams+1+length(B))
dimnames(samples)[[2]] = c("K[1,1]", "K[2,1]", "K[2,2]", paste("Psi", 1:q, sep=""), paste("Theta", 1:2, sep=""), paste("alpha0", (0:(p_h-1)), sep=""), paste("alpha1", (0:(p_h-1)), sep=""),
paste("B0", 0:((p/2)-1), sep=""), paste("B1", 0:((p/2)-1), sep=""))
ysims = matrix(NA, nrow=length(saveindex), ncol=length(Y))
hsims = matrix(NA, nrow=length(saveindex), ncol=length(H))
##Calculate initial log likelihood
initsp = loglike_sp(params,Y)
Cinv = initsp$Cinv
C = initsp$C
logdet = initsp$logdet
llsp = initsp$ll
llnorm = loglike_norm(Y,B,Cinv,logdet)
llpois = loglike_pois(Y, params[c(alpha0index, alpha1index)], H[seq(1,n*2,2)], H[seq(2,n*2,2)])
ll = llsp + llnorm + llpois
iterno = 1
donesampling= FALSE
kk=1
while (donesampling==FALSE){
B = updateBeta(Cinv,Y)
llnorm= loglike_norm(Y,B,Cinv,logdet)
ll = llsp+llnorm+llpois
######## PROPOSE #########
###### Metropolis steps for spatial parameters #########
#a0 parameters
paramindex = c(Aindex[1], psiindex[1], thetaindex[1])
mhstep = MHstep_sp(paramindex,params,B,Y, C, Cinv, logdet, llsp, llnorm)
params = mhstep$params
llsp = mhstep$llsp
llnorm = mhstep$llnorm
Cinv = mhstep$Cinv
C = mhstep$C
logdet = mhstep$logdet
accepted[paramindex] = accepted[paramindex]+mhstep$accepted
notpd = notpd+mhstep$notpd
#a1parameters
paramindex = c(Aindex[2], psiindex[2], thetaindex[2])
mhstep = MHstep_sp(paramindex,params,B,Y, C, Cinv, logdet, llsp, llnorm)
params = mhstep$params
llsp = mhstep$llsp
llnorm = mhstep$llnorm
Cinv = mhstep$Cinv
C = mhstep$C
logdet = mhstep$logdet
accepted[paramindex] = accepted[paramindex]+mhstep$accepted
notpd = notpd+mhstep$notpd
ll = llsp+llnorm+llpois
###### Metropolis steps for Poisson parameters in h0/h1 model ##########
#a0 parameters
mhstep = MHstep_pois(0, Y, params[c(alpha0index, alpha1index)], H[seq(1,2*n,2)], H[seq(2,2*n,2)], llpois)
params[c(alpha0index,alpha1index)] = mhstep$alpha
llpois = mhstep$ll
accepted[alpha0index] = accepted[alpha0index] + mhstep$accepted
#a1 parameters
mhstep = MHstep_pois(1, Y, params[c(alpha0index, alpha1index)], H[seq(1,2*n,2)], H[seq(2,2*n,2)], llpois)
params[c(alpha0index,alpha1index)] = mhstep$alpha
llpois = mhstep$ll
accepted[alpha1index] = accepted[alpha1index] + mhstep$accepted
##################################
###### Simulate Missing Y ########
##################################
######### Fully Bayesian sampling (with H) using R ##########
for ( whichysamp in which(ismissy) ){
whichy=rep(FALSE, n*q)
whichy[whichysamp]=TRUE
mhstep = sampleY_r(whichy, Y, B, Cinv, logdet, params[c(alpha0index, alpha1index)], H[seq(1,n*2,2)], H[seq(2,n*2,2)], llpois, llnorm)
Y = mhstep$Y
llnorm = mhstep$llnorm
llpois = mhstep$llpois
accepted_y[whichy] = accepted_y[whichy] + mhstep$accepted
}
# whichy = ismissy #this needs to be an n*q dimensional vector
# whichy[seq(2,n*q,q)] = FALSE #only sample a=0
# mhstep = sampleY_r(whichy, Y, B, Cinv, logdet, params[c(alpha0index, alpha1index)], H[seq(1,n*2,2)], H[seq(2,n*2,2)], llpois, llnorm)
# Y = mhstep$Y
# llnorm = mhstep$llnorm
# llpois = mhstep$llpois
# accepted_y[whichy] = accepted_y[whichy] + mhstep$accepted
# whichy=ismissy
# whichy[seq(1,n*q,q)] = FALSE #only sample a=1
# mhstep = sampleY_r(whichy, Y, B, Cinv, logdet, params[c(alpha0index, alpha1index)], H[seq(1,n*2,2)], H[seq(2,n*2,2)], llpois, llnorm)
# Y = mhstep$Y
# llnorm = mhstep$llnorm
# llpois = mhstep$llpois
# accepted_y[whichy] = accepted_y[whichy] + mhstep$accepted
######### Sample without using H (not fully Bayesian) ##########
# Sy_ystar = C[!ismissy, ismissy]
# Sy_y = C[!ismissy,!ismissy]
# Sy_y_inv = solve(Sy_y)
# Systar_ystar= C[ismissy,ismissy]
# m_pred = X[ismissy,]%*%B + t(Sy_ystar)%*%Sy_y_inv%*%(Y[!ismissy] - X[!ismissy,]%*%B)
# S_pred = Systar_ystar - t(Sy_ystar)%*%Sy_y_inv%*%Sy_ystar
# Y[ismissy] = mvrnorm(1, m_pred,S_pred)
# llnorm = loglike_norm(Y,B,Cinv,logdet)
# llpois = loglike_pois(Y, params[c(alpha0index, alpha1index)], H[seq(1,n*2,2)], H[seq(2,n*q,2)])
#Simulate Missing H
H[seq(1,2*n,2)][data$a==1] = sampleH(0, params[c(alpha0index, alpha1index)], Y)
H[seq(2,2*n,2)][data$a==0] = sampleH(1, params[c(alpha0index, alpha1index)], Y)
llpois = loglike_pois(Y, params[c(alpha0index, alpha1index)], H[seq(1,n*2,2)], H[seq(2,n*q,2)])
ll = llsp + llnorm + llpois
### Create output variables on the original scale
K1out = exp(params[Aindex[1]])
K2out = exp(params[Aindex[2]])
Kout = createspatialsig(K1out, K2out,rho)
thetaout = logitInv(params[thetaindex], thetaunif_a, thetaunif_b)
if (iterno%%thin==0){
samples[kk,] = c(Kout[lower.tri(diag(1,q), TRUE)], exp(params[psiindex]), thetaout,params[c(alpha0index, alpha1index)], B)
ysims[kk,] = Y
hsims[kk,] = H
kk = kk+1
}
if (!is.null(outputbinsize) & iterno%%outputbinsize==0){
out <- list()
out$samples <- samples[1:(kk-1), ]
out$y0 <- ysims[1:(kk-1),seq(1,n*q,q)]
out$y1 <- ysims[1:(kk-1),seq(2,n*q,q)]
out$h0 <- hsims[1:(kk-1),seq(1,n*2,2)]
out$h1 <- hsims[1:(kk-1),seq(2,n*2,2)]
out$coords <- coords
out$trt <- data$a
out$formula <- formula
out$formula_h <- formula_h
save(out, file=outputfilename)
rm(out)
}
if (iterno%%binsize==0){
print(paste("Iteration:", iterno))
print(c("Accepted:", round(accepted/iterno,3), round(mean(accepted_y[ismissy]/iterno),3)))
print(c("Number of not PD:", notpd))
}
if (iterno>=nsamp){donesampling=TRUE}
iterno = iterno+1
}#while donesampling==FALSE
out <- list()
out$samples <- samples
out$y0 <- ysims[,seq(1,n*q,q)]
out$y1 <- ysims[,seq(2,n*q,q)]
out$h0 <- hsims[,seq(1,n*2,2)]
out$h1 <- hsims[,seq(2,n*2,2)]
out$coords <- coords
out$trt <- data$a
out$formula <- formula
out$formula_h <- formula_h
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.