#' visualize the etiology regression with a continuous covariate
#'
#' This function visualizes the etiology regression against one continuous covariates, e.g.,
#' enrollment date. (NB: dealing with NoA, multiple-pathogen causes, other continuous covariates?)
#'
#' @param DIR_NPLCM File path to the folder containing posterior samples
#' @param stratum_bool a vector of TRUE/FALSE with TRUE indicating the rows of subjects to include
#' @param plot_basis TRUE for plotting basis functions; Default to FALSE
#' @param truth a list of truths computed from true parameters in simulations; elements: Eti, FPR, PR
#' which are matrices of # of rows = # of subjects, # columns: \code{length(cause_list)} for Eti
#' and \code{ncol(data_nplcm$Mobs$MBS$MBS1)}; Default to \code{NULL} for real data analyses.
#'
#' @return A figure of etiology regression curves and some marginal positive rate assessment of
#' model fit; See example for the legends.
#'
#' @import graphics
#' @examples
#' \dontrun{
#' # legend.text = c("[UPPER FIGURES]",
#' "observed prevalence: cases",
#' "observed prevalence: controls",
#' "fitted prevalence: cases",
#' "fitted prevalence: controls",
#' "true positive rate: mean",
#' "true positive rate: 95%CI",
#' "[BOTTOM FIGURES]",
#' "etiology curve: mean",
#' "overall etiology: mean",
#' "overall etiology: 95%CI","","","")
#' legend.col=c("white","black","dodgerblue2","black","dodgerblue2","red","red",
#' "white","springgreen4","orange","orange","white","white","white")
#' legend.lty=c(1,2,2,1,1,1,2,1,1,1,2)
#' legend.lwd=c(2,2,2,2,2,2,2,2,2,2,2,2,2,2)
#' legend("topleft",legend=legend.text,
#' lty=legend.lty,lwd=legend.lwd,
#' col=legend.col,ncol=2,
#' y.intersp=1.5,cex=1.6,box.col=NA)
#' }
#' @references See example figures:
#' \url{https://github.com/zhenkewu/baker/blob/master/inst/figs/visualize_etiology_regression_SITE=1.pdf}
#' and its legends:
#' \url{https://github.com/zhenkewu/baker/blob/master/inst/figs/legends_visualize_etiology_regression.png}
#' @family visualization functions
#' @export
plot_etiology_regression <- function(DIR_NPLCM,stratum_bool,plot_basis=FALSE,truth=NULL){
old_par <- graphics::par(graphics::par("mfrow", "mar"))
on.exit(graphics::par(old_par))
if (!is_jags_folder(DIR_NPLCM)){
stop("==[baker] oops, not a folder baker recognizes.
Try a folder generated by baker, e.g., a temporary folder?==")
}
# JAGS:
#
# Read data from DIR_NPLCM:
#
data_nplcm <- dget(file.path(DIR_NPLCM,"data_nplcm.txt"))
new_env <- new.env()
source(file.path(DIR_NPLCM,"jagsdata.txt"),local=new_env)
bugs.dat <- as.list(new_env)
rm(new_env)
res_nplcm <- coda::read.coda(file.path(DIR_NPLCM,"CODAchain1.txt"),
file.path(DIR_NPLCM,"CODAindex.txt"),
quiet=TRUE)
print_res <- function(x) plot(res_nplcm[,grep(x,colnames(res_nplcm))])
get_res <- function(x) res_nplcm[,grep(x,colnames(res_nplcm))]
# structure the posterior samples:
ncol_dm_FPR_1 <- ncol(bugs.dat$Z_FPR_1)
JBrS_1 <- ncol(bugs.dat$MBS_1)
n_samp_kept <- nrow(res_nplcm)
ncol_dm_Eti <- ncol(bugs.dat$Z_Eti)
Jcause <- bugs.dat$Jcause
Nd <- bugs.dat$Nd
Nu <- bugs.dat$Nu
betaFPR_samp <- array(t(get_res("betaFPR_1")),c(ncol_dm_FPR_1,JBrS_1,n_samp_kept))
betaEti_samp <- array(t(get_res("betaEti")),c(ncol_dm_Eti,Jcause,n_samp_kept))
thetaBS_1_samp <- get_res("thetaBS_1")
linpred <- function(beta,design_matrix){design_matrix%*%beta}
out_FPR_linpred <- array(apply(betaFPR_samp,3,linpred,design_matrix=bugs.dat$Z_FPR_1),
c(Nd+Nu,JBrS_1,n_samp_kept))
out_Eti_linpred <- array(apply(betaEti_samp,3,linpred,design_matrix=bugs.dat$Z_Eti),
c(Nd,Jcause,n_samp_kept))
#
# 2. use this code if date is included in etiology and false positive regressions:
#
# false positive rates:
subset_FPR_ctrl <- data_nplcm$Y==0 & stratum_bool # <--- specifies who to look at.
plotid_FPR_ctrl <- which(subset_FPR_ctrl)[order(data_nplcm$X$std_date[subset_FPR_ctrl])]
curr_date_FPR <- data_nplcm$X$std_date[plotid_FPR_ctrl]
FPR_prob_scale <- expit(out_FPR_linpred[plotid_FPR_ctrl,,])
FPR_mean <- apply(FPR_prob_scale,c(1,2),mean)
FPR_q <- apply(FPR_prob_scale,c(1,2),quantile,c(0.025,0.975))
# positive rates for cases:
fitted_margin_case <- function(pEti_ord,theta,psi,template){
mixture <- pEti_ord
tpr <- t(t(template)*theta)
fpr <- t(t(1-template)*psi)
colSums(tpr*mixture + fpr*mixture)
}
subset_FPR_case <- data_nplcm$Y==1 & stratum_bool # <--- specifies who to look at.
plotid_FPR_case <- which(subset_FPR_case)[order(data_nplcm$X$std_date[subset_FPR_case])]
curr_date_FPR_case <- data_nplcm$X$std_date[plotid_FPR_case]
FPR_prob_scale_case <- expit(out_FPR_linpred[plotid_FPR_case,,])
# etiology:
subset_Eti <- data_nplcm$Y==1 & stratum_bool # <--- specifies who to look at.
plotid_Eti <- which(subset_Eti)[order(data_nplcm$X$std_date[subset_Eti])]
curr_date_Eti <- data_nplcm$X$std_date[plotid_Eti]
Eti_prob_scale <- apply(out_Eti_linpred[plotid_Eti,,],c(1,3),softmax)
Eti_mean <- apply(Eti_prob_scale,c(1,2),mean)
Eti_q <- apply(Eti_prob_scale,c(1,2),quantile,c(0.025,0.975))
Eti_overall <- apply(Eti_prob_scale,c(1,3),mean)
Eti_overall_mean <- rowMeans(Eti_overall)
Eti_overall_q <- apply(Eti_overall,1,quantile,c(0.025,0.975))
PR_case <- array(NA,c(length(plotid_Eti),JBrS_1,n_samp_kept))
for (i in 1:(length(plotid_Eti))){
for (t in 1:n_samp_kept){
PR_case[i,,t] <- fitted_margin_case(Eti_prob_scale[,i,t],
thetaBS_1_samp[t,],
FPR_prob_scale_case[i,,t],
bugs.dat$templateBS_1[1:Jcause,]
)
}
}
PR_case_mean <- apply(PR_case,c(1,2),mean)
PR_case_q <- apply(PR_case,c(1,2),quantile,c(0.025,0.975))
##################
# plot results:
#################
par(mfcol=c(2,Jcause),oma=c(3,0,3,0))
for (j in 1:Jcause){ # <--- the marginal dimension of measurements.
# need to fix this for NoA! <------------------------ FIX!
#
# Figure 1 for case and control positive rates:
#
par(mar=c(2,5,0,1))
{ #<------------------------ FIX!
# if (j == Jcause){
# plot(0,0.5,type="l",ylim=c(0,1),pch="n",
# xaxt="n",xlab="",ylab="positive rate",las=2,bty="n")
#
# mtext("other",side = 3,cex=1.5,line=1)
# } else{ #<------------------------ FIX!
plot(curr_date_FPR,FPR_mean[,j],type="l",ylim=c(0,1),
xaxt="n",xlab="",ylab="positive rate",las=2,bty="n")
polygon(c(curr_date_FPR, rev(curr_date_FPR)),
c(FPR_q[1,,j], rev(FPR_q[2,,j])),
col = grDevices::rgb(0, 1, 1,0.5),border = NA)
# rug plot:
rug(curr_date_FPR[data_nplcm$Mobs$MBS$MBS1[plotid_FPR_ctrl,j]==1],side=3)
rug(curr_date_FPR[data_nplcm$Mobs$MBS$MBS1[plotid_FPR_ctrl,j]==0],side=1)
if(!is.null(truth$FPR)){lines(curr_date_FPR,truth$FPR[plotid_FPR_ctrl,j],col="blue",lwd=3)}
if(plot_basis){matplot(curr_date_FPR,bugs.dat$Z_FPR_1[plotid_FPR_ctrl,],col="blue",type="l",add=TRUE)}
mtext(names(data_nplcm$Mobs$MBS[[1]])[j],side = 3,cex=1.5,line=1)
points(curr_date_FPR_case,PR_case_mean[,j],type="l",ylim=c(0,1))
polygon(c(curr_date_FPR_case, rev(curr_date_FPR_case)),
c(PR_case_q[1,,j], rev(PR_case_q[2,,j])),
col = grDevices::rgb(1, 0, 0,0.5),border = NA)
if(!is.null(truth$PR)){lines(curr_date_FPR_case,truth$PR[plotid_FPR_case,j],col="black",lwd=3)}
# rug plot:
rug(curr_date_FPR_case[data_nplcm$Mobs$MBS$MBS1[plotid_FPR_case,j]==1],side=3,col="dodgerblue2",line=-1)
rug(curr_date_FPR_case[data_nplcm$Mobs$MBS$MBS1[plotid_FPR_case,j]==0],side=1,col="dodgerblue2",line=-1)
abline(h=colMeans(thetaBS_1_samp)[j],col="red")
abline(h=quantile(thetaBS_1_samp[,j],0.025),col="red",lty=2)
abline(h=quantile(thetaBS_1_samp[,j],0.975),col="red",lty=2)
# add raw moving average dots:
ma <- function(x,n=60){stats::filter(x,rep(1/n,n), sides=2)}
ma_cont <- function(y,x,hw=0.35){
res <- rep(NA,length(y))
for (i in seq_along(y)){
res[i] <- mean(y[which(x>=x[i]-hw & x<=x[i]+hw)])
}
res
}
response.ctrl <- bugs.dat$MBS_1[plotid_FPR_ctrl,j]
dat_ctrl <- data.frame(std_date=data_nplcm$X$std_date[plotid_FPR_ctrl])[!is.na(response.ctrl),,drop=FALSE]
dat_ctrl$runmean <- ma_cont(response.ctrl[!is.na(response.ctrl)],dat_ctrl$std_date[!is.na(response.ctrl)])
points(runmean ~ std_date,data=dat_ctrl[!is.na(response.ctrl),],lty=2,pch=1,cex=0.5,type="o",col="dodgerblue2")
response.case <- bugs.dat$MBS_1[plotid_FPR_case,j]
dat_case <- data.frame(std_date=data_nplcm$X$std_date[plotid_FPR_case])[!is.na(response.case),,drop=FALSE]
dat_case$runmean <- ma_cont(response.case[!is.na(response.case)],dat_case$std_date[!is.na(response.case)])
points(runmean ~ std_date,data=dat_case,lty=2,pch=1,cex=0.5,type="o")
}
#
# Figure 2 for Etiology Regression:
#
par(mar=c(2,5,0,1))
plot(curr_date_Eti,Eti_mean[j,],type="l",ylim=c(0,1),xlab="standardized date",
ylab="etiologic fraction",bty="n",xaxt="n",yaxt="n",las=2)
## ONLY FOR SIMULATIONS <---------------------- FIX!
if(!is.null(truth$Eti)){points(curr_date_Eti,truth$Eti[plotid_Eti,j],type="l",lwd=3,col="black")}
if(plot_basis){matplot(curr_date_Eti,bugs.dat$Z_Eti[plotid_Eti,],col="blue",type="l",add=TRUE)}
# overall pie:
abline(h=Eti_overall_mean[j],col="black",lwd=2)
abline(h=Eti_overall_q[,j],col="black",lty=2,lwd=1.5)
mtext("Overall Pie",side=3,line=-0.5,cex=1.2)
mtext(paste0(round(Eti_overall_mean[j],3)*100,"%"),side=3,line=-2,cex=1.2)
mtext(paste0(round(Eti_overall_q[1,j],3)*100,"%"),side=3,line=-2.5,cex=1,adj=0.15)
mtext(paste0(round(Eti_overall_q[2,j],3)*100,"%"),side=3,line=-2.5,cex=1,adj=0.85)
# add x-axis for dates:
X <- data_nplcm$X
Y <- data_nplcm$Y
# some date transformations:
X$date_plot <- as.Date(X$ENRLDATE)
X$date_month_centered <- as.Date(cut(X$date_plot,breaks="2 months"))+30
X$date_month <- as.Date(cut(X$date_plot,breaks="2 months"))
color2 <- grDevices::rgb(190, 190, 190, alpha=200, maxColorValue=255)
color1 <- grDevices::rgb(216,191,216, alpha=200, maxColorValue=255)
#cases:
last_interval <- max(X$date_month)
lubridate::month(last_interval) <- lubridate::month(last_interval) +2
axis(1, X$std_date[c(plotid_FPR_case)],
format(c(X$date_month[c(plotid_FPR_case)]), "%Y %b"),
cex.axis = .7,las=1)
axis(2,at = seq(0,1,by=0.2),labels=seq(0,1,by=0.2),las=2)
polygon(c(curr_date_Eti, rev(curr_date_Eti)),
c(Eti_q[1,j,], rev(Eti_q[2,j,])),
col = grDevices::rgb(0.5,0.5,0.5,0.5),border = NA)
}
}
#' visualize the etiology estimates for each discrete levels
#'
#' This function visualizes the etiology estimates against one discrete covariate, e.g.,
#' age groups.
#'
#' @param DIR_NPLCM File path to the folder containing posterior samples
#' @param strata_weights a vector of weights that sum to one; for each pathogen
#' the weights specify how the j-th etiology fraction should be combined across all
#' levels of the discrete predictors in the data
#' @param truth a list of true values, e.g.,
#' \code{truth=list(allEti = a list of etiology fractions)}
#'
#' @import graphics
#' @family visualization functions
#' @export
plot_etiology_strat <- function(DIR_NPLCM,strata_weights=NULL,truth=NULL){
old_par <- graphics::par(graphics::par("mfrow", "mar"))
on.exit(graphics::par(old_par))
if (!is_jags_folder(DIR_NPLCM)){
stop("==[baker] oops, not a folder baker recognizes.
Try a folder generated by baker, e.g., a temporary folder?==")
}
#
# Read data from DIR_NPLCM:
#
data_nplcm <- dget(file.path(DIR_NPLCM,"data_nplcm.txt"))
model_options <- dget(file.path(DIR_NPLCM,"model_options.txt"))
new_env <- new.env()
source(file.path(DIR_NPLCM,"jagsdata.txt"),local=new_env)
bugs.dat <- as.list(new_env)
rm(new_env)
res_nplcm <- coda::read.coda(file.path(DIR_NPLCM,"CODAchain1.txt"),
file.path(DIR_NPLCM,"CODAindex.txt"),
quiet=TRUE)
print_res <- function(x) plot(res_nplcm[,grep(x,colnames(res_nplcm))])
get_res <- function(x) res_nplcm[,grep(x,colnames(res_nplcm))]
# structure the posterior samples:
JBrS_1 <- ncol(bugs.dat$MBS_1) # number of pathogens in MBS1
n_samp_kept <- nrow(res_nplcm) # number of posterior sample after burn-in
Jcause <- bugs.dat$Jcause # number of causes
Nd <- bugs.dat$Nd # case size
Nu <- bugs.dat$Nu # control size
n_unique_Eti_level <- bugs.dat$n_unique_Eti_level # number of stratums
# etiology:
plotid_Eti <- which(data_nplcm$Y==1) # <--- specifies who to look at.
Eti_prob_scale <- array(get_res("pEti"),c(n_samp_kept,n_unique_Eti_level,Jcause))
# posterior etiology mean for each cause for each site
Eti_mean <- apply(Eti_prob_scale,c(2,3),mean)
# posterior etiology quantiles for each cause for each site
Eti_q <- apply(Eti_prob_scale,c(2,3),quantile,c(0.025,0.975))
# marginalized posteior etiology ignoring site
Eti_overall <- apply(Eti_prob_scale,c(3,1),mean)
# posteior etiology mean for each cause across all sites
Eti_overall_mean <- rowMeans(Eti_overall)
# posteior etiology quantiles for each cause across all sites
Eti_overall_q <- apply(Eti_overall,1,quantile,c(0.025,0.975))
# weight to marginalize posterior etiology distributions
user_weight <- rep(1/n_unique_Eti_level,n_unique_Eti_level) # c(0.3,0.2,0.1,0.1,0.1,0.1,0.1)
if(!is.null(strata_weights) && length(strata_weights==n_unique_Eti_level)){
user_weight <- strata_weights
}
# marginalized posterior etiology over all sites using user-defined weights
Eti_overall_usr_weight <- apply(Eti_prob_scale,1,function(S) t(S)%*%matrix(user_weight,ncol=1))
# marginalized posterior etiology mean using user-defined weights
Eti_overall_mean_usr_weight <- rowMeans(Eti_overall_usr_weight)
# marginalized posterior etiology quantiles using user-defined weights
Eti_overall_q_usr_weight <- apply(Eti_overall_usr_weight,1,quantile,c(0.025,0.975))
# plot posterior distribution for etiology probability
par(mfcol=c(1+n_unique_Eti_level,Jcause),mar=c(3,10,1,0))
for (j in 1:Jcause){
# if (j==1) {par(mar=c(3,0,2,0))}
# if (j>1) {par(mar=c(3,0,1,0))}
for (site in 1:n_unique_Eti_level){
hist(Eti_prob_scale[,site,j],xlim=c(0,1),breaks="Scott",freq=FALSE,main="",xlab="")
if (!is.null(truth$allEti)){
abline(v = truth$allEti[[site]][[j]], col="blue", lwd=3, lty=2) # mark the truth.
q_interval <- quantile(Eti_prob_scale[,site,j],c(0.025,0.975))
is_included <- truth$allEti[[site]][[j]] < q_interval[2] && truth$allEti[[site]][[j]] > q_interval[1]
abline(v = q_interval,col=c("gray","red")[2-is_included],lwd=2,lty=1)
}
mtext(text = paste0('level',levels(as.factor(data_nplcm$X$SITE))[site],": ",
Jcause[j]),3,-1,cex=2,adj = 0.9)
if (j==1){
mtext(paste0(round(user_weight[site],4)),2,5,cex=2,col="blue",las=1)
if (site==ceiling(n_unique_Eti_level/2)) {mtext("User-specified weight towards overall pie:", 2,12, cex=3)}
}
}
hist(Eti_overall_usr_weight[j,],xlim=c(0,1),breaks="Scott",freq=FALSE,main="",col="blue",
xlab="Etiology")
if (!is.null(truth$allEti)){
truth_overall <- c(matrix(user_weight,nrow=1)%*%do.call(rbind,truth$allEti))
abline(v=truth_overall[site],col="orange",lty=1,lwd=2) # mark the truth.
q_interval_Eti_overall <- quantile(Eti_overall_usr_weight[j,],c(0.025,0.975))
is_included_Eti_overall <- truth_overall < q_interval_Eti_overall[2] &&
truth_overall > q_interval_Eti_overall[1]
abline(v = q_interval_Eti_overall,col=c("gray","red")[2-is_included_Eti_overall],lwd=2,lty=1)
}
mtext(text = paste0("Overall: ",model_options$likelihood$cause_list[j]),3,adj=0.9,cex=2,col="blue")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.