#' MCMC for source apportionment
#'
#' \code{mcmcsa} performs MCMC for source apportionment
#'
#' This function aims to draw from the posterior distributions of the source
#' profiles, the source contributions/concentrations, the variance of the data
#' and the other parameters. Eventually, this function will incorporate
#' options for concentrations observed below the MDL
#'
#' @param dat data frame of daily constituent concentrations with date as first column
#' @param L number of sources
#' @param lamcon Matrix of sources by constituents with 1 and 0 for conditions
#' @param mdls data frame of daily MDL values of the same dimensions of data
#' @param guessvec starting values (do not need to provide)
#' @param burnin number of burnin samples
#' @param N number of samples
#' @export
#' @examples
#' data(nycdat)
#' data(nycmdl)
#' data(lamcon)
#' #fix data totals
#' pm <- nycdat$PM25
#' whPM <- which(colnames(nycdat) == "PM25")
#' nycdat <- nycdat[, -whPM]
#' whPM <- which(colnames(nycmdl) == "PM25")
#' nycmdl <- nycmdl[, -whPM]
#' L <- 4
#'
#' mcmcsa(nycdat, L, lamcon)
mcmcsa <- function(x, ...) UseMethod("mcmcsa")
#' @rdname mcmcsa
#' @export
mcmcsa.default <- function(dat, lamcon, mdls = NULL,
guessvec = NULL, burnin = 10000, N = 100000, fix = NULL){
#first confirm first column is date
if(class(dat[, 1]) != "Date") {
stop("First column is not a date")
}
#separate dates and data
dates <- dat[, 1]
dat <- dat[, -1]
#set dimensions
P <- ncol(dat)
T1 <- nrow(dat)
L <- nrow(lamcon)
#if mdl, then censor data
if(!(is.null(mdls))) {
bdls <- 1 * (dat < mdls)
dat[which(bdls == 1, arr.ind = T)] <- NA
}else{
bdls <- NULL
}
#get indices correspond to id constraints
cond <- which(!(is.na(lamcon)), arr.ind = T)
#create arrays for output
lamstar <- array(dim = c(L, P, N - burnin ))
lfmat <- array(dim = c(T1, L, N - burnin))
# sigma2 <- array(dim = c(P, P, N - burnin))
sigma2 <- array(dim = c(P, N - burnin))
mu <- array(dim = c(L, N - burnin))
xi2 <- array(dim = c(L, N - burnin))
#dimnames
cons <- colnames(dat)
days <- seq(1, T1)
sources <- rownames(lamcon)
iters <- seq(1, dim(lamstar)[3])
dimnames(lamstar) <- list(sources, cons, iters)
dimnames(lfmat) <- list(days, sources, iters)
dimnames(sigma2) <- list(cons, iters)
dimnames(mu) <- list(sources, iters)
dimnames(xi2) <- list(sources, iters)
names1 <- c("lamstar", "lfmat", "sigma2", "mu", "xi2")
#set first guesses
if(is.null(guessvec)) {
#truncated normal for lamstar
lamstarG <- matrix(rtruncnorm(L * P, a = 0), nrow = L)
lamstarG[cond] <- lamcon[cond]
#MVnormal for logF
lfmatG <- matrix((rnorm(T1 * L)), nrow = T1)
# sigma2G <- diag(1, P)
sigma2G <- rep(.0001, P)
# muG <- rep(0, L)
muG <- rep(c(0.453, 1.916, 1.703, 0.582), length = L)
#xi2G <- rep(1, L)
xi2G <- rep(c(0.035, 0.002, 0.003, 0.012), length = L)
guessvec <- list(lamstarG, lfmatG, sigma2G, muG, xi2G)
names(guessvec) <- names1
}
#get guess for log data
if(!(is.null(mdls))) {
names1 <- c(names1, "ly")
ldat <- array(dim = c(T1, P, N - burnin))
#get initial guess
ldatG <- log(dat)
for(t in 1 : T1) {
for(p in 1 : P) {
if(bdls[t, p] == 1) {
#start with uniform between 0 and mdls
ldatG[t, p] <- log(runif(1, 0, mdls[t, p]))
}
}
}
guessvec[["ly"]] <- ldatG
}else{
guessvec[["ly"]] <- log(dat)
}
#get lfhist
lfhist <- list(length = T1)
for(t in 1 : T1) {
lfhist[[t]] <- lfmatG[t, ]
}
#for each iteration (N large)
for (i in 1 : N) {
print(i)
# if(i == 53) {browser()}
#update all parameters
for(j in 1 : length(names1)) {
out <- gibbsfun(guessvec = guessvec,
lamcon = lamcon,
type = names1[j], lfhist = lfhist, iter = i,
mdls = mdls, bdls = bdls, fix = fix)
lfhist <- out[["lfhist"]]
guessvec <- out[["guessvec"]]
}
#if we are past the burnin period, save
if (i > burnin) {
lamstar[, , i - burnin] <- guessvec[["lamstar"]]
lfmat[, , i - burnin] <- guessvec[["lfmat"]]
# sigma2[, , i - burnin] <- guessvec[["sigma2"]]
sigma2[, i - burnin] <- guessvec[["sigma2"]]
mu[, i - burnin] <- guessvec[["mu"]]
xi2[, i - burnin] <- guessvec[["xi2"]]
if(!(is.null(mdls))) {
ldat[, , i - burnin] <- guessvec[["ly"]]
}
}
}
#create output
listout <- list(lamstar, lfmat, sigma2, mu, xi2)
names(listout) <- names1
if(!(is.null(mdls))) {
listout[["ly"]] <- ldat
}
listout
}
print.mcmcsa <- function(x) {
cat("Call:\n")
print(x$call)
cat("Posterior mean unscaled profiles:\n")
print(apply(x$lamstar, c(1, 2), mean))
}
plot.mcmcsa <- function(x, type, names1 = NULL, selects = NULL, ls1 = NULL, ps = NULL, ts1 = NULL) {
# Select output
out <- x[[type]]
# Get index
P <- ncol(x$lamstar)
L <- nrow(x$lamstar)
T1 <- nrow(x$lfmat)
if(is.null(ls1)) {
ls1 <- sample(1 : L, 1)
}
if(is.null(ps)) {
ps <- sample(1 : P, 16, replace = F)
}
if(is.null(ts1)) {
ts1 <- sample(1 : T1, 16, replace = F)
}
#print(c(ts1, ls1, ps))
# Main title
mains <- paste(type, seq(1 : P))
# Subset for lambda, F
if(type == "lamstar") {
out <- out[ls1, ps, ]
mains <- paste0(type, ", Source:", ls1, ", Cons:", ps)
} else if(type == "lfmat" ) {
out <- out[ts1, ls1, ]
mains <- paste0(type, ", Source:", ls1, ", Day:", ts1)
} else if(type == "sigma2") {
out <- out[ps, ]
mains <- paste0(type, ", Cons:", ps)
}
# Only plot some
if(!is.null(selects)) {
out <- out[, selects]
}
print(dim(out))
par(mfrow = c(4, 4))
for(i in 1 : P) {
if(i <= nrow(out)) {
plot(out[i, ], main = mains[i], pch = 16)
}
}
}
##########################################
#update to new guess
##########################################
gibbsfun <- function(guessvec, lamcon, type, lfhist,
iter, mdls, bdls, fix = NULL) {
# print(type)
if(type == "lamstar" && !(type %in% fix)) {
guessvec <- lamfun(guessvec, lamcon)
} else if(type == "lfmat" && !(type %in% fix)) {
out <- ffun(guessvec, lfhist, iter)
guessvec <- out[["guessvec"]]
lfhist <- out[["lfhist"]]
} else if(type == "mu" && !(type %in% fix)) {
guessvec <- mufun(guessvec)
} else if(type == "sigma2" && !(type %in% fix)) {
guessvec <- sig2fun(guessvec)
} else if(type == "xi2" && !(type %in% fix)) {
guessvec <- xi2fun(guessvec)
} else if(type == "ly" && !(type %in% fix)) {
guessvec <- yfun(guessvec, mdls, bdls)
}else if (!(type %in% fix)){
stop("Error: type not recognizeds")
}
out <- list(guessvec, lfhist)
names(out) <- c("guessvec", "lfhist")
out
}
#####
# normal/inv-gamma variance sampling for sigma
# guessvec is list of guesses
sig2fun <- function(guessvec) {
ldat <- guessvec[["ly"]]
lfmat <- guessvec[["lfmat"]]
fmat <- exp(lfmat)
lamstar <- guessvec[["lamstar"]]
lambda <- sweep(lamstar, 1, rowSums(lamstar), "/")
#get dimensions
P <- ncol(ldat)
T1 <- nrow(ldat)
#get mean
mean <- log(fmat %*% lambda)
#get priors
pra <- 0.01
prb <- 0.0001
# Revise prior for invgamma
#pra <- 2
#prb <- 1
#get posterior parameters
a2 <- pra + T1 / 2
diffs2 <- (ldat - mean)^2
b2 <- prb + colSums(diffs2) / 2
#sample from inv gamma
sigma2 <- 1/ rgamma(P, a2, scale = b2)
# sigma2 <- diag(sigma2)
#update guess
guessvec[["sigma2"]] <- sigma2
guessvec
}
#####
# normal/inv-gamma variance sampling for xi
# guessvec is list of guesses
xi2fun <- function(guessvec) {
lfmat <- guessvec[["lfmat"]]
mu <- guessvec[["mu"]]
#get dimensions
L <- ncol(lfmat)
T1 <- nrow(lfmat)
#set prior values
# pra <- 0.01
# prb <- 0.01
# Revise prior for invgamma
# pra <- 2
#prb <- 1
#pra <- 2.1
#pra <- 3
# prb <- c(0.04, 0.002, 0.003, 0.01)
#based on check_priors
pra <- 1.3
prb <- 1.5
#get posterior paramters
a2 <- pra + T1 / 2
diffs2 <- (sweep(lfmat, 2, mu))^2
b2 <- prb + colSums(diffs2) / 2
#sample for all sources
xi2 <- 1 / rgamma(L, a2, scale = b2)
#update guess
guessvec[["xi2"]] <- xi2
guessvec
}
#####
# normal/normal mean sampling
# guessvec is list of guesses
mufun <- function(guessvec) {
xi2 <- guessvec[["xi2"]]
lfmat <- guessvec[["lfmat"]]
#get dimensions
L <- ncol(lfmat)
T1 <- nrow(lfmat)
#set prior values
prmean <- 0
#prmean <- c(0.45, 1.9, 1.7, 0.58)
prvar <- 10
#prvar <- 1
#get variance for all sources
invars <- 1 / prvar + T1 / xi2
vars <- 1 / invars
#get mean for all sources
num <- prmean / prvar + colSums(lfmat) / xi2
mn <- num * vars
#sample from independent normals
mu <- rnorm(L, mean = mn, sd = sqrt(vars))
#update guess
guessvec[["mu"]] <- mu
guessvec
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.