# R/bfse.R In geoBayes: Analysis of Geostatistical Data using Bayes and Empirical Bayes Methods

#### Documented in bmbfse

##' Compute the standard errors for the Bayes factors estimates.
##'
##' Uses the batch means method to compute the standard errors for
##' Bayes factors.
##' @title Batch means, Bayes factors standard errors
##' @param pargrid A data frame with components "linkp", "phi", "omg",
##'   "kappa". Each row gives a combination of the parameters to
##'   compute the new standard errors.
##' @param runs A list with outputs from the function
##' @param bfsize1 A scalar or vector of the same length as
##'   \code{runs} with all integer values or all values in (0, 1]. How
##'   many samples (or what proportion of the sample) to use for
##'   estimating the Bayes factors at the first stage. The remaining
##'   sample will be used for estimating the standard errors in the
##'   second stage. Setting it to 1 will perform only the first stage.
##' @param nbatch1 A scalar or vector of the same length as
##'   \code{runs}. All values must be integers or less than 1. This is
##'   used for calculating how many batches to split each of the
##'   sample in runs for the calculation of the Bayes factors standard
##'   errors for the parameters corresponding to \code{runs}.
##' @param nbatch2 A scalar or vector of the same length as
##'   \code{runs}. All values must be integers or less than 1. This is
##'   used for calculating how many batches to split each of the
##'   sample in runs for the calculation of the Bayes factors standard
##'   errors for the parameters corresponding to \code{pargrid}.
##' @param method Which method to use to calculate the Bayes factors:
##'   Reverse logistic or Meng-Wong.
##' @param bvmethod Which method to use for the calculation of the
##'   batch variance. The standard method splits to disjoint batches.
##'   The second and third method use the spectral variance method
##'   with different lag windows.
##' @param reference Which model goes in the denominator.
##' @param transf Whether to use a transformed sample for the
##'   computations. If \code{"no"} or \code{FALSE}, it doesn't. If
##'   \code{"mu"} or \code{TRUE}, it uses the samples for the mean. If
##'   \code{"wo"} it uses an alternative transformation. The latter
##'   can be used only for the families indicated by
##'   \code{.geoBayes_models$haswo}. ##' @return A list with components ##' \itemize{ ##' \item \code{pargrid} The inputted pargrid augmented with the computed standard ##' errors. ##' \item \code{bfEstimate} The estimates of the Bayes factors ##' \item \code{bfSigma} The covariance matrix for the Bayes factors ##' estimates. ##' } ##' @references Roy, V., Tan, A. and Flegal, J. (2018). Estimating ##' standard errors for importance sampling estimators with multiple ##' Markov chains, Statistica Sinica, 28 1079-1101. ##' ##' Roy, V., & Evangelou, E. (2018). Selection of proposal ##' distributions for generalized importance sampling estimators. ##' arXiv preprint arXiv:1805.00829. ##' @useDynLib geoBayes bfse_no bfse_mu bfse_wo bfse_tr ##' @export bmbfse <- function(pargrid, runs, bfsize1 = 0.80, nbatch1 = 0.5, nbatch2 = 0.5, method = c("RL", "MW"), bvmethod = c("Standard", "TukeyHanning", "Bartlett"), reference = 1, transf = c("no", "mu", "wo")) { ## Method method <- match.arg(method) imeth <- match(method, eval(formals()$method))

## Batch variance method
bvmethod <- match.arg(bvmethod)
ibvmeth <- match(bvmethod, eval(formals()$bvmethod)) ## Check input if (any(sapply(runs, class) != "geomcmc")) { stop ("Input runs is not a list with elements of class geomcmc") } nruns <- length(runs) if (nruns == 0) stop ("No runs specified") reference <- as.integer(reference) if (isTRUE(reference < 1L | reference > nruns)) { stop("Argument reference does not correspond to a run in runs") } ## Check if fixed phi and omg if (!all(sapply(runs, function(x) length(x$FIXED$phi) == 1))) { stop("Each input runs must have a fixed value phi.") } if (!all(sapply(runs, function(x) length(x$FIXED$omg) == 1))) { stop("Each input runs must have a fixed value omg.") } ## Extract data and model nm_DATA <- c("response", "weights", "modelmatrix", "locations", "longlat") nm_MODEL <- c("family", "corrfcn", "betm0", "betQ0", "ssqdf", "ssqsc", "tsqdf", "tsqsc", "dispersion") DATA <- runs[[1]]$DATA[nm_DATA]
MODEL <- runs[[1]]$MODEL[nm_MODEL] if (nruns > 1) { for (i in 2:nruns) { if (!identical(runs[[i]]$DATA[nm_DATA], DATA)) {
stop("MCMC chains don't all correspond to the same data.")
}
if (!identical(runs[[i]]$MODEL[nm_MODEL], MODEL)) { stop("MCMC chains don't all correspond to the same model.") } } } y <- DATA$response
n <- as.integer(length(y))
l <- DATA$weights F <- DATA$modelmatrix
p <- NCOL(F)
loc <- DATA$locations dm <- sp::spDists(loc, longlat = DATA$longlat)
family <- MODEL$family ## ifam <- .geoBayes_family(family) corrfcn <- MODEL$corrfcn
icf <- .geoBayes_correlation(corrfcn)
betm0 <- MODEL$betm0 betQ0 <- MODEL$betQ0
ssqdf <- MODEL$ssqdf ssqsc <- MODEL$ssqsc
tsqdf <- MODEL$tsqdf tsqsc <- MODEL$tsqsc
dispersion <- MODEL$dispersion ## Skeleton points phi_pnts <- as.double(sapply(runs, function(r) r$FIXED$phi)) omg_pnts <- as.double(sapply(runs, function(r) r$FIXED$omg)) nu_pnts <- as.double(sapply(runs, function(r) r$FIXED$linkp_num)) if (.geoBayes_corrfcn$needkappa[icf]) {
kappa_pnts <- sapply(runs, function(r) r$FIXED$kappa)
kappa_pnts <- .geoBayes_getkappa(kappa_pnts, icf)
} else {
kappa_pnts <- rep(0, nruns)
}

## MCMC Sizes
Nout <- sapply(runs, function(x) x$MCMC$Nout)
Nout1 <- getsize(bfsize1, Nout, "*")
Ntot1 <- sum(Nout1)
Nout2 <- Nout - Nout1
Ntot2 <- sum(Nout2)

## Sizes for SE
nb1 <- getsize(nbatch1, Nout1, "^")
nb2 <- if(Ntot2 > 0) getsize(nbatch2, Nout2, "^") else rep(0L, nruns)

## Transformed sample
getsample <-
transfsample(runs,
list(response = y, family = family), transf)
sample <- matrix(unlist(getsample$sample), n) itr <- getsample$itr
transf <- getsample$transf real_transf <- getsample$real_transf
ifam <- getsample$ifam froutine <- paste0("bfse_", real_transf) if (real_transf == "wo" || real_transf == "tr") { ifam <- -ifam } ## New parameter values pargrid <- check_pargrid(pargrid, family, corrfcn) nnew <- nrow(pargrid) phi <- pargrid$phi
omg <- pargrid$omg kappa <- pargrid$kappa
nu <- pargrid$nu ## Split the sample sel1 <- rep(rep(c(TRUE, FALSE), nruns), rbind(Nout1, Nout2)) sample <- matrix(unlist(sample), n) z1 <- sample[, sel1, drop = FALSE] z2 <- sample[, !sel1, drop = FALSE] ## dispersion or tsqsc if (ifam == 0) { tsq <- tsqsc } else { tsq <- dispersion tsqdf <- 0 } ## Output SE <- numeric(nnew) bf <- numeric(nruns) Sig <- numeric((nruns - 1)^2) VT1 <- VT2 <- numeric(nnew) RUN <- .Fortran(froutine, bf = bf, Sig = Sig, SE = SE, VT1 = VT1, VT2 = VT2, as.integer(reference), as.double(phi), as.double(omg), as.double(nu), as.double(kappa), as.double(phi_pnts), as.double(omg_pnts), as.double(nu_pnts), as.double(kappa_pnts), as.double(z1), as.integer(Nout1), as.integer(Ntot1), as.double(z2), as.integer(Nout2), as.integer(Ntot2), as.double(y), as.double(l), as.double(F), as.double(dm), as.double(betm0), as.double(betQ0), as.double(ssqdf), as.double(ssqsc), as.double(tsqdf), as.double(tsq), as.integer(icf), as.integer(n), as.integer(p), as.integer(nnew), as.integer(nruns), as.integer(ifam), as.integer(imeth), as.integer(nb1), as.integer(nb2), as.integer(ibvmeth), as.integer(itr), PACKAGE = "geoBayes") ## Return out <- list() pargrid$SE <- RUN$SE/sqrt(Ntot2) out$pargrid <- pargrid
out$bfEstimate <- RUN$bf
out$bfSigma <- matrix(RUN$Sig/Ntot1, nruns - 1)
out$VT1 <- RUN$VT1/Ntot2
out$VT2 <- RUN$VT2/Ntot2
return (out)
}

groupColMeans <- function (x, i) {
## Compute the mean at each column of x by subsetting its rows
## according to the factor i. Returns a matrix with as many columns
## as x and as many rows as the number of levels in i.
x <- as.matrix(x)
rx <- nrow(x)
cx <- ncol(x)
m <- tapply(seq_len(rx), i, function(jj) colMeans(x[jj, , drop = FALSE]),
simplify = FALSE)
matrix(unlist(m), ncol = cx, byrow = TRUE)
}

bmmcse <- function(x, size)
{
## Purpose: Compute batch-means standard error of univariate MC samples
## ----------------------------------------------------------------------
## Arguments:
## x   A matrix of MC samples. The SE corresponding to each
## column of x will be returned.
## size   The size of each batch.
## ----------------------------------------------------------------------
x <- as.matrix(x)
n <- nrow(x)
if (missing(size)) {
size <- "sqroot"
} else if (length(size) != 1) stop ("Argument size must have length 1.")
if (is.character(size)) {
i <- pmatch(size, c("sqroot", "sqrt", "cuberoot", "cubert"), nomatch = 0L,
duplicates.ok = FALSE)
if (i == 1 | i == 2) {
size <- as.integer(sqrt(n))
} else if (i == 3 | i == 4) {
size <- as.integer(n^(1/3))
} else stop ("Character size must be one of sqroot or cuberoot.")
} else {
size <- as.numeric(size)
if (size <= 0) {
stop ("Argument size must be positive.")
} else if (size <= 1) {
size <- as.integer(size*n)
} else if (size > n) stop ("Argument size is larger than sample size.")
}
## Determine the number of batches and the size of each batch
nbatches <- as.integer(n/size)
if (nbatches <= 1L) stop ("Number of batches too small. Reduce size.")
nrem <- n%%nbatches
batchl <- rep(c(size + 1L, size), c(nrem, nbatches - nrem))
batchf <- rep(seq_len(nbatches), batchl)
## Compute batch mean and overall mean
bm <- groupColMeans(x, batchf)
m <- colSums(bm*batchl)/n
## Compute the batch variance
bmmm <- sweep(bm, 2, m, check.margin = FALSE)
bv <- colSums(batchl*bmmm^2)/(nbatches - 1)
sqrt(bv)
}

## .. content for \description{} (no empty lines) ..
##
## .. content for \details{} ..
## @title
## @param sample A list containing the samples from each run. Each
##   element of the list must be a matrix or a vector. If a matrix,
##   each column represents one sample.
## @param theta0 A matrix or vector containing the parameter values
##   corresponding to each run in \code{sample}. Each column of
##   \code{theta0} is a parameter.
## @param theta1 A matrix or vector containing the new parameter
##   values for which to compute the standard errors. Each column of
##   \code{theta1} is a parameter.
## @param llikfun A function that computes the log-likelihood. The
##   function is called as \code{llikfun(x,theta,...)} where x is a
##   sample and theta is a parameter vector. The function must return
##   a scalar for each \code{x,theta} pair.
## @param MoreArgs Further arguments to \code{llikfun}.
## @param bfsize1 A scalar or vector of the same length as
##   \code{sample} with all integer values or all values in (0, 1]. How
##   many samples (or what proportion of the sample) to use for
##   estimating the Bayes factors at the first stage. The remaining
##   sample will be used for estimating the standard errors in the
##   second stage. Setting it to 1 will perform only the first stage.
## @param nbatch1 A scalar or vector of the same length as \code{sample}. All
##   values must be integers or less than 1. This is used for
##   calculating how many batches to split each of the sample in runs
##   for the calculation of the Bayes factors standard errors for the
##   parameters corresponding to \code{theta0}.
## @param nbatch2 A scalar or vector of the same length as \code{sample}. All
##   values must be integers or less than 1. This is used for
##   calculating how many batches to split each of the sample in runs
##   for the calculation of the Bayes factors standard errors for the
##   parameters corresponding to \code{theta1}.
## @param method Which method to use to calculate the Bayes factors:
## Reverse logistic or Meng-Wong.
## @param bvmethod Which method to use for the calculation of the
##   batch variance. The standard method splits to disjoint batches.
##   The second and third method use the spectral variance method
##   with different lag windows.
## @param reference Which model goes in the denominator.
## @return A list with components
## \itemize{
## \item \code{logbf} The estimated Bayes factors at \code{theta0}.
## \item \code{bfSigma} The covariance matrix for the Bayes factors
## estimates in \code{logbf}.
## \item \code{SE} The standard error of the Bayes factors estimates
##   at \code{theta1}
## \item \code{VT1, VT2} The first and second variance terms for
##   \code{SE} above.
## }
bfsecalc <- function(sample, theta0, theta1, llikfun, MoreArgs = NULL,
bfsize1 = 0.8, nbatch1 = 0.5, nbatch2 = 0.5,
method = c("RL", "MW"),
bvmethod = c("Standard", "TukeyHanning", "Bartlett"),
reference = 1)
{
## Method
method <- match.arg(method)
imeth <- match(method, eval(formals()$method)) ## Batch variance method bvmethod <- match.arg(bvmethod) ibvmeth <- match(bvmethod, eval(formals()$bvmethod))

kg <- length(sample)

reference <- as.integer(reference)
if (reference < 1 || reference > kg)
stop ("Argument reference must be in between 1 and number of parameters.")

nCOL <- function(x) if (length(d <- dim(x)) > 1L) d[2L] else length(x)
Nout <- sapply(sample, nCOL)
Ntot <- sum(Nout)
Nout1 <- getsize(bfsize1, Nout, "*")
Ntot1 <- sum(Nout1)
Nout2 <- Nout - Nout1
Ntot2 <- sum(Nout2)

## Size of x
nROW <- function(x) if (length(d <- dim(x)) > 1L) d[1L] else 1L
n <- unique(sapply(sample, nROW))
if (length(n) != 1L) stop ("The dimension of the sampled variable changes.")

## Check input theta
ltht <- nROW(theta0)
if (nCOL(theta0) != kg)
stop ("Columns in theta0 must be the length of samples.")
if (nROW(theta1) != ltht)
stop ("theta0 and theta1 must have the same number of rows.")
nnew <- nCOL(theta1)
dim(theta0) <- c(ltht, kg)
dim(theta1) <- c(ltht, nnew)

## Sizes for SE
nb1 <- getsize(nbatch1, Nout1, "^")
nb2 <- getsize(nbatch2, Nout2, "^")

## Split the sample
sel1 <- rep(rep(c(TRUE, FALSE), kg), rbind(Nout1, Nout2))
sample <- matrix(unlist(sample), n)
z1 <- sample[, sel1, drop = FALSE]
z2 <- sample[, !sel1, drop = FALSE]

## Compute log-likelihoods
llik1 <- mapply(llikfun, do.call("c", apply(z1, 2, list)),
rep(do.call("c", apply(theta0, 2, list)), each = Ntot1),
MoreArgs = MoreArgs)
llik2 <- mapply(llikfun, do.call("c", apply(z2, 2, list)),
rep(do.call("c", apply(theta0, 2, list)), each = Ntot2),
MoreArgs = MoreArgs)
llikn <- mapply(llikfun, do.call("c", apply(z2, 2, list)),
rep(do.call("c", apply(theta1, 2, list)), each = Ntot2),
MoreArgs = MoreArgs)

## Compute SE
bf <- numeric(kg)
SE <- VT1 <- VT2 <- numeric(nnew)
Sig <- matrix(0, kg-1, kg-1)
Bet <- Omg <- matrix(0, kg, kg)
fcall <- .Fortran("bfsecalc", bf, Sig, SE, VT1, VT2, reference,
llik1, llik2, llikn, Nout1, Ntot1, Nout2, Ntot2,
nnew, kg, imeth, nb1, nb2, ibvmeth, Bet, Omg,
PACKAGE = "geoBayes")

Bet <- fcall[[20]]; Bet[lower.tri(Bet)] <- t(Bet)[lower.tri(Bet)]
Omg <- fcall[[21]]; Omg[lower.tri(Omg)] <- t(Omg)[lower.tri(Omg)]
BMP <- chol2inv(chol(Bet + 1/kg)) - 1/kg
BOB <- t(BMP) %*% Omg %*% BMP

## Return
out <- fcall[1:5]
names(out) <- c("logbf", "bfSigma", "SE", "VT1", "VT2")
out\$Varlogbf <- BOB
out
}


## Try the geoBayes package in your browser

Any scripts or data that you put into this service are public.

geoBayes documentation built on Feb. 28, 2019, 5:05 p.m.