test_validity.R
In mcmcse: Monte Carlo Standard Errors for MCMC

set.seed(10)
library(testthat)
library(mcmcse)


context("validResults")

eureka <- function(order_max, r, g, coefs, var, a, threshold) {
  v = r[1]
  d = r[2]
  a[1] = 1.0
  coefs[1,1] = g[2]/v
  
  ans = list("vars" = var, "coefs" = coefs, "order" = order_max)
  
  if(abs(coefs[1,1]) <= threshold) {
    ans$vars = var
    ans$coefs = coefs
    ans$order = 0
    return(ans)
  }
  
  q = coefs[1,1] * r[2]
  var[1] = (1 - coefs[1,1]*coefs[1,1]) * r[1]
  
  if(order_max == 1) {
    ans$vars = var
    ans$coefs = coefs
    ans$order = 1
    return(ans)
  }
  
  for(l in 2:order_max) {
    a[l] = -d/v
    
    if(l > 2) {
      l1 = (l-2)/2
      l2 = l1+1
      
      for(j in 2:l2) {
        hold = a[j]
        k = l-j+1
        a[j] = a[j] + a[l] * a[k]
        a[k] = a[k] + a[l] * hold
      }
      
      if((2*l1) != (l-2.0))
        a[l2+1] = a[l2+1] * (1.0 + a[l])
    }
    
    v = v + a[l] * d
    coefs[l,l] = (g[l+1] - q)/v
    
    if(abs(coefs[l,l]) <= threshold) {
      ans$vars = var
      ans$coefs = coefs
      ans$order = l-1
      return(ans)
    }
    
    
    for(j in 1:(l-1)) {
      coefs[l,j] = coefs[l-1,j] + coefs[l,l] * a[l-j+1]
    }
    
    var[l] = var[l-1] * (1 - coefs[l,l]*coefs[l,l])
    
    if(l == order_max) {
      ans$vars = var
      ans$coefs = coefs
      ans$order = order_max
      return(ans)
    }
    
    d = 0.0
    q = 0.0
    
    for(i in 1:l) {
      k = l-i+2
      d = d + a[i] * r[k]
      q = q + coefs[l,i] * r[k]
    }
    
  }
  ans$vars = var
  ans$coefs = coefs
  ans$order = order_max
  return(ans)
}

ar_yw <- function(order.max, r, g, coefs, var, a, threshold, n) {
  ans = eureka(order.max, r, g, coefs, var, a, threshold)
  coef_vec = numeric(ans$order)
  
  if(ans$order> 0)  {
    coef_vec = t(ans$coefs[ans$order, 1:ans$order])
    var_pred = ans$vars[ans$order]
  }
  else{
    var_pred = r[1]
  }
  
  var_pred = var_pred*n/(n-(ans$order+1))
  
  ret = list("coefs" = coef_vec, "vars" = var_pred, "order" = ans$order)
  return(ret)
}

arp_approx <- function(xacf_vec, order.max, n)
{
  threshold = qnorm((1.95)/2)/sqrt(n)
  # Fitting a univariate AR(m) model
  ar.fit <- ar_yw(order.max, xacf_vec, xacf_vec, matrix(0, nrow = order.max, ncol = order.max), 
                  numeric(order.max), numeric(order.max), threshold, n)
  
  # estimated autocovariances
  gammas <- xacf_vec #as.numeric(acf(x, type = "covariance", lag.max = ar.fit$order, plot = FALSE)$acf)
  spec <- ar.fit$vars/(1-sum(ar.fit$coefs))^2  #asym variance
  
  if(ar.fit$order != 0)
  {
    foo <- 0
    for(i in 1:ar.fit$order)
    {
      for(k in 1:i)
      {
        foo <- foo + ar.fit$coefs[i]*k*gammas[abs(k-i)+1]
      }
    }
    Gamma <- 2*(foo + (spec - gammas[1])/2 *sum(1:ar.fit$order * ar.fit$coefs)  )/(1-sum(ar.fit$coefs))
  } else{
    Gamma <- 0
  }
  rtn <- cbind(Gamma, spec)
  colnames(rtn) <- c("Gamma", "Sigma")
  return(rtn)
}

test_batchSize <- function(x, method = c("bm", "obm", "bartlett", "tukey"), g = NULL)  {
  method = match.arg(method)
  chain <- as.matrix(x)
  if(!is.matrix(chain) && !is.data.frame(chain))
    stop("'x' must be a matrix or data frame.")
  
  if (is.function(g)) 
  {
    chain <- apply(chain, 1, g)
    
    if(is.vector(chain))
    {
      chain <- as.matrix(chain)
    }else
    {
      chain <- t(chain)
    }
  }
  
  n <- dim(chain)[1]
  p <- dim(chain)[2]
  
  if(n < (p+1))
    stop("sample size is insufficient for a Markov chain of this dimension")
  
  order.max <- min(p, n - 1L, floor(10 * log10(n))) # Maximum order up to which AR is fit
  xacf = matrix(, nrow = order.max+1, ncol = p)
  xacf = sapply(1:p, function(i) acf(chain[,i], type = "covariance", lag.max = order.max, plot =
                                       FALSE, demean=TRUE, na.action = na.pass)$acf)
  
  ar_fit <- apply(xacf, 2, arp_approx, order.max = order.max, n = n)^2
  coeff <- ( sum(ar_fit[1,])/sum(ar_fit[2,]) )^(1/3)
  # method = match.arg(method)
  
  b.const <- (3/2*n)*(method == "obm" || method == "bartlett" || method == "tukey") + (n)*(method == "bm")
  b <- b.const^(1/3) * coeff
  if(b <= 1) b <- 1
  
  b <- min(b, floor(n / (p + 1)))
  if(n > 10)
    b = min(b, floor(n/10))
  b <- floor(b)
  return(b)
}

Gibbs_sampler <- function(mu1, mu2, a, b, rho, init, n) {
  X <- matrix(0, nrow = n, ncol = 2)
  X[1, 1] = init[1]
  X[1, 2] = init[2] 
  for (i in 2:n) {
    X[i, 1] = rnorm(1, mu1 + (rho / b) * (X[i - 1, 2] - mu2), sqrt(a - (rho ^ 2) / b))
    X[i, 2] = rnorm(1, mu2 + (rho / a) * (X[i, 1] - mu1), sqrt(b - (rho ^ 2) / a))
  }
  return(X)
}

test_that("batchsize test", {
  mvg = Gibbs_sampler(2, 50, 1, 1, 0.5, c(2, 50), 1e4)
  expect_equal(test_batchSize(mvg), batchSize(mvg, fast = FALSE))
  expect_equal(test_batchSize(mvg, method = "obm"), batchSize(mvg, method = "obm", fast = FALSE))
  expect_equal(test_batchSize(mvg, method = "bartlett"), batchSize(mvg, method = "bartlett", 
                                                                   fast = FALSE))
  expect_equal(test_batchSize(mvg, method = "tukey"), batchSize(mvg, method = "tukey", 
                                                                fast = FALSE))
})

test_mcse_multi <- function(x, method = c("bm", "obm", "bartlett", "tukey", "lug"), r=3, size = NULL, g = NULL, adjust = TRUE, blather = FALSE)
{ 
  method = match.arg(method)
  
  # at some point the method used may be different
  # from the method asked. Blather will output this
  method.used <- method
  if(method == "lug")   # not releaved to the public. Inside option for developers
  {
    method = "bm"
    r <- 3
    c <- 0.5
  }
  r=1
  c = 0.5
  
  if(!is.numeric(r)) stop("r should be numeric")
  if(!is.numeric(c)) stop("c should be numeric")
  
  if(r > 5) warning("We recommend using r <=5. r = 1,2,3 are standard")
  if(r < 1)  {
    warning("r cannot be less than 1. Setting r = 3")
    r = 3
  }
  if(c > 1) {
    warning("c cannot be greater than 1. Setting c = 0.5")
    c = 0.5
  }
  
  # making matrix compatible and applying g
  chain <- as.matrix(x)
  if(!is.matrix(chain) && !is.data.frame(chain))
    stop("'x' must be a matrix or data frame.")
  
  if (is.function(g)) 
  {
    chain <- apply(chain, 1, g)
    
    if(is.vector(chain))
    {
      chain <- as.matrix(chain)
    }else
    {
      chain <- t(chain)
    }
  }
  
  
  ## Setting dimensions on the mcmc output. 
  n = dim(chain)[1]
  p = dim(chain)[2]
  
  if(n < (p+1))
    stop("sample size is insufficient for a Markov chain of this dimension")
  
  # Setting batch size based on chosen option
  if(is.null(size))
  {
    b <- batchSize(x = x, method = method)  # optimal
  }
  else if(size == "sqroot")
  {
    b <- floor(sqrt(n))
  } 
  else if(size == "cuberoot") {
    b <- floor(n^(1/3))
  } 
  else {
    if (!is.numeric(size) || size < 1 || size >= n || floor(n/size) <=1) {
      warning("size is either too large, too small, or not a number. Setting 'size' to the optimal
              value")
      size = batchSize(x = x, method = method, g = g)
    }
    
    
    b <- floor(size)
  }
  a <- floor(n/b)
  if(b == 1 && r != 1)
  {
    r = 1
  }
  ########################## 
  
  
  ## Overall means of the mcmc output
  mu.hat <- colMeans(chain)  # this is based on the full output. not n = a*b
  
  message <- ""   # will store some info for blather
  
  if(b < (2*r)) {
    r = 1       
    message = paste(message, "estimated batch size is low, lugsail not required")
  }

  ## Setting matrix sizes to avoid dynamic memory 
  sig.mat = matrix(0, nrow = p, ncol = p)
  sig.sum = matrix(0, nrow = p, ncol = p)
  
  ## only does bm and obm
  if(method != "bm" && method != "obm" && method != "bartlett" && method != "tukey")
  {
    stop("No such method available")
  }
  
  if(b == 1)
    sig.mat = var(chain)
  else  {
    if(method == "bm")
    {
      a = floor(n/b)
      y.bar <- matrix(0,nrow = a, ncol = p)
      y.bar <- apply(chain, 2, function(x) sapply(1:a, function(k) mean(x[((k-1)*b+1):(k*b)])))
      for(i in 1:a)
      {
        sig.sum = sig.sum + tcrossprod(y.bar[i,] - mu.hat)
      }
      
      sig.mat <- b*sig.sum/(a-1)
    }
    
    
    if(method == "obm")
    {
      a = n-b+1
      y.bar <- matrix(0,nrow = a, ncol = p)
      y.bar <- apply(chain, 2, function(x) sapply(1:a, function(k) mean(x[((k-1)*b+1):(k*b)])))
      y.bar <- apply(chain, 2, function(x) sapply(1:a, function(k) mean(x[k:(k+b-1)])))
      for(i in 1:a)
      {
        sig.sum = sig.sum + tcrossprod(y.bar[i,] - mu.hat)
      }
      
      sig.mat <- b*sig.sum/n
      
    }
    
    ## Modified Bartlett Window
    
    if(method == "bartlett")
    {
      chain <- scale(chain, center = mu.hat, scale = FALSE)
      dummy <- lapply(1:(b-1), function(j) (1 - j/b)*(  t(chain[1:(n-j), ])%*%chain[(j+1):n, ] 
                                                        + t(chain[(1+j):(n), ])%*%chain[1:(n-j), ]  ) )
      
      sig.sum <- t(chain)%*%chain + Reduce('+', dummy)
      
      sig.mat <- sig.sum/n
    }
    
    if(method == "tukey")
    {
      chain <- scale(chain, center = mu.hat, scale = FALSE)
      dummy <- lapply(1:(b-1), function(j) ((.5)*(1 + cos(pi * j/b))*t(chain[1:(n-j), ]))%*%chain[(j+1):n, ] 
                      + ((.5)*(1 + cos(pi * j/b))*t(chain[(1+j):(n), ]))%*%chain[1:(n-j), ])
      
      sig.sum <- t(chain[1:(n), ])%*%chain[(1):n, ] + Reduce('+', dummy)
      sig.mat <- sig.sum/n
      
    }
  }
  
  
  
  # ## Batch Means
  # if(method == "bm")
  # {
  # 
  #   for(i in 1:a)
  #   {
  #     batch_means[i,] = colMeans(chain[((i-1)*b+1): (i*b),])
  #   }
  # 
  #   for(j in 1:a)
  #   {
  #     sig.mat <- sig.mat + tcrossprod(batch_means[j,] - mu.hat)
  #   }
  # 
  #   sig.mat <- b*sig.mat/(a-1)
  #   m <- a - 1
  # }

  # ## Modified Bartlett Window
  # 
  # if(method == "bartlett" || method == "tukey")
  # {
  #   m <- n - b
  #   lag <- function(x)
  #   { 
  #     ifelse(method == "bartlett",  (1 - x/b), (1 + cos(pi * x/b))/2 ) 
  #   }
  #   
  #   ## centering
  #   for(i in 1:n)
  #   {
  #     chain[i,] <- chain[i,] - mu.hat
  #   }
  #   
  #   for(s in 1:(b-1))
  #   {
  #     for(j in 1: (n-s))
  #     {
  #       foo <- chain[j,]%*%t(chain[j+s,])
  #       sig.mat <- sig.mat + lag(s)*(foo + t(foo))
  #     }
  #   }
  #   sig.mat <- (sig.mat + t(chain)%*%chain)/n
  # }
  
  sig.eigen = eigen(sig.mat, only.values = TRUE)$values

  value = list("cov" = sig.mat, "est" = mu.hat, "nsim" = n, "eigen_values" = sig.eigen)
  class(value) = "mcmcse"
  value
}

n <- 1e3
p <- 3
b <- floor(sqrt(n))
out <- matrix(rnorm(n*p), nrow = n, ncol = p)

mbatch <- test_mcse_multi(out)
mobm <- test_mcse_multi(out, method = "obm")
mbart <- test_mcse_multi(out, method = "bartlett")
mtukey <- test_mcse_multi(out, method = "tukey")

test_that("test if functions return correct values for mcse.multi", {
  
  expect_equal(mcse.multi(out, r=1, adjust = FALSE), mbatch)
  expect_equal(mcse.multi(out, method = "obm", r=1, adjust = FALSE), mobm)
  expect_equal(mcse.multi(out, method = "bartlett", r=1, adjust = FALSE), mbart)
  expect_equal(mcse.multi(out, method = "tukey", r=1, adjust = FALSE), mtukey)
  
})


test_mcse.q = function(x, q, size = NULL, g = NULL, method = c("bm", "obm", "sub"), warn = FALSE)
{
  method = match.arg(method)
  if(is.function(g))
    x = sapply(x, g)
  
  n = length(x)
  
  if (n < 1000)
  {
    if (warn)
      warning("too few samples (less than 1,000)")
    if (n < 10)
      return(NA)
  }
  
  if(is.null(size)) {
    b <- batchSize(x = x, method = method)
  } else if(size == "sqroot")  {
    b <- floor(sqrt(n))
  } else if(size == "cuberoot") {
    b <- floor(n^(1/3))
  } else  {
    if (!is.numeric(size) || size < 1 || size >= n || floor(n/size) <=1) {
      warning("size is either too large, too small, or not a number. Setting 'size' to n^(1/2)")
      size = sqrt(n)
    }
    
    b <- floor(size)
  }
  
  a <- floor(n/b)
  
  counting = function(var.vector, var.number)
  {
    return(length(var.vector[var.vector <= var.number]))
  }
  
  if (! is.numeric(q) || q <= 0 || q >= 1)    {
    stop("'q' must be from (0, 1).")
  }
  
  quant = function(input) { quantile(input, prob = q, type = 1, names = FALSE) }
  
  if (method == "bm")
  {
    xi.hat = quant(x)
    y = sapply(1:a, function(k) return(counting(x[((k - 1) * b + 1):(k * b)], xi.hat))) / b
    mu.hat = mean(y)
    var.hat = b * sum((y - mu.hat)^2) / (a - 1)
    f.hat.junk = density(x, from = xi.hat, to = xi.hat, n = 1)
    f.hat = f.hat.junk$y
    se = sqrt(var.hat / n) / f.hat
  }
  else if (method == "obm")
  {
    xi.hat = quant(x)
    a = n - b + 1
    y = sapply(1:a, function(k) return(counting(x[k:(k + b - 1)], xi.hat))) / b
    mu.hat = mean(y)
    var.hat = n * b * sum((y - mu.hat)^2) / (a - 1) / a
    f.hat.junk = density(x, from = xi.hat, to = xi.hat, n = 1)
    f.hat = f.hat.junk$y
    se = sqrt(var.hat / n) / f.hat
  } 
  else # method == "sub"
  {
    xi.hat = quant(x)
    a = n - b + 1
    y = sapply(1:a, function(k) return(quant(x[k:(k + b - 1)])))
    mu.hat = mean(y)
    var.hat = n * b * sum((y - mu.hat)^2) / (a - 1) / a
    se = sqrt(var.hat / n)
  }
  value = list("est" = xi.hat, "se" = se, "nsim" = n)
  value
}

n = 1e4
out = double(n)
out[1] = 2
for (i in 1:(n - 1))
  out[i + 1] = 0.9 * out[i] + rnorm(1)

mbatch <- test_mcse.q(out, q = 0.7)
mobm <- test_mcse.q(out, q = 0.7, method = "obm")
msub <- test_mcse.q(out, q = 0.7, method = "sub")

test_that("test if functions return correct values for mcse.multi", {
  
  expect_equal(mcse.q(out, q = 0.7), mbatch)
  expect_equal(mcse.q(out, q = 0.7, method = "obm"), mobm)
  expect_equal(mcse.q(out, q = 0.7, method = "sub"), msub)
  
})





test_that("ess performs univariate sampling and output makes sense", {
  chain = Gibbs_sampler(2, 50, 1, 1, 0.5, c(2, 50), 1e4)
  ess_est = ess(chain)
  expect_equal(length(ess_est), 2)
})

test_that("mcse performs univariate output analysis",{
  chain = Gibbs_sampler(2, 50, 1, 1, 0.5, c(2, 50), 1e4)
  foo = mcse.mat(chain)
  expect_equal(length(foo[,1]), 2)
})

# test_that("batchsize follows constraints", {
#   chain = matrix(rnorm(4), nrow = 2)
#   expect_error(batchSize(chain), "sample size is insufficient for a Markov chain of this dimension")
#   
#   x = numeric(20)
#   x[1] = 1
#   for(i in 2:20)
#     x[i] = 0.99999*x[i-1] + rnorm(1, sd = 0.01)
#   expect_lte(batchSize(x), 2)
# })