#' gfpca_Bayes
#'
#' Implements a Bayesian approach to generalized functional principal
#' components analysis for sparsely observed binary curves
#'
#'
#' @param data A dataframe containing observed data. Should have column names
#' \code{index} for observation times, \code{value} for observed responses,
#' and \code{id} for curve indicators.
#' @param npc Number of FPC basis functions to estimate; defaults to 3
#' @param output_index Grid on which estimates should be computed. Defaults to
#' \code{NULL} and returns estimates on the timepoints in the observed dataset
#' @param nbasis Number of basis functions used in spline expansions; defaults to 10
#' @param iter Number of sampler iterations; defaults to 1000
#' @param warmup Number of iterations discarded as warmup; defaults to 400
#' @param method Bayesian fitting method; \code{vb} (default) or \code{sampling}
#'
#' @author Jeff Goldsmith \email{jeff.goldsmith@@columbia.edu} and John Muschelli
#'
#' @references Gertheiss, J., Goldsmith, J., and Staicu, A.-M. (2016). A note
#' on modeling sparse exponential-family functional response curves.
#' \emph{Under Review}.
#' @examples
#'
#' \dontrun{
#' library(mvtnorm)
#' library(boot)
#' library(refund.shiny)
#'
#' ## set simulation design elements
#'
#' bf = 10 ## number of bspline fns used in smoothing the cov
#' D = 101 ## size of grid for observations
#' Kp = 2 ## number of true FPC basis functions
#' grid = seq(0, 1, length = D)
#'
#' ## sample size and sparsity
#' I <- 300
#' mobs <- 7:10
#'
#' ## mean structure
#' mu <- 8*(grid - 0.4)^2 - 3
#'
#' ## Eigenfunctions /Eigenvalues for cov:
#' psi.true = matrix(NA, 2, D)
#' psi.true[1,] = sqrt(2)*cos(2*pi*grid)
#' psi.true[2,] = sqrt(2)*sin(2*pi*grid)
#'
#' lambda.true = c(1, 0.5)
#'
#' ## generate data
#'
#' set.seed(1)
#'
#' ## pca effects: xi_i1 phi1(t)+ xi_i2 phi2(t)
#' c.true = rmvnorm(I, mean = rep(0, Kp), sigma = diag(lambda.true))
#' Zi = c.true %*% psi.true
#'
#' Wi = matrix(rep(mu, I), nrow=I, byrow=T) + Zi
#' pi.true = inv.logit(Wi) # inverse logit is defined by g(x)=exp(x)/(1+exp(x))
#' Yi.obs = matrix(NA, I, D)
#' for(i in 1:I){
#' for(j in 1:D){
#' Yi.obs[i,j] = rbinom(1, 1, pi.true[i,j])
#' }
#' }
#'
#' ## "sparsify" data
#' for (i in 1:I)
#' {
#' mobsi <- sample(mobs, 1)
#' obsi <- sample(1:D, mobsi)
#' Yi.obs[i,-obsi] <- NA
#' }
#'
#' Y.vec = as.vector(t(Yi.obs))
#' subject <- rep(1:I, rep(D,I))
#' t.vec = rep(grid, I)
#'
#' data.sparse = data.frame(
#' index = t.vec,
#' value = Y.vec,
#' id = subject
#' )
#'
#' data.sparse = data.sparse[!is.na(data.sparse$value),]
#'
#' ## fit Bayesian models
#' fit.Bayes = gfpca_Bayes(data = data.sparse)
#' plot(mu)
#' lines(fit.Bayes$mu, col=2)
#' plot_shiny(fit.Bayes)
#' }
#'
#' @export gfpca_Bayes
#' @import rstan
#' @importFrom splines bs
gfpca_Bayes <- function(data, npc = 3, output_index = NULL, nbasis = 10, iter = 1000, warmup = 400,
method = c("vb", "sampling")){
method = match.arg(method)
## implement some data checks
if (is.null(output_index)) { output_index = sort(unique(data['index'][[1]])) }
I = length(unique(data['id'][[1]]))
D = length(output_index)
X.des = matrix(1, nrow = I, ncol = 1)
p = 1
BS_fit = bs(data['index'][[1]], df = nbasis, intercept = TRUE, degree = 3)
BS_output = bs(output_index, df = nbasis, intercept = TRUE, degree = 3)
alpha = .1
diff0 = diag(1, D, D)
diff2 = matrix(rep(c(1, -2, 1, rep(0, D - 2)), D - 2)[1:((D - 2)*D)], D - 2, D, byrow = TRUE)
P0 = t(BS_output) %*% t(diff0) %*% diff0 %*% BS_output
P2 = t(BS_output) %*% t(diff2) %*% diff2 %*% BS_output
P.mat = alpha * P0 + (1 - alpha) * P2
## format sparse data for stan fitting
Y.vec.obs = data['value'][[1]]
subject.obs = data['id'][[1]]
n.total = length(Y.vec.obs)
## fit model using STAN
stanfit = stanmodels$gfpca
dat = list(Y = Y.vec.obs, X = X.des, BS = BS_fit,
subjId = subject.obs,
N = n.total, I = I, D = D, p = p, Kt = nbasis, Kp = npc,
PenMat = P.mat)
if (method == "vb") {
GenFPCA.fit = vb(stanfit, data = dat, iter = iter)
} else if (method == "sampling") {
GenFPCA.fit = sampling(stanfit,
data = dat, iter = iter,
warmup = warmup,
control = list(adapt_delta = .65),
chains = 1, verbose = FALSE)
}
## post process to obtain point estimates
beta.post = extract(GenFPCA.fit, "beta")$beta
beta_psi.post = extract(GenFPCA.fit, "beta_psi")$beta_psi
c.post = extract(GenFPCA.fit, "c")$c
betaHat.post = array(NA, dim = c(p, D, dim(c.post)[1]))
for (i in 1:dim(c.post)[1]) {
betaHat.post[,,i] = (beta.post[i,,] %*% t(BS_output))
}
y.post = z.post = array(NA, dim = c(I, D, dim(c.post)[1]))
for (i in 1:dim(c.post)[1]) {
y.post[,,i] = X.des %*% (beta.post[i,,] %*%
t(BS_output)) + c.post[i,,] %*%
(beta_psi.post[i,,] %*% t(BS_output))
z.post[,,i] = c.post[i,,] %*% (beta_psi.post[i,,] %*% t(BS_output))
}
z_pm = apply(z.post, c(1,2), mean)
Yhat_matrix = apply(y.post, c(1,2), mean)
## obtain orthogonal estimate of FPC quantities
decomp = svd(z_pm)
evalues = decomp$d[1:npc]
efunctions = decomp$v[, 1:npc]
scores = decomp$u[, 1:npc]
## format output
mu = as.vector(apply(betaHat.post, c(1,2), mean))
Y = data
index = output_index
family = "binomial"
Yhat = data.frame(
value = as.vector(t(Yhat_matrix)),
index = rep(output_index, I),
id = rep(1:I, each = D)
)
ret.objects = c("Yhat", "Y", "scores", "mu", "efunctions", "evalues", "npc", "index", "family")
ret = lapply(1:length(ret.objects), function(u) get(ret.objects[u]))
names(ret) = ret.objects
class(ret) = "fpca"
return(ret)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.