#' gfpca_TwoStep
#'
#' Implements a two-step approach to generalized functional principal
#' components analysis for sparsely observed binary curves
#'
#'
#' @param data A dataframe containing observed data. Should have column names
#' \code{index} for observation times, \code{value} for observed responses,
#' and \code{id} for curve indicators.
#' @param npc prespecified value for the number of principal components (if
#' given, this overrides \code{pve}).
#' @param pve proportion of variance explained; used to choose the number of
#' principal components.
#' @param output_index Grid on which estimates should be computed. Defaults to
#' \code{NULL} and returns estimates on the timepoints in the observed dataset
#' @param type Type of estimate for the FPCs; either \code{approx} or
#' \code{naive}
#' @param basis Basis used as FPCs; in simulations, allows the use of the true
#' FPCs
#' @param nbasis Number of basis functions used in spline expansions
#' @author Jan Gertheiss \email{jan.gertheiss@@agr.uni-goettingen.de}
#' @seealso \code{\link{gfpca_TwoStep}}, \code{\link{gfpca_Bayes}}.
#' @references Gertheiss, J., Goldsmith, J., and Staicu, A.-M. (2016). A note
#' on modeling sparse exponential-family functional response curves.
#' \emph{Under Review}.
#' @examples
#'
#' \dontrun{
#' library(mvtnorm)
#' library(boot)
#' library(refund.shiny)
#'
#' ## set simulation design elements
#'
#' bf = 10 ## number of bspline fns used in smoothing the cov
#' D = 101 ## size of grid for observations
#' Kp = 2 ## number of true FPC basis functions
#' grid = seq(0, 1, length = D)
#'
#' ## sample size and sparsity
#' I <- 300
#' mobs <- 7:10
#'
#' ## mean structure
#' mu <- 8*(grid - 0.4)^2 - 3
#'
#' ## Eigenfunctions /Eigenvalues for cov:
#' psi.true = matrix(NA, 2, D)
#' psi.true[1,] = sqrt(2)*cos(2*pi*grid)
#' psi.true[2,] = sqrt(2)*sin(2*pi*grid)
#'
#' lambda.true = c(1, 0.5)
#'
#' ## generate data
#'
#' set.seed(1)
#'
#' ## pca effects: xi_i1 phi1(t)+ xi_i2 phi2(t)
#' c.true = rmvnorm(I, mean = rep(0, Kp), sigma = diag(lambda.true))
#' Zi = c.true %*% psi.true
#'
#' Wi = matrix(rep(mu, I), nrow=I, byrow=T) + Zi
#' pi.true = inv.logit(Wi) # inverse logit is defined by g(x)=exp(x)/(1+exp(x))
#' Yi.obs = matrix(NA, I, D)
#' for(i in 1:I){
#' for(j in 1:D){
#' Yi.obs[i,j] = rbinom(1, 1, pi.true[i,j])
#' }
#' }
#'
#' ## "sparsify" data
#' for (i in 1:I)
#' {
#' mobsi <- sample(mobs, 1)
#' obsi <- sample(1:D, mobsi)
#' Yi.obs[i,-obsi] <- NA
#' }
#'
#' Y.vec = as.vector(t(Yi.obs))
#' subject <- rep(1:I, rep(D,I))
#' t.vec = rep(grid, I)
#'
#' data.sparse = data.frame(
#' index = t.vec,
#' value = Y.vec,
#' id = subject
#' )
#'
#' data.sparse = data.sparse[!is.na(data.sparse$value),]
#'
#' ## fit models
#'
#' # 2-step with eigenfunctions from FPCA
#' fit.2step = gfpca_TwoStep(data = data.sparse, type="naive")
#' plot(mu)
#' lines(fit.2step$mu, col=2)
#'
#' # 2-step with eigenfunctions estimated following Hall et al. (2008)
#' fit.2step = gfpca_TwoStep(data = data.sparse, type="approx")
#' plot(mu)
#' lines(fit.2step$mu, col=2)
#' plot_shiny(fit.2step)
#'
#' }
#'
#' @export gfpca_TwoStep
#' @importFrom gamm4 gamm4
#' @import Rcpp
gfpca_TwoStep <- function(data, npc = NULL, pve = .9, output_index = NULL,
type=c("approx", "naive", "fixed"),
basis=NULL, nbasis=10){
# some data checks
if (is.null(output_index)) { output_index = sort(unique(data['index'][[1]])) }
type <- match.arg(type)
type <- switch(type, approx = "approx", naive = "naive", fixed = "fixed")
# data
Y.vec <- data['value'][[1]]
input_index <- data['index'][[1]]
id.vec <- data['id'][[1]]
D <- length(output_index)
I <- length(unique(id.vec))
Y.obs <- matrix(NA, nrow = I, ncol = D)
for (i in 1:I) {
Yi <- Y.vec[id.vec == i]
ti <- input_index[id.vec == i]
indexi <- sapply(ti, function(t) which(output_index == t))
Y.obs[i,indexi] <- Yi
}
dta = data.frame(
value = as.vector(t(Y.obs)),
id = rep(1:I, rep(D,I)),
index = rep(output_index, I)
)
# obtain efunctions and evalues using selected method
if (type == "naive") {
# a simple way to get estimates of the eigenfunctions
# Y.pca <- fpca.sc(Yi.obs, pve=pve, npc=npc)
Y.pca <- fpca.sc(Y.obs, pve = pve, npc = npc)
npc <- ncol(Y.pca$efunctions)
efunctions = Y.pca$efunctions[,1:npc]
evalues = Y.pca$evalues[1:npc]
} else if (type == "approx") {
# use HMY approach to estimate the eigenfunctions
hmy_cov <- covHall(data = data, u = output_index, bf = 10, pve = pve, eps = 0.01, nu = 1)
# obtain spectral decomposition of the covariance of X
eigen_HMY = eigen(hmy_cov)
fit.lambda = eigen_HMY$values
fit.phi = eigen_HMY$vectors
# remove negative eigenvalues
wp <- which(fit.lambda > 0)
fit.lambda_pos = fit.lambda[wp]
fit.phi <- fit.phi[,wp]
if (is.null(npc)) {
# truncate using the cumulative percentage of explained variance
npc <- which((cumsum(fit.lambda_pos)/sum(fit.lambda_pos)) > pve)[1]
}
efunctions = fit.phi[,1:npc]
evalues = fit.lambda[1:npc]
} else {
if (is.null(basis))
stop("basis has to be supplied if type = fixed")
if (nrow(basis) != D)
stop("nrow(basis) has to match grid")
if (!is.null(npc) && (ncol(basis) != npc))
stop("ncol(basis) has to match npc")
else
npc <- ncol(basis)
efunctions = basis[,1:npc]
evalues = NULL
}
# construct elements for fitting using mixed model
for (i in 1:npc) {
dta <- cbind(dta, rep(efunctions[,i], I))
}
names(dta)[4:(4 + npc - 1)] <- c(paste0("psi", 1:npc))
random.structure = paste(paste0("psi", 1:npc), collapse = "+")
random.formula = formula(paste("~(0+", random.structure, "|| id)"))
# fit using mixed model
outre <- gamm4(value ~ s(index, k = nbasis), family = "binomial", data = dta,
random = random.formula)
scores = as.matrix(coef(outre$mer)$id[,npc:1])
Z.gamm.fpca <- scores %*% t(efunctions[,1:npc])
mu <- as.vector(predict.gam(outre$gam, newdata = data.frame(index = output_index)))
Yhat <- matrix(rep(mu, I), nrow = I, byrow = TRUE) + Z.gamm.fpca
## format output
Y = data
index = output_index
family = "binomial"
Yhat = data.frame(
value = as.vector(t(Yhat)),
index = rep(output_index, I),
id = rep(1:I, each = D)
)
ret.objects = c("Yhat", "Y", "scores", "mu", "efunctions", "evalues", "npc", "index", "family")
ret = lapply(1:length(ret.objects), function(u) get(ret.objects[u]))
names(ret) = ret.objects
class(ret) = "fpca"
return(ret)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.