Nothing
#' Fit Functional Concurrent Regression
#'
#' @description
#' This function implements functional concurrent regression for sparse functional responses with both functional and scalar covariates.
#' This function is a wrapper for mgcv's \code{\link{gam}}/\code{\link{bam}}.
#'
#' @param formula
#' formula will accept any input formula which is valid for \code{\link{gam}}.
#' The formula should only include terms not associated with the random function intercept b_i(t_{ij}). See Examples.
#'
#' @param argvals a string indicating the functional domain variable name in data
#'
#' @param subj a string indicating the unique subject identifier name in data
#'
#' @param argvals.new new values of the functional domanin to predict using \code{\link{face.sparse}}, optional
#' if one desires to predict at points of the functional domain not included in the data fitting procedure,
#' they must be supplied in this argument.
#'
#' @param data dataframe including all variables of interest. Must not have any missing data for variables used in model fitting.
#' data must also not contain any variables named: "g", "phi" followed by any numbers, or "sp" followed by any numbers. These
#' names are reserved for the fitting procedure.
#'
#' @param niter number of times to iterate the covariance estimation
#'
#' @param sp logical arguement indicating whether smoothing parameters for random effects should be supplied to
#' \code{\link{gam}} or \code{\link{bam}} using estimates from \code{\link{face.sparse}}
#' (TRUE), or whether smoothing parameters for random effects should
#' be estimated by mgcv (FALSE). Defaults to FALSE.
#'
#' @param nPhi number of random effects to include in final model (i.e. number of eigenfunctions of the covariance function).
#' Default value (NULL) results in the use of all estimated random effects.
#'
#' @param use_bam logical argument indicating whether to use \code{\link{gam}} or \code{\link{bam}}.
#' For moderate or large number of eigenfunctions
#' it is recommended to use \code{\link{bam}}.
#'
#' @param discrete logical argument indicating whether whether to supple discrete = TRUE argument to \code{\link{bam}}.
#' This argument may reduce computation time, but is currently listed as ``experimental". Not available when use_bam = FALSE.
#' Defaults to FALSE.
#'
#' @param face.args list of arguments to pass to \code{\link{face.sparse}}. Can not pass the arguments ``data", ``newdata", ``center" or ``argvals.new"
#' as these are determined by the procedure.
#'
#' @param ... arguments to be passed to mgcv::gam()/bam()
#'
#' @details
#'
#' The models fit are of the form
#'
#' \deqn{y = f_0(t_{ij}) + f_1(t_{ij})X_{ij} + ... + b_i(t_{ij}) + \epsilon_{ij}}
#'
#' Note that this function will accept any valid formula for \code{\link{gam}}/\code{\link{bam}}.
#' However, only the identity link function is available at this time.
#' See the package vignettes for additional descriptions of dynamic prediction and the class of models fit by this function.
#'
#'
#' @return An object of class \code{fcr} containing five elements
#' \describe{
#' \item{fit}{An object corresponding to the fitted model from the mgcv package}
#' \item{face.object}{An object corresponding to the estimated covariance features}
#' \item{runtime}{Model fitting time}
#' \item{argvals}{Character scalar corresponding the name of the functional domain variable}
#' \item{runtime}{logical scalar corresponding to sp argument used in model fitting}}
#'
#' @examples
#'
#' \dontshow{
#' ## toy example, use small number of interior knots for fpca/small number of
#' ## eigenfunctions to speed up computation time. See example below for a more realistic analysis.
#' data <- content[1:1000,]
#' k <- 4
#' K <- 3
#' fit <- fcr(formula = Y ~ s(argvals, k = K) + Male,
#' argvals = "argvals", subj = "subj", data = data, use_bam=TRUE,
#' nPhi = 1,
#' face.arges = list(knots = k))
#' }
#'
#'
#' \donttest{
#'
#' data <- content
#' ## smoothing parameters
#' k <- 12 # number of interior knots for fpca (results in k + 3 basis functions)
#' K <- 15 # dimenson of smooth for time varying coefficients
#'
#' ## functional domain where we need predictions
#' tnew <- sort(unique(data$argvals))
#'
#' ###########################################
#' ## Step 1: Smooth time-varying covariate ##
#' ###########################################
#' dat.waz <- data.frame("y" = data$waz, "subj" = data$subj, argvals = data$argvals)
#' fit.waz <- face.sparse(dat.waz, newdata = dat.waz, knots = k, argvals.new = tnew)
#' data$wazPred <- fit.waz$y.pred
#'
#'
#' #####################
#' ## Step 2: Fit fcr ##
#' #####################
#' fit <- fcr(formula = Y ~ s(argvals, k=K, bs="ps") +
#' s(argvals, by=Male, k=K, bs="ps") +
#' s(argvals, by=wazPred, bs="ps"),
#' argvals = "argvals", subj="subj", data=data, use_bam=TRUE, argvals.new=tnew,
#' face.args = list(knots=k, pve=0.99))
#'
#' ## plot covariance features
#' plot(fit, plot.covariance=TRUE)
#'
#' ## plot coefficient functions and qq plots for random effects
#' plot(fit)
#'
#' ########################
#' ## Step 3: Prediction ##
#' ########################
#' ## data frames for in-sample and dynamic predictions
#' data_dyn <- data_in <- data
#'
#' ## change subject IDs to values not used in model fitting
#' ## for dynamic prediction
#' data_dyn$subj <- data_dyn$subj + 1000
#'
#' ## make all observations beyond 0.5 NA in both data frames
#' ## and dynamically predict the concurrent covariate in
#' ## dynamic prediction
#' inx_na <- which(data_dyn$argvals > 0.5)
#' data_dyn$Y[inx_na] <- data_dyn$waz[inx_na] <- NA
#' data_dyn$wazPred <- predict(fit.waz,
#' newdata= data.frame("subj" = data_dyn$subj,
#' "argvals" = data_dyn$argvals,
#' "y" = data_dyn$Y))$y.pred
#'
#' data_in$Y[inx_na] <- NA
#'
#'
#' ## in sample and dynamic predictions on the same subjects
#' insample_preds <- predict(fit, newdata = data)
#' dynamic_preds <- predict(fit, newdata = data_dyn)
#'
#'}
#'
#' @references
#'
#' Jaganath D, Saito M Giman RH Queirox DM, Rocha GA, Cama V, Cabrera L, Kelleher D, Windle HJ,
#' Crabtree JE, Jean E, Checkley W. First Detected Helicobacter pylori Infection in Infancy Modifies the
#' Association Between Diarrheal Disease and Childhood Growth in Peru. Helicobacter (2014); 19:272-297.
#'
#' Leroux A, Xiao L, Crainiceanu C, Checkley W (2017). Dynamic prediction in functional concurrent
#' regression with an application to child growth.
#'
#' Xiao L, Li C, Checkley W, Crainiceanu C. Fast covariance estimation for sparse functional data.
#' Statistics and Computing, (2017).
#'
#' @importFrom stats predict as.formula coef complete.cases
#'
#' @importFrom utils data
#'
#' @export
#'
fcr <- function(formula, argvals, subj, argvals.new = NULL, data = NULL, niter = 1,
sp = FALSE, nPhi = NULL, use_bam = FALSE, discrete = FALSE,
face.args = list(knots = 12, lower = -3, pve=0.95),
...){
stopifnot(class(data) == "data.frame")
stopifnot(class(subj) == "character")
stopifnot(is.list(face.args))
stopifnot(!any(c("data","newdata","center","argvals.new") %in% names(face.args)))
stopifnot(all(subj %in% names(data), argvals %in% names(data)))
if(!use_bam & discrete) stop("discrete = TRUE is only available with use_bam = TRUE")
if(any(grepl("^phi[0-9]+|^sp[0-9]+",colnames(data)))){
stop("column names `sp[0-9]+` and `phi[0-9]+ are reserved`")
}
if("g" %in% colnames(data)){
stop("column name `g` is reserved`")
}
## check to see if new predictions are in the range of the data used to fit the model
if(!is.null(argvals.new)){
if(max(argvals.new) > max(data[[argvals]]) | min(argvals.new) < min(data[[argvals]])) {
warning("Range of arvals.new exceeds the range of the funcitonal domain in the data.")
}
}
model <- as.character(formula)
stopifnot(model[1] == "~" & length(model) == 3)
outcome <- model[[2]]
if(!outcome %in% names(data)) stop("Outcome variable must be named and included in data.")
fit_pilot <- gam(as.formula(formula), data = data, ...)
curEst_fx <- fit_pilot$fitted.values
if(length(curEst_fx) != nrow(data)) stop("Method not implemented to handle missing data in model fitting. Please remove missing data and refit.")
run_time <- c()
data$g <- factor(data[[subj]])
if(is.null(argvals.new)){
ut <- sort(unique(c(data[[argvals]],
seq(min(data[[argvals]]), max(data[[argvals]]), len =100))))
} else {
ut <- sort(unique(c(argvals.new, data[[argvals]],
seq(min(data[[argvals]]), max(data[[argvals]]), len =100))))
}
for(n in 1:niter){
start_time <- proc.time()
resid1 <- curEst_fx - data[[outcome]]
datCest <- data.frame("argvals" = as.vector(data[[argvals]]),
"subj" = as.vector(data[[subj]]),
"y" = as.vector(resid1))
Cest1 <- do.call("face.sparse",
c(list(data = datCest, newdata = datCest,
center = FALSE, argvals.new = ut),
face.args))
if(is.null(nPhi)){
nPhi <- length(Cest1$eigenvalues)
} else {
nPhi <- min(length(Cest1$eigenvalues), nPhi)
}
message(paste("Number of Eigenfunctions used in estimation =", nPhi))
data <- createPhi(Cest1, data = data, argvals = argvals, nPhi = nPhi)
if(use_bam){
fit <- bam(formula = as.formula(createFormula(Cest1, formula = formula, sp = sp, nPhi = nPhi)),
data=data, discrete = discrete, ...)
} else if (!use_bam){
fit <- gam(formula = as.formula(createFormula(Cest1, formula = formula, sp = sp, nPhi = nPhi)),
data=data, ...)
}
run_time[[n]] <- proc.time() - start_time
curEst <- fit$fitted.values
if(niter > 1){
coef_names <- names(coef(fit))
phi_names <- unique(regmatches(coef_names, regexpr("phi[0-9]+", coef_names)))
curEst_fx <- predict(fit, exclude = paste("s(g):", phi_names, sep=""))
nPhi <- NULL
rm_cols <- which(grepl("^phi[0-9]+",colnames(data)))
data[rm_cols] <- NULL
}
rm(list=ls()[which(ls() %in% paste("sp",0:10000,sep=""))])
}
ret <- list("fit" = fit,
"face.object" = Cest1,
"runtime" = run_time,
"argvals" = argvals,
"sp" = sp)
class(ret) <- "fcr"
ret
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.