R/vd_fit.R
In VDPO: Working with and Analyzing Functional Data of Varying Lengths

Documented in vd_fit

#' Estimation of the generalized additive functional regression models for
#' variable domain functional data
#'
#' The \code{vd_fit} function fits generalized additive functional regression models
#' for variable domain functional data.
#'
#' @param formula a formula object with at least one \code{ffvd} term.
#' @param data a \code{list} object containing the response variable
#' and the covariates as the components of the list.
#' @param family a \code{family} object specifying the distribution from which the
#' data originates. The default distribution is \code{\link{gaussian}}.
#' @param offset An offset vector. The default value is \code{NULL}.
#'
#' @return An object of class \code{vd_fit}. It is a \code{list} containing the following items:
#'
#' - An item named `fit` of class \code{sop}. See \link[SOP]{sop.fit}.
#' - An item named `Beta` which is the estimated functional coefficient.
#' - An item named `theta` which is the basis coefficient of `Beta`.
#' - An item named `covar_theta` which is the covariance matrix of `theta`.
#' - An item named `M` which is the number of observations points for each curve.
#' - An item named `ffvd_evals` which is the result of the evaluations of the `ffvd`
#' terms in the formula.
#
#' @examples
#' # VARIABLE DOMAIN FUNCTIONAL DATA EXAMPLE
#'
#' # set seed for reproducibility
#' set.seed(42)
#'
#' # generate example data
#' data <- data_generator_vd(
#'   N = 100,
#'   J = 100,
#'   beta_index = 1,
#'   use_x = TRUE,
#'   use_f = TRUE,
#' )
#'
#' # Define a formula object that specifies the model behavior.
#' # The formula includes a functional form of the variable 'X_se' using 'ffvd'
#' # with a non-default number of basis functions ('nbasis' is set to c(10, 10, 10)).
#' # Additionally, it includes a smooth function 'f' applied to 'x2' with 10 segments ('nseg = 10'),
#' # a second-order penalty ('pord = 2'), and cubic splines ('degree = 3').
#' # The model also contains the linear term 'x1'.
#' formula <- y ~ ffvd(X_se, nbasis = c(10, 10, 10)) + f(x2, nseg = 10, pord = 2, degree = 3) + x1
#'
#' # We can fit the model using the data and the formula
#' res <- vd_fit(formula = formula, data = data)
#'
#' # Some important parameters of the model can be accesed as follows
#' res$Beta # variable domain functional coefficient
#' res$fit$fitted.values # estimated response variable
#'
#' # Also, a summary of the fit can be accesed using the summary function
#' summary(res)
#'
#' # And a heatmap for an specific beta can be obtained using the plot function
#' plot(res, beta_index = 1)
#'
#' @seealso \code{\link{ffvd}}
#'
#' @export
vd_fit <- function(formula, data, family = stats::gaussian(), offset = NULL) {
  if (inherits(formula, "character")) {
    formula <- stats::as.formula(formula)
  }

  if (!inherits(data, what = "list")) {
    stop("the 'data' argument should be a list", call. = FALSE)
  }

  if (length(data) < 2) {
    stop("'data' should have at least a response and a covariate", call. = FALSE)
  }

  na_indices <- c()
  for (name in names(data)) {
    if (is.null(dim(data[[name]]))) {
      na_indices <- c(na_indices, which(is.na(data[[name]])))
    }
  }

  na_indices <- unique(na_indices)

  if (length(na_indices) != 0) {
    data <- data[-na_indices, ]
  }

  vdpoenv <- environment(formula)
  vdpoenv$f <- SOP::f
  vdpons <- loadNamespace("VDPO")

  for (var in names(data)) {
    vdpoenv[[var]] <- data[[var]]
  }

  tf <- stats::terms.formula(formula, specials = c("ffvd", "f"))

  terms <- attr(tf, "term.labels")
  nterms <- length(terms)
  specials_indices <- attr(tf, "specials") # indices for the special terms

  if (attr(tf, "response")) {
    # > formula <- y ~ x + z + 1
    # > "list" "y"    "x"    "z"
    response <- as.character(attr(tf, "variables"))[2]
    variables <- as.character(attr(tf, "variables"))[-c(1, 2)]

    # If the response exists, we need to lower in the index for every
    # special term

    specials_indices <- lapply(
      specials_indices,
      function(x) if (is.null(x)) NA else x - 1
    )
  } else {
    variables <- as.character(attr(tf, "variables"))[-1]

    # No need to lower the index of the specials terms if the response exists
    specials_indices <- lapply(
      specials_indices,
      function(x) if (is.null(x)) NA else x
    )
  }

  all_indices <- seq_along(variables)
  non_special_indices <- setdiff(all_indices, unlist(specials_indices))

  specials <- length(unlist(stats::na.omit(specials_indices))) > 0

  evals <- lapply(
    terms,
    function(term) eval(parse(text = term), envir = vdpoenv, enclos = vdpons)
  )
  names(evals) <- terms

  nf <- sum(grepl("\\bf\\(\\b", names(evals)))
  nffvd <- sum(grepl("\\bffvd\\(\\b", names(evals)))

  if (nffvd == 0) {
    stop("this function should be used with at least one 'ffvd' term",
      call. = FALSE
    )
  }

  X <- c() # matrix
  Z <- c() # matrix
  G <- list() # list

  l.f <- NULL
  if (nf > 0) {
    names.fun <- NULL
    for (f_index in which(grepl("\\bf\\(\\b", names(evals)))) {
      l.f[[f_index]] <- evals[[f_index]]
      names.fun <- c(names.fun, terms[[f_index]])
      form <- stats::as.formula(paste("~", terms[[f_index]], sep = ""))
      aa <- switch(as.character(l.f[[f_index]]$dim),
        `3` = {
          l.f[[f_index]]$Xmat <- construct.3D.pspline(
            form,
            list_to_df(data, response)
          )
        },
        `2` = {
          l.f[[f_index]]$Xmat <- construct.2D.pspline(
            form,
            list_to_df(data, response)
          )
        },
        `1` = {
          l.f[[f_index]]$Xmat <- construct.1D.pspline(
            form,
            list_to_df(data, response)
          )
        }
      )
      X <- cbind(X, l.f[[f_index]]$Xmat$X)
      Z <- cbind(Z, l.f[[f_index]]$Xmat$Z)
      G <- c(G, list(l.f[[f_index]]$Xmat$g))
    }
    l.f <- l.f[lengths(l.f) != 0]

    if (length(G) == 1 && length(G[[1]]) == 1) {
      # G <- list(unlist(G))
      G <- unname(G[[1]])
      # names(G) <- names(G[[1]])
    } else {
      G <- unname(construct.capital.lambda(G))
    }

    n_coefs_f <- 0
    for (i in seq_along(l.f)) {
      aux_prod <- 1
      for (j in seq_along(l.f[[i]]$nseg)) {
        aux_prod <- aux_prod * (l.f[[i]]$nseg[j] + l.f[[i]]$degree[j] - 1)
      }
      n_coefs_f <- n_coefs_f + aux_prod
    }
  }

  if (nffvd > 0) {
    B_all <- c()
    deglist <- vector(mode = "list", length = nffvd)
    L_Phi <- vector(mode = "list", length = nffvd) # list of matrices
    B_T <- vector(mode = "list", length = nffvd) # list of matrices
    M <- vector(mode = "list", length = nffvd) # list of matrices

    ffvd_counter <- 0
    for (ffvd_evaluation in evals[grepl("ffvd", names(evals))]) {
      ffvd_counter <- ffvd_counter + 1

      B_all <- cbind(B_all, ffvd_evaluation[["B"]])
      deglist[[ffvd_counter]] <- ffvd_evaluation[["nbasis"]][2:3]

      L_Phi[[ffvd_counter]] <- ffvd_evaluation[["L_Phi"]]
      B_T[[ffvd_counter]] <- ffvd_evaluation[["B_T"]]
      M[[ffvd_counter]] <- ffvd_evaluation[["M"]]
      # TMatrix[[ffvd_counter]] <- ffvd_evaluation[["TMatrix"]]
    }

    B_res <- B2XZG(B_all, deglist)

    X <- cbind(X, B_res$X_ffvd)
    Z <- cbind(Z, B_res$Z_ffvd)
    G <- c(G, B_res$G_ffvd)
  }

  for (column_index in rev(non_special_indices)) {
    X <- cbind(evals[[column_index]], X)
  }

  if (is.null(Z)) {
    stop("please use the 'lm' function from the 'stats' package", call. = TRUE)
  }

  X <- as.matrix(cbind(rep(1, lenght = nrow(X)), X))

  G <- add_zeros_to_G(G, nf, ncol(Z))

  if (is.null(offset)) {
    nobs <- length(data[[response]])
    offset <- rep(0L, nobs)
  }

  fit <- sop.fit(
    X = X,
    Z = Z,
    G = G,
    y = data[[response]],
    family = family,
    control = list(trace = FALSE),
    offset = offset
  )

  intercept <- fit$b.fixed[1]

  Vp_tmp <- calculate_Vp_tmp(fit, non_special_indices, nf, n_coefs_f)

  theta_aux <- calculate_theta_aux(fit, non_special_indices, nf, l.f)

  covar_theta <- B_res$TMatrix %*% Vp_tmp %*% t(B_res$TMatrix)

  theta_ffvd <- B_res$TMatrix %*% theta_aux

  theta_no_functional <- calculate_theta_no_functional(fit, non_special_indices)

  theta_f <- calculate_theta_f(nf, l.f, fit, non_special_indices)
  Beta_ffvd <- calculate_beta_ffvd(nffvd, data, response, M, L_Phi, B_T, theta_ffvd, deglist)

  ffvd_evals <- process_ffvd_evals(evals)

  res <- list(
    fit = fit,
    Beta = Beta_ffvd,
    theta = theta_ffvd,
    covar_theta = covar_theta,
    M = M,
    ffvd_evals = ffvd_evals,
    theta_no_functional = theta_no_functional,
    theta_f = theta_f
  )

  class(res) <- "vd_fit"
  attr(res, "N") <- length(data[[response]])

  res
}



#' @export
summary.vd_fit <- function(object, ...) {
  base::summary(object$fit, ...)
}

#' Helper function to process ffvd evaluations
#'
#' @noRd
process_ffvd_evals <- function(evals) {
  ffvd_evals <- evals[grepl("ffvd", names(evals))]
  names <- paste0("ffvd_", gsub(".*?\\((.+?),.+", "\\1", names(ffvd_evals)))
  names(ffvd_evals) <- names
  ffvd_evals
}

#' Helper function to compute beta_ffvd
#'
#' @noRd
calculate_beta_ffvd <- function(nffvd, data, response, M, L_Phi, B_T, theta, deglist) {
  if (nffvd == 0) {
    return(NULL)
  }

  nffvd <- length(M)
  Beta_ffvd <- vector("list", nffvd)

  # Initialize Beta_ffvd matrices
  for (j in seq_len(nffvd)) {
    Beta_ffvd[[j]] <- matrix(
      NA,
      nrow = length(data[[response]]),
      ncol = max(M[[j]])
    )
  }

  # Calculate Beta_ffvd values
  theta_index <- 1
  for (j in seq_len(nffvd)) {
    for (i in seq_along(data[[response]])) {
      range_start <- M[[j]][i, 1]
      range_end <- M[[j]][i, 2]
      range <- range_start:range_end

      kron_product <- kronecker(
        L_Phi[[j]]$B[range, ],
        t(B_T[[j]]$B[i, ])
      )

      end_index <- theta_index + prod(deglist[[j]]) - 1
      Beta_ffvd[[j]][i, range] <- as.matrix(kron_product) %*% theta[theta_index:end_index]
    }
    theta_index <- theta_index + prod(deglist[[j]])
  }

  Beta_ffvd
}

#' Helper function to sum the number of column for all the matrices inside a list
#'
#' @noRd
sum_cols <- function(list, type) {
  sum(sapply(list, function(x) ncol(x$Xmat[[type]])))
}

#' Helper function to compute theta_aux
#'
#' @noRd
calculate_theta_aux <- function(fit, non_special_indices, nf, l.f = NULL) {
  # Determine case and calculate accordingly
  case_type <- list(
    has_non_special = length(non_special_indices) > 0,
    has_f = nf > 0
  )

  if (case_type$has_non_special && !case_type$has_f) {
    # Only non-special indices
    excluded_range <- 1:(length(non_special_indices) + 1)
    b_fixed_subset <- fit$b.fixed[-excluded_range]
    return(c(b_fixed_subset, fit$b.random))
  } else if (!case_type$has_non_special && case_type$has_f) {
    # Only f
    x_cols <- sum_cols(l.f, "X")
    z_cols <- sum_cols(l.f, "Z")

    b_fixed_subset <- fit$b.fixed[-c(1:x_cols)]
    b_random_subset <- fit$b.random[-c(1:z_cols)]
    return(c(b_fixed_subset, b_random_subset))
  } else if (case_type$has_non_special && case_type$has_f) {
    # Both f and non-special indices
    last_index_z <- sum_cols(l.f, "Z")
    x_cols <- sum_cols(l.f, "X")

    last_index_x <- seq(
      length(non_special_indices) + 2,
      by = 1,
      length.out = x_cols
    )

    b_fixed_subset <- fit$b.fixed[(max(last_index_x) + 1):length(fit$b.fixed)]
    b_random_subset <- fit$b.random[(last_index_z + 1):length(fit$b.random)]
    return(c(b_fixed_subset, b_random_subset))
  } else {
    # Neither f nor non-special indices
    return(c(fit$b.fixed[-1], fit$b.random))
  }
}

#' Helper function to compute theta_f
#'
#' @noRd
calculate_theta_f <- function(nf, l.f, fit, non_special_indices) {
  if (nf == 0) {
    return(NULL)
  }

  # Calculate total columns for X matrices and last index for random effects
  total_x_cols <- sum(sapply(l.f, function(x) ncol(x$Xmat$X)))
  last_random_index <- sum(sapply(l.f, function(x) ncol(x$Xmat$Z)))

  # Generate indices for fixed effects
  indices_b_fixed <- if (non_special_indices) {
    seq(
      length(non_special_indices) + 2,
      by = 1,
      length.out = total_x_cols
    )
  } else {
    1:total_x_cols
  }

  # Combine fixed and random effects
  c(
    fit$b.fixed[indices_b_fixed],
    fit$b.random[1:last_random_index]
  )
}

#' Helper function to add zeros to the different Gs
#'
#' @noRd
add_zeros_to_G <- function(G, nf, ncol_Z) {
  if (nf == 0) {
    return(G)
  }

  for (i in seq_along(G)) {
    side <- if (i <= nf) "right" else "left"
    G[[i]] <- add_zeros_to_side(G[[i]], ncol_Z, side = side)
  }

  G
}

#' Helper function to calculate theta_no_functional
#'
#' @noRd
calculate_theta_no_functional <- function(fit, non_special_indices) {
  if (length(non_special_indices) == 0) {
    return(NULL)
  }

  fit$b.fixed[2:(length(non_special_indices) + 1)]
}

#' Helper function to calculate Vp_tmp
#'
#' @noRd
calculate_Vp_tmp <- function(fit, non_special_indices, nf, n_coefs_f) {
  # Calculate total offset
  base_offset <- 1 # We always exclude at least the first row/column
  special_offset <- length(non_special_indices)
  f_offset <- if (nf > 0) n_coefs_f else 0

  total_offset <- base_offset + special_offset + f_offset

  # If the total offset is the base offset we only exclude the first row/column
  if (total_offset == 1) {
    return(fit$Vp[-1, -1])
  }

  exclude_indices <- 1:total_offset

  fit$Vp[-exclude_indices, -exclude_indices]
}

Any scripts or data that you put into this service are public.

VDPO documentation built on Oct. 21, 2024, 5:07 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

VDPO
Working with and Analyzing Functional Data of Varying Lengths

R/vd_fit.R
In VDPO: Working with and Analyzing Functional Data of Varying Lengths

Defines functions calculate_Vp_tmp calculate_theta_no_functional add_zeros_to_G calculate_theta_f calculate_theta_aux sum_cols calculate_beta_ffvd process_ffvd_evals summary.vd_fit vd_fit

Documented in vd_fit

Try the VDPO package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

VDPO Working with and Analyzing Functional Data of Varying Lengths

R/vd_fit.R In VDPO: Working with and Analyzing Functional Data of Varying Lengths

Defines functions calculate_Vp_tmp calculate_theta_no_functional add_zeros_to_G calculate_theta_f calculate_theta_aux sum_cols calculate_beta_ffvd process_ffvd_evals summary.vd_fit vd_fit

Documented in vd_fit

Try the VDPO package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

VDPO
Working with and Analyzing Functional Data of Varying Lengths

R/vd_fit.R
In VDPO: Working with and Analyzing Functional Data of Varying Lengths