R/main.R

Defines functions sparsedistreg

Documented in sparsedistreg

#' @title Fitting Sparse Distributional Regression Models
#'
#' @param y response variable
#' @param list_of_formulas a named list of right hand side formulas,
#' one for each parameter of the distribution specified in \code{family};
#' set to \code{~ 1} if the parameter should be treated as constant.
#' It is only required to supply the covariate names, e.g.,
#' \code{~ 1 + x + z + a + b + c}. If the argument is \code{NULL},
#' all covariates in \code{data} are used.
#' @param family a character specifying the distribution. For information on
#' possible distribution and parameters, see \code{\link{make_tfd_dist}}. Can also
#' be a custom distribution
#' @param data data.frame or named list with input features
#' @param type character; if \code{"reduced"} (default), then P-splines with 
#' basis 3 and penalty order 2 are used which have a linear null space. 
#' \code{"standard"} uses P-splines with basis 2 and 0-order penalty 
#' @param gammas vector; of \code{length(list_of_formulas)} with 
#' entries in the range [0,1]. Each value specifies the amount
#' of non-linearity in each additive predictor (0 means maximum penalization for
#' non-linear terms, 1 maximum flexibility). Only used for \code{type = "standard"}
#' @param lambdas a vector of \code{length(list_of_formulas)} specifying the amount
#' of sparsity in each additive predictor (0 means no sparsity)
#' @param sterm_options options for smooth terms defined in \code{sterm_control}
#' @param penalty_options options for penalization, see \code{?deepregression}
#' @param ... further arguments passed to \code{deepregression}
#'
#' @import deepregression
#'
#' @export
#'
#' @examples
#' library(sparsedistreg)
#' 
#' set.seed(32)
#' 
#' data <- data.frame(a=rnorm(100), b=rnorm(100), c=rnorm(100))
#' y <- rnorm(100) + data$a
#'
#' # fit a model with lambdas = gammas = 0
#' mod <- sparsedistreg(
#'   y = y,
#'   data = data,
#'   type = "standard"
#' )
#' 
#' mod %>% fit(epochs=1500L, early_stopping=TRUE)
#' 
#' mod %>% plot()
#'
#' # fit a model with more sparsity penalty for location
#' # parameter (2) and 20/80% penalty for linear/non-linear
#' mod <- sparsedistreg(
#'   y = y,
#'   data = data,
#'   type = "standard",
#'   gammas = c(0.2,0),
#'   lambdas = c(2,0)
#' )
#'
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#'
#' mod %>% plot()
#' mod %>% coef()
#'
#' # now set the penalty for linear effects in
#' # the location to 100%
#' mod <- sparsedistreg(
#'   y = y,
#'   data = data,
#'   type = "standard",
#'   gammas = c(1,0),
#'   lambdas = c(200,0)
#' )
#'
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#'
#' mod %>% plot()
#' mod %>% coef()
#'
#' # penalize linear effect 100%
#' mod <- sparsedistreg(
#'   y = y,
#'   list_of_formulas = list(
#'   ~ a,
#'   ~ b + c
#'   ),
#'   data = data,
#'   type = "standard",
#'   gammas = c(1,0),
#'   lambdas = c(2,0)
#' )
#'
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#'
#' # plots only the mean, so s(a)
#' mod %>% plot()
#' # linear effect completely absorbed into spline
#' mod %>% coef()
#' 
#' ##### reduced form now
#' # first penalize s(a) to null effect
#' mod <- sparsedistreg(
#'   y = y,
#'   list_of_formulas = list(
#'   ~ a,
#'   ~ b + c
#'   ),
#'   data = data,
#'   type = "reduced",
#'   lambdas = c(1,0)
#' )
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#' 
#' mod %>% plot()
#' 
#' # now less lambda penalty
#' 
#' mod <- sparsedistreg(
#'   y = y,
#'   list_of_formulas = list(
#'   ~ a,
#'   ~ b + c
#'   ),
#'   data = data,
#'   type = "reduced",
#'   lambdas = c(0,0)
#' )
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#' 
#' mod %>% plot()
#' 
#' # now additional term with no influence
#' mod <- sparsedistreg(
#'   y = y,
#'   list_of_formulas = list(
#'   ~ a + b,
#'   ~ c
#'   ),
#'   data = data,
#'   type = "reduced",
#'   lambdas = c(4,0),
#' )
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#' 
#' mod %>% plot()
#' 
#' # now with p-splines instead of thin-plate
#' 
#' mod <- sparsedistreg(
#'   y = y,
#'   list_of_formulas = list(
#'   ~ a + b,
#'   ~ c
#'   ),
#'   data = data,
#'   type = "reduced",
#'   lambdas = c(4,0)
#' )
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#' 
#' mod %>% plot()
#' 
sparsedistreg <- function(
  y,
  list_of_formulas = NULL,
  family = "normal",
  data,
  type = "reduced",
  gammas = rep(0, length(list_of_formulas)),
  lambdas = rep(0, length(list_of_formulas)),
  sterm_options = sterm_control(),
  penalty_options = penalty_control(),
  ...
){

  if(!is.null(list_of_formulas)){

    if(length(gammas) != length(list_of_formulas) & type == "standard")

      gammas <- fit_vector_length(gammas, length(list_of_formulas), "gamma")

    if(length(lambdas) != length(list_of_formulas))

      lambdas <- fit_vector_length(lambdas, length(list_of_formulas), "lambdas")
    
  }else{

    list_of_formulas <- create_full_lists(data, family)

  }

  if(type == "reduced")
    sterm_options$sterm_default <-  function(x, la) 
      paste0("sparse(s(", x, ", la = ", la, ", m=c(3,2), bs = 'ps'))")
    
  
  list_of_formulas <- expand_forms(list_of_formulas, 
                                   controls = c(sterm_options, 
                                                penalty_options,
                                                gammas = list(gammas),
                                                lambdas = list(lambdas)),
                                   type = type)
  

  mod <- deepregression(
    y = y,
    list_of_formulas = list_of_formulas,
    family = family,
    data = data,
    additional_processors = list(sparse = sparses_processor),
    penalty_options = penalty_options,
    ...
  )

  class(mod) <- c("sparsedistreg", class(mod))

  return(mod)

}
neural-structured-additive-learning/sparsedistreg documentation built on May 13, 2022, 3:56 a.m.