#' @title Fitting Sparse Distributional Regression Models
#'
#' @param y response variable
#' @param list_of_formulas a named list of right hand side formulas,
#' one for each parameter of the distribution specified in \code{family};
#' set to \code{~ 1} if the parameter should be treated as constant.
#' It is only required to supply the covariate names, e.g.,
#' \code{~ 1 + x + z + a + b + c}. If the argument is \code{NULL},
#' all covariates in \code{data} are used.
#' @param family a character specifying the distribution. For information on
#' possible distribution and parameters, see \code{\link{make_tfd_dist}}. Can also
#' be a custom distribution
#' @param data data.frame or named list with input features
#' @param type character; if \code{"reduced"} (default), then P-splines with
#' basis 3 and penalty order 2 are used which have a linear null space.
#' \code{"standard"} uses P-splines with basis 2 and 0-order penalty
#' @param gammas vector; of \code{length(list_of_formulas)} with
#' entries in the range [0,1]. Each value specifies the amount
#' of non-linearity in each additive predictor (0 means maximum penalization for
#' non-linear terms, 1 maximum flexibility). Only used for \code{type = "standard"}
#' @param lambdas a vector of \code{length(list_of_formulas)} specifying the amount
#' of sparsity in each additive predictor (0 means no sparsity)
#' @param sterm_options options for smooth terms defined in \code{sterm_control}
#' @param penalty_options options for penalization, see \code{?deepregression}
#' @param ... further arguments passed to \code{deepregression}
#'
#' @import deepregression
#'
#' @export
#'
#' @examples
#' library(sparsedistreg)
#'
#' set.seed(32)
#'
#' data <- data.frame(a=rnorm(100), b=rnorm(100), c=rnorm(100))
#' y <- rnorm(100) + data$a
#'
#' # fit a model with lambdas = gammas = 0
#' mod <- sparsedistreg(
#' y = y,
#' data = data,
#' type = "standard"
#' )
#'
#' mod %>% fit(epochs=1500L, early_stopping=TRUE)
#'
#' mod %>% plot()
#'
#' # fit a model with more sparsity penalty for location
#' # parameter (2) and 20/80% penalty for linear/non-linear
#' mod <- sparsedistreg(
#' y = y,
#' data = data,
#' type = "standard",
#' gammas = c(0.2,0),
#' lambdas = c(2,0)
#' )
#'
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#'
#' mod %>% plot()
#' mod %>% coef()
#'
#' # now set the penalty for linear effects in
#' # the location to 100%
#' mod <- sparsedistreg(
#' y = y,
#' data = data,
#' type = "standard",
#' gammas = c(1,0),
#' lambdas = c(200,0)
#' )
#'
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#'
#' mod %>% plot()
#' mod %>% coef()
#'
#' # penalize linear effect 100%
#' mod <- sparsedistreg(
#' y = y,
#' list_of_formulas = list(
#' ~ a,
#' ~ b + c
#' ),
#' data = data,
#' type = "standard",
#' gammas = c(1,0),
#' lambdas = c(2,0)
#' )
#'
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#'
#' # plots only the mean, so s(a)
#' mod %>% plot()
#' # linear effect completely absorbed into spline
#' mod %>% coef()
#'
#' ##### reduced form now
#' # first penalize s(a) to null effect
#' mod <- sparsedistreg(
#' y = y,
#' list_of_formulas = list(
#' ~ a,
#' ~ b + c
#' ),
#' data = data,
#' type = "reduced",
#' lambdas = c(1,0)
#' )
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#'
#' mod %>% plot()
#'
#' # now less lambda penalty
#'
#' mod <- sparsedistreg(
#' y = y,
#' list_of_formulas = list(
#' ~ a,
#' ~ b + c
#' ),
#' data = data,
#' type = "reduced",
#' lambdas = c(0,0)
#' )
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#'
#' mod %>% plot()
#'
#' # now additional term with no influence
#' mod <- sparsedistreg(
#' y = y,
#' list_of_formulas = list(
#' ~ a + b,
#' ~ c
#' ),
#' data = data,
#' type = "reduced",
#' lambdas = c(4,0),
#' )
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#'
#' mod %>% plot()
#'
#' # now with p-splines instead of thin-plate
#'
#' mod <- sparsedistreg(
#' y = y,
#' list_of_formulas = list(
#' ~ a + b,
#' ~ c
#' ),
#' data = data,
#' type = "reduced",
#' lambdas = c(4,0)
#' )
#' mod %>% fit(epochs=2000L, early_stopping=TRUE)
#'
#' mod %>% plot()
#'
sparsedistreg <- function(
y,
list_of_formulas = NULL,
family = "normal",
data,
type = "reduced",
gammas = rep(0, length(list_of_formulas)),
lambdas = rep(0, length(list_of_formulas)),
sterm_options = sterm_control(),
penalty_options = penalty_control(),
...
){
if(!is.null(list_of_formulas)){
if(length(gammas) != length(list_of_formulas) & type == "standard")
gammas <- fit_vector_length(gammas, length(list_of_formulas), "gamma")
if(length(lambdas) != length(list_of_formulas))
lambdas <- fit_vector_length(lambdas, length(list_of_formulas), "lambdas")
}else{
list_of_formulas <- create_full_lists(data, family)
}
if(type == "reduced")
sterm_options$sterm_default <- function(x, la)
paste0("sparse(s(", x, ", la = ", la, ", m=c(3,2), bs = 'ps'))")
list_of_formulas <- expand_forms(list_of_formulas,
controls = c(sterm_options,
penalty_options,
gammas = list(gammas),
lambdas = list(lambdas)),
type = type)
mod <- deepregression(
y = y,
list_of_formulas = list_of_formulas,
family = family,
data = data,
additional_processors = list(sparse = sparses_processor),
penalty_options = penalty_options,
...
)
class(mod) <- c("sparsedistreg", class(mod))
return(mod)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.