Nothing
# The functions in this file are used to compute the elastic net coefficient
# paths for a GLM. The main function is glm_elnet, other functions are
# auxiliaries. The L1-regularized projection path is computed by replacing the
# actual data y by the fit of the reference model when calling glm_elnet. Uses
# functions in glmfun.cpp.
standardization <- function(x, center = TRUE, scale = TRUE, weights = NULL) {
#
# return the shift and scaling for each variable based on data matrix x.
#
w <- weights / sum(weights)
if (center) {
mx <- colSums(x * w)
} else {
mx <- rep(0, ncol(x))
}
if (scale) {
sx <- apply(x, 2, weighted.sd, w)
} else {
sx <- rep(1, ncol(x))
}
return(list(shift = mx, scale = sx))
}
pseudo_data <- function(f, y, family, offset = rep(0, NROW(f)),
weights = rep(1.0, NROW(f)), obsvar = 0, wprev = NULL) {
#
# Returns locations z and weights w (inverse-variances) of the Gaussian
# pseudo-observations based on the linear approximation to the link function
# at f = eta = x*beta + beta0, as explained in McCullagh and Nelder (1989).
# Returns also the loss (= negative log likelihood) and its pointwise
# derivative w.r.t f at the current f.
#
mu <- family$linkinv(f + offset)
dmu_df <- family$mu.eta(f + offset)
z <- f + (y - mu) / dmu_df
if (family$family == "Student_t") {
# Student-t does not belong to the exponential family and thus it has its
# own way of computing the observation weights
if (is.null(wprev)) {
# initialization of the em-iteration; loop recursively until stable
# initial weights are found
wprev <- weights
while (TRUE) {
wtemp <- pseudo_data(f, y, family, offset = offset, weights = weights,
wprev = wprev, obsvar = obsvar)$wobs
if (max(abs(wtemp - wprev)) < 1e-6) {
break
}
wprev <- wtemp
}
}
# given the weights from the previous em-iteration, update s2 based on the
# previous weights and mu, and then compute new weights w
nu <- family$nu
s2 <- sum(wprev * (obsvar + (y - mu)^2)) / sum(weights)
wobs <- weights * (nu + 1) / (nu + 1 / s2 * (obsvar + (y - mu)^2))
loss <- 0.5 * sum(family$deviance(mu, y, weights, sqrt(s2)))
grad <- weights * (mu - y) / (nu * s2) * (nu + 1) /
(1 + (y - mu)^2 / (nu * s2)) * dmu_df
} else if (family$family %in% c("gaussian", "poisson", "binomial")) {
# exponential family distributions
wobs <- (weights * dmu_df^2) / family$variance(mu) # 2* because of deviance
# We can use always use `dis <- NULL` here because for the Poisson and the
# binomial family, there is no dispersion parameter and for the Gaussian
# family, `dis <- NULL` is internally replaced by `dis <- 1` which is ok
# here because `dis` simply scales and shifts the deviance (and thus `loss`
# here), see issue #200 and PR #383:
dis <- NULL
loss <- 0.5 * sum(family$deviance(mu, y, weights, dis))
grad <- -wobs * (z - f)
} else {
stop("Don't know how to compute quadratic approximation and gradients",
sprintf(" for family '%s'.", family$family))
}
return(nlist(z, wobs, loss, grad))
}
lambda_grid <- function(x, y, family, offset, weights, intercept, penalty,
obsvar = 0, alpha = 1.0, lambda_min_ratio = 1e-2,
nlam = 100) {
#
# Standard lambda sequence as described in Friedman et al. (2009), section
# 2.5. The grid will have nlam values, evenly spaced in the log-space between
# lambda_max and lambda_min. lambda_max is the smallest value for which all
# the regression coefficients will be zero (assuming alpha > 0, alpha = 0 will
# be initialized as if alpha = 0.01). Returns also the initial solution
# corresponding to the largest lambda (intercept and the unpenalized variables
# will be nonzero).
n <- dim(x)[1]
if (alpha == 0) {
# initialize ridge as if alpha = 0.01
alpha <- 0.01
}
# find the initial solution, that is, values for the intercept (if included)
# and those covariates that have penalty=0 (those which are always included,
# if such exist)
init <- glm_ridge(x[, penalty == 0, drop = FALSE], y,
family = family, lambda = 0, weights = weights,
offset = offset, obsvar = obsvar, intercept = intercept)
f0 <- init$beta0 * rep(1, n)
if (length(init$beta) > 0) {
f0 <- f0 + as.vector(x[, penalty == 0, drop = FALSE] %*% init$beta)
}
obs <- pseudo_data(f0, y, family, offset, weights, obsvar = obsvar)
resid <- obs$z - f0 # residual from the initial solution
lambda_max_cand <- abs(t(x) %*% (resid * obs$wobs)) / (penalty * alpha)
lambda_max <- max(lambda_max_cand[is.finite(lambda_max_cand)])
## to prevent some variable from entering at the first step due to numerical
## inaccuracy
lambda_max <- 1.001 * lambda_max
lambda_min <- lambda_min_ratio * lambda_max
loglambda <- seq(log(lambda_min), log(lambda_max), len = nlam)
beta <- rep(0, ncol(x))
beta[penalty == 0] <- init$beta
return(list(lambda = rev(exp(loglambda)), beta = beta,
beta0 = init$beta0, w0 = obs$wobs))
}
glm_elnet <- function(x, y, family = gaussian(), nlambda = 100,
lambda_min_ratio = 1e-3, lambda = NULL, alpha = 1.0,
qa_updates_max = ifelse(family$family == "gaussian" &&
family$link == "identity",
1, 100),
pmax = dim(as.matrix(x))[2] + 1, pmax_strict = FALSE,
weights = NULL, offset = NULL, obsvar = 0,
intercept = TRUE, normalize = TRUE, penalty = NULL,
thresh = 1e-6) {
#
# Fits GLM with elastic net penalty on the regression coefficients.
# Computes the whole regularization path.
# Does not handle any dispersion parameters.
#
family <- extend_family(family)
# ensure x is in matrix form and fill in missing weights and offsets
x <- as.matrix(x)
if (is.null(weights)) {
weights <- rep(1.0, nrow(x))
}
if (is.null(offset)) {
offset <- rep(0.0, nrow(x))
}
if (is.null(penalty)) {
penalty <- rep(1.0, ncol(x))
} else if (length(penalty) != ncol(x)) {
stop("Incorrect length of penalty vector (should be ", ncol(x), ").")
}
# standardize the features (notice that the variables are centered only if
# intercept is used because otherwise the intercept would become nonzero
# unintentionally)
transf <- standardization(x, center = intercept, scale = normalize,
weights = weights)
penalty[transf$scale == 0] <- Inf # ignore variables with zero variance
transf$scale[transf$scale == 0] <- 1
x <- t((t(x) - transf$shift) / transf$scale)
# default lambda-sequence, including optimal start point
if (is.null(lambda)) {
temp <- lambda_grid(x, y, family, offset, weights, intercept, penalty,
alpha = alpha, obsvar = obsvar, nlam = nlambda,
lambda_min_ratio = lambda_min_ratio)
lambda <- temp$lambda
w0 <- temp$w0
beta <- temp$beta
beta0 <- temp$beta0
} else {
beta <- rep(0, ncol(x))
beta0 <- 0
w0 <- weights
}
# call the C++-function that serves as the workhorse
pseudo_obs <- function(f, wprev) {
pseudo_data(f, y, family, offset = offset, weights = weights,
obsvar = obsvar, wprev = wprev)
}
out <- glm_elnet_c(
x, pseudo_obs, lambda, alpha, intercept, penalty,
thresh, qa_updates_max, pmax, pmax_strict, beta, beta0, w0
)
beta <- out[[1]]
beta0 <- as.vector(out[[2]])
# return the intercept and the coefficients on the original scale
beta <- beta / transf$scale
beta0 <- beta0 - colSums(transf$shift * beta)
return(nlist(
beta, beta0, w = out[[3]], lambda = lambda[seq_len(ncol(beta))],
npasses = out[[4]], updates_qa = as.vector(out[[5]]),
updates_as = as.vector(out[[6]])
))
}
glm_ridge <- function(x, y, family = gaussian(), lambda = 0, thresh = 1e-7,
qa_updates_max = NULL, weights = NULL, offset = NULL,
obsvar = 0, intercept = TRUE, penalty = NULL,
normalize = TRUE, beta_init = NULL, beta0_init = NULL,
ls_iter_max = 30) {
#
# Fits GLM with ridge penalty on the regression coefficients.
# Does not handle any dispersion parameters.
#
if (is.null(x)) {
x <- matrix(ncol = 0, nrow = length(y))
}
family <- extend_family(family)
if (family$family == "gaussian" && family$link == "identity") {
qa_updates_max <- 1
ls_iter_max <- 1
} else if (is.null(qa_updates_max)) {
qa_updates_max <- 100
}
if (is.null(weights)) {
weights <- rep(1.0, length(y))
}
if (is.null(offset)) {
offset <- rep(0.0, length(y))
}
if (is.null(beta0_init)) {
beta0_init <- 0
}
if (is.null(beta_init)) {
beta_init <- rep(0, NCOL(x))
}
if (intercept) {
beta_start <- c(beta0_init, beta_init)
} else {
beta_start <- beta_init
}
if (is.null(penalty)) {
penalty <- rep(1.0, NCOL(x))
}
if (length(x) == 0) {
if (intercept) {
# model with intercept only (fit like model with no intercept but with one
# constant predictor)
x <- matrix(rep(1, length(y)), ncol = 1)
w0 <- weights
pseudo_obs <- function(f, wprev)
pseudo_data(f, y, family, offset = offset, weights = weights,
obsvar = obsvar, wprev = wprev)
out <- glm_ridge_c(x, pseudo_obs, lambda, FALSE, 0, beta_start, w0,
thresh, qa_updates_max, ls_iter_max)
return(list(beta = matrix(integer(length = 0)),
beta0 = as.vector(out[[1]]), w = out[[3]], loss = out[[4]],
qa_updates = out[[5]]))
} else {
# null model with no predictors and no intercept
pseudo_obs <- function(f, wprev)
pseudo_data(f, y, family, offset = offset, weights = weights,
obsvar = obsvar, wprev = wprev)
pobs <- pseudo_obs(rep(0, length(y)), weights)
return(list(beta = matrix(integer(length = 0)), beta0 = 0, w = pobs$wobs,
qa_updates = 0))
}
}
# normal case, at least one predictor
x <- as.matrix(x) # ensure x is a matrix
# standardize the features (notice that the variables are centered only if
# intercept is used because otherwise the intercept would become nonzero
# unintentionally)
transf <- standardization(x, center = intercept, scale = normalize,
weights = weights)
penalty[transf$scale == 0] <- Inf # ignore variables with zero variance
transf$scale[transf$scale == 0] <- 1
x <- t((t(x) - transf$shift) / transf$scale)
# compute the solution
w0 <- weights
pseudo_obs <- function(f, wprev)
pseudo_data(f, y, family, offset = offset, weights = weights,
obsvar = obsvar, wprev = wprev)
out <- glm_ridge_c(x, pseudo_obs, lambda, intercept, penalty, beta_start, w0,
thresh, qa_updates_max, ls_iter_max)
beta <- out[[1]]
beta0 <- as.vector(out[[2]])
w <- out[[3]]
loss <- out[[4]]
# return the intercept and the coefficients on the original scale
beta_orig <- beta / transf$scale
beta0_orig <- beta0 - sum(transf$shift * beta_orig)
out <- nlist(beta = beta_orig, beta0 = beta0_orig, w,
qa_updates = out[[5]])
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.