#' Cross-validation for angularlasso
#'
#' Does k-fold cross-validation for angularlasso, produces a plot, and returns a
#' value for \code{lambda}. This function is modified based on the \code{cv}
#' function from the \code{glmnet} package.
#'
#' The function runs \code{\link{angularlasso}} \code{nfolds}+1 times; the first to
#' get the \code{lambda} sequence, and then the remainder to compute the fit
#' with each of the folds omitted. The average error and standard deviation
#' over the folds are computed.
#'
#' @aliases cv.angularlasso cv.ls cv.logit cv.angular
#' @param x matrix of predictors, of dimension \eqn{n \times p}{n*p}; each row
#' is an observation vector.
#' @param y response variable. This argument should be quantitative for
#' regression (least squares), and a two-level factor for classification
#' (logistic model, huberized SVM, squared SVM).
#' @param group a vector of consecutive integers describing the grouping of the
#' coefficients (see example below).
#' @param lambda optional user-supplied lambda sequence; default is
#' \code{NULL}, and \code{\link{angularlasso}} chooses its own sequence.
#' @param pred.loss loss to use for cross-validation error. Valid options are:
#' \itemize{ \item \code{"loss"} for classification, margin based loss
#' function. \item \code{"misclass"} for classification, it gives
#' misclassification error. \item \code{"L1"} for regression, mean absolute
#' error used by least squares regression \code{loss="ls"}, it measure the
#' deviation from the fitted mean to the response. \item \code{"L2"} for
#' regression, mean squared error used by least squares regression
#' \code{loss="ls"}, it measure the deviation from the fitted mean to the
#' response. \item \code{"cosine"} for angular regression. } Default is \code{"loss"}.
#' @param nfolds number of folds - default is 5. Although \code{nfolds} can be
#' as large as the sample size (leave-one-out CV), it is not recommended for
#' large datasets. Smallest value allowable is \code{nfolds=3}.
#' @param foldid an optional vector of values between 1 and \code{nfold}
#' identifying what fold each observation is in. If supplied, \code{nfold} can
#' be missing.
#' \code{pred.loss = "loss"}.
#' @param \dots other arguments that can be passed to angularlasso.
#' @return an object of class \code{\link{cv.angularlasso}} is returned, which is a
#' list with the ingredients of the cross-validation fit. \item{lambda}{the
#' values of \code{lambda} used in the fits.} \item{cvm}{the mean
#' cross-validated error - a vector of length \code{length(lambda)}.}
#' \item{cvsd}{estimate of standard error of \code{cvm}.} \item{cvupper}{upper
#' curve = \code{cvm+cvsd}.} \item{cvlower}{lower curve = \code{cvm-cvsd}.}
#' \item{name}{a text string indicating type of measure (for plotting
#' purposes).} \item{angularlasso.fit}{a fitted \code{\link{angularlasso}} object for the
#' full data.} \item{lambda.min}{The optimal value of \code{lambda} that gives
#' minimum cross validation error \code{cvm}.} \item{lambda.1se}{The largest
#' value of \code{lambda} such that error is within 1 standard error of the
#' minimum.}
#' @seealso \code{\link{angularlasso}}, \code{\link{plot.cv.angularlasso}},
#' \code{\link{predict.cv.angularlasso}}, and \code{\link{coef.cv.angularlasso}} methods.
#' @keywords models regression
#' @examples
#'
#' library(angularlasso)
#'
#' # load data set
#' data(bardet)
#'
#' # define group index
#' group <- rep(1:20,each=5)
#'
#' # 5-fold cross validation using group lasso
#' # penalized logisitic regression
#' cv <- cv.angularlasso(x=bardet$x, y=bardet$y, group=group, loss="ls",
#' pred.loss="L2", lambda.factor=0.05, nfolds=5)
#'
#' @export
cv.angularlasso <- function(x, y, group, lambda = NULL, pred.loss = c("misclass",
"loss", "L1", "L2", "cosine"), nfolds = 5, foldid, delta, ...) {
if (missing(pred.loss))
pred.loss <- "default" else pred.loss <- match.arg(pred.loss)
N <- nrow(x)
###Fit the model once to get dimensions etc of output
y <- drop(y)
angularlasso.object <- angularlasso(x, y, group, lambda = lambda, ...)
lambda <- angularlasso.object$lambda
# predict -> coef
if (missing(foldid))
foldid <- sample(rep(seq(nfolds), length = N)) else nfolds <- max(foldid)
if (nfolds < 3)
stop("nfolds must be bigger than 3; nfolds=10 recommended")
outlist <- as.list(seq(nfolds))
###Now fit the nfold models and store them
for (i in seq(nfolds)) {
which <- foldid == i
y_sub <- y[!which]
outlist[[i]] <- angularlasso(x = x[!which, , drop = FALSE], y = y_sub, group = group,
lambda = lambda, ...)
}
###What to do depends on the pred.loss and the model fit
fun <- paste("cv", class(angularlasso.object)[[2]], sep = ".")
cvstuff <- do.call(fun, list(outlist, lambda, x, y, foldid, pred.loss, delta))
cvm <- cvstuff$cvm
cvsd <- cvstuff$cvsd
cvname <- cvstuff$name
out <- list(lambda = lambda, cvm = cvm, cvsd = cvsd, cvupper = cvm + cvsd,
cvlo = cvm - cvsd, name = cvname, angularlasso.fit = gglasso.object)
lamin <- getmin(lambda, cvm, cvsd)
obj <- c(out, as.list(lamin))
class(obj) <- "cv.angularlasso"
obj
}
#' @export
cv.logit <- function(outlist, lambda, x, y, foldid, pred.loss, delta) {
typenames <- c(misclass = "Misclassification Error", loss = "Margin Based Loss")
if (pred.loss == "default")
pred.loss <- "misclass"
if (!match(pred.loss, c("misclass", "loss"), FALSE)) {
warning("Only 'misclass' and 'loss' available for logistic regression; 'misclass' used")
pred.loss <- "misclass"
}
prob_min <- 1e-05
fmax <- log(1/prob_min - 1)
fmin <- -fmax
###Turn y into c(0,1)
y <- as.factor(y)
y <- c(-1, 1)[as.numeric(y)]
nfolds <- max(foldid)
predmat <- matrix(NA, length(y), length(lambda))
nlams <- double(nfolds)
for (i in seq(nfolds)) {
which <- foldid == i
fitobj <- outlist[[i]]
preds <- predict(fitobj, x[which, , drop = FALSE], type = "link")
nlami <- length(outlist[[i]]$lambda)
predmat[which, seq(nlami)] <- preds
nlams[i] <- nlami
}
predmat <- pmin(pmax(predmat, fmin), fmax)
cvraw <- switch(pred.loss, loss = 2 * log(1 + exp(-y * predmat)), misclass = (y !=
ifelse(predmat > 0, 1, -1)))
N <- length(y) - apply(is.na(predmat), 2, sum)
cvm <- apply(cvraw, 2, mean, na.rm = TRUE)
cvsd <- sqrt(apply(scale(cvraw, cvm, FALSE)^2, 2, mean, na.rm = TRUE)/(N -
1))
list(cvm = cvm, cvsd = cvsd, name = typenames[pred.loss])
}
#' @export
cv.ls <- function(outlist, lambda, x, y, foldid, pred.loss, delta) {
typenames <- c(L2 = "Least-Squared loss", L1 = "Absolute loss")
if (pred.loss == "default")
pred.loss <- "L2"
if (!match(pred.loss, c("L2", "L1"), FALSE)) {
warning("Only 'L2' and 'L1' available for least squares models; 'L2' used")
pred.loss <- "L2"
}
predmat <- matrix(NA, length(y), length(lambda))
nfolds <- max(foldid)
nlams <- double(nfolds)
for (i in seq(nfolds)) {
which <- foldid == i
fitobj <- outlist[[i]]
preds <- predict(fitobj, x[which, , drop = FALSE])
nlami <- length(outlist[[i]]$lambda)
predmat[which, seq(nlami)] <- preds
nlams[i] <- nlami
}
cvraw <- switch(pred.loss, L2 = (y - predmat)^2, L1 = abs(y - predmat))
N <- length(y) - apply(is.na(predmat), 2, sum)
cvm <- apply(cvraw, 2, mean, na.rm = TRUE)
cvsd <- sqrt(apply(scale(cvraw, cvm, FALSE)^2, 2, mean, na.rm = TRUE)/(N -
1))
list(cvm = cvm, cvsd = cvsd, name = typenames[pred.loss])
}
#' @export
cv.angular <- function(outlist, lambda, x, y, foldid, pred.loss, delta) {
typenames <- c(cosine = "Cosine loss")
if (pred.loss == "default")
pred.loss <- "cosine"
predmat <- matrix(NA, length(y), length(lambda))
nfolds <- max(foldid)
nlams <- double(nfolds)
for (i in seq(nfolds)) {
which <- foldid == i
fitobj <- outlist[[i]]
preds <- predict(fitobj, x[which, , drop = FALSE])
nlami <- length(outlist[[i]]$lambda)
predmat[which, seq(nlami)] <- preds
nlams[i] <- nlami
}
cvraw <- cos(y - predmat)
N <- length(y) - apply(is.na(predmat), 2, sum)
cvm <- apply(cvraw, 2, mean, na.rm = TRUE)
cvsd <- sqrt(apply(scale(cvraw, cvm, FALSE)^2, 2, mean, na.rm = TRUE)/(N - 1))
list(cvm = cvm, cvsd = cvsd, name = typenames[pred.loss])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.