R/crossval-class.R
In quadrupen: Sparsity by Worst-Case Quadratic Penalties

#' Class "cvpen"
#'
#' Class of object returned by a cross-validation performed through
#' the \code{crossval} method.
#'
#' @section Slots: \describe{
#' \item{\code{lambda1}:}{vector of \eqn{\lambda_1}{lambda1}
#' (\eqn{\ell_1}{l1} or \eqn{\ell_\infty}{infinity} penalty levels)
#' for which each cross-validation has been performed.}
#' \item{\code{lambda2}:}{vector (or scalar) of \eqn{\ell_2}{l2}-penalty levels for
#' which each cross-validation has been performed.}
#' \item{\code{lambda1.min}:}{level of \eqn{\lambda_1}{lambda1} that minimizes the
#' error estimated by cross-validation.}
#' \item{\code{lambda1.1se}:}{largest level of \eqn{\lambda_1}{lambda1} such as
#' the cross-validated error is within 1 standard error of the
#' minimum.}
#' \item{\code{lambda2.min}:}{level of \eqn{\lambda_2}{lambda2} that minimizes the
#' error estimated by cross-validation.}
#' \item{\code{cv.error}:}{a data frame containing the mean
#' cross-validated error and its associated standard error for each
#' values of \code{lambda1} and \code{lambda2}.}
#' \item{\code{folds}:}{list of \code{K} vectors indicating the folds
#' used for cross-validation.}
#' \item{\code{beta.min}:}{the vector of parameters obtained by
#' fitting the problem on the full data set \code{x} and \code{y} with
#' \code{lambda1.min} and  \code{lambda2.min} penalties.}
#' \item{\code{beta.1se}:}{the vector of parameters obtained by
#' fitting the problem on the full data set \code{x} and \code{y} with
#' \code{lambda1.1se} and \code{lambda2.min} penalties.
#' }
#'
#' }
#'
#' The specific \code{\link{plot,cvpen-method}} method is documented.
#'
#' @docType class
#'
#' @keywords class
#'
#' @seealso See also \code{\link{plot,cvpen-method}} and
#' \code{\link{crossval}}.
#'
#' @name cvpen-class
#'
#' @rdname cvpen-class
#'
#' @exportClass cvpen
#'
setClass("cvpen",
   representation = representation(
     lambda1     = "numeric",
     lambda2     = "numeric",
     lambda1.min = "numeric",
     lambda1.1se = "numeric",
     lambda2.min = "numeric",
     cv.error    = "data.frame",
     folds       = "list",
     beta.min    = "numeric",
     beta.1se    = "numeric")
)

#' Plot method for cross validated error of a \code{quadrupen} model
#'
#' Produce a plot of the cross validated error of a \code{quadrupen}
#' model.
#'
#' @usage \\S4method{plot}{cvpen}(x, y, log.scale=TRUE, reverse=FALSE,
#' plot=TRUE, main = "Cross-validation error", ...)
#' @param x output of a \code{crossval} run (must be of class
#' \code{cvpen}).
#' @param y used for S4 compatibility.
#' @param main the main title, with a hopefully appropriate default definition.
#' @param log.scale logical; indicates if a log-scale should be used
#' when \code{xvar="lambda"}. Ignored for 2D cross-validation plot.
#' @param plot logical; indicates if the graph should be plotted. Default is \code{TRUE}.
#' @param reverse logical; should the X-axis by reversed when \code{xvar=lambda}? Default is FALSE.  Ignored for 2D cross-validation plot.
#' @param ... used for S4 compatibility.
#' @return a \pkg{ggplot2} object which can be plotted via the \code{print}
#' method.
#' @name plot,cvpen-method
#' @aliases plot,cvpen-method
#' @aliases plot.cvpen
#' @docType methods
#' @rdname plot.cvpen
#'
#' @examples
#' ## Simulating multivariate Gaussian with blockwise correlation
#' ## and piecewise constant vector of parameters
#' beta <- rep(c(0,1,0,-1,0), c(25,10,25,10,25))
#' cor  <- 0.75
#' Soo  <- toeplitz(cor^(0:(25-1))) ## Toeplitz correlation for irrelevant variables
#' Sww  <- matrix(cor,10,10) ## bloc correlation between active variables
#' Sigma <- bdiag(Soo,Sww,Soo,Sww,Soo) + 0.1
#' diag(Sigma) <- 1
#' n <- 100
#' x <- as.matrix(matrix(rnorm(95*n),n,95) %*% chol(Sigma))
#' y <- 10 + x %*% beta + rnorm(n,0,10)
#'
#' ## Use fewer lambda1 values by overwritting the default parameters
#' ## and cross-validate over the sequences lambda1 and lambda2
#' \donttest{
#' cv.double <- crossval(x,y, lambda2=10^seq(2,-2,len=50), nlambda1=50)
#' }
#' ## Rerun simple cross-validation with the appropriate lambda2
#' cv.10K <- crossval(x,y, lambda2=.2)
#' ## Try leave one out also
#' cv.loo <- crossval(x,y, K=n, lambda2=0.2)
#'
#' \donttest{
#' plot(cv.double)
#' }
#' plot(cv.10K)
#' plot(cv.loo)
#'
#' ## Performance for selection purpose
#' beta.min.10K <- slot(cv.10K, "beta.min")
#' beta.min.loo <- slot(cv.loo, "beta.min")
#'
#' cat("\nFalse positives with the minimal 10-CV choice: ", sum(sign(beta) != sign(beta.min.10K)))
#' cat("\nFalse positives with the minimal LOO-CV choice: ", sum(sign(beta) != sign(beta.min.loo)))
#'
#' @importFrom graphics plot
#' @importFrom stats quantile
#' @exportMethod plot
#' @import ggplot2 scales grid
#' @export
setMethod("plot", "cvpen", definition =
  function(x, y, log.scale=TRUE, reverse=FALSE, plot=TRUE, main = "Cross-validation error", ...) {

    cv.error <- as.data.frame(x@cv.error)
    if (length(x@lambda2) > 1) {
      d <- ggplot(data=cv.error, aes_(x=~lambda1, y=~lambda2, z=~mean))
      d <- d + geom_tile(aes(fill=mean)) + stat_contour(size=0.2, binwidth=diff(range(cv.error$mean))/10) + ggtitle(main)
      d <- d + scale_x_continuous(trans=log10_trans()) + xlab(expression(log[10](lambda[1])))
      d <- d + scale_y_continuous(trans=log10_trans()) + ylab(expression(log[10](lambda[2])))
      d <- d + annotation_logticks()
      in.1se <- which(cv.error$mean - cv.error$serr <= min(cv.error$mean))
      d <- d + stat_contour(alpha=0.5, colour="#CCCCCC", size=0.65, breaks=quantile(cv.error$mean[in.1se], probs=seq(0,1,len=6)))
    } else {
      ## SIMPLE CROSS-VALIDATION GRAPH
      if (log.scale) {
        cv.error$lambda1 <- log10(cv.error$lambda1)
      }
      cv.error$ymin <- cv.error$mean-cv.error$serr
      cv.error$ymax <- cv.error$mean+cv.error$serr
      d <- ggplot(cv.error, aes_(x=~lambda1,y=~mean)) + ylab("Mean square error") + geom_point(alpha=0.3)
      d <- d + geom_line() + geom_ribbon(aes_(ymin=~ymin, ymax=~ymax), data=cv.error, alpha=0.2, stat="identity")
      if (reverse==TRUE) {
        d <- d + scale_x_reverse()
      }
      if (log.scale) {
        d <- d + xlab(expression(log[10](lambda[1])))
        d <- d + annotation_logticks(sides="b")
      } else {
        d <- d + xlab( expression(lambda[1]) )
      }
      if (log.scale) {
        lambda <- data.frame(xval=log10(c(x@lambda1.min,x@lambda1.1se)),
                             lambda.choice=factor(c("min. MSE","1-se rule")))
      } else {
        lambda <- data.frame(xval=c(x@lambda1.min,x@lambda1.1se),
                             lambda.choice=factor(c("min. MSE","1-se rule")))
      }
      d <- d + ggtitle(main) +
        geom_vline(data=lambda, aes_(xintercept=~xval,colour=~lambda.choice), linetype="dashed",  alpha=0.5, show.legend = TRUE)
    }
    ## DO THE PLOT
    if (plot) {print(d)}

    return(d)
  })