R/resamples.R

"resamples" <- function(x, ...) UseMethod("resamples")

resamples.default <- function(x, modelNames = names(x), ...)
  {

    ## Do a lot of checkin of the object types and make sure
    ## that they actually used the samples samples in the resamples
    if(length(x) < 2) stop("at least two train objects are needed")
    classes <- unlist(lapply(x, function(x) class(x)[1]))
    if(!all(classes %in% c("sbf", "rfe", "train"))) stop("all objects in x must of class train, sbf or rfe")

 
    numResamp <- unlist(lapply(x, function(x) length(x$control$index)))
    if(length(unique(numResamp)) > 1) stop("There are different numbers of resamples in each model")
    
    
    for(i in 1:numResamp[1])
      {
        indices <- lapply(x, function(x, i) sort(x$control$index[[1]]), i = i)
        uniqueIndex <- length(table(table(unlist(indices))))
        if(length(uniqueIndex) > 1) stop("The samples indices are not equal across resamples")
      }

    getNames <- function(x)
      {
        switch(class(x)[1],
               sbf = x$metrics,
               train =, rfe = x$perfNames)               
      }

    getTimes <- function(x)
      {
        out <- rep(NA, 3)
        if(all(names(x) != "times")) return(out)
        if(any(names(x$times) == "everything")) out[1] <- x$time$everything[3]
        if(any(names(x$times) == "final")) out[2] <- x$time$final[3]
        if(any(names(x$times) == "prediction")) out[3] <- x$time$prediction[3]
        out
      }
    
    perfs <- lapply(x, getNames)
    if(length(unique(unlist(lapply(perfs, length)))) != 1) stop("all objects must has the same performance metrics")
    perfs <- do.call("rbind", perfs)
    uniques <- apply(perfs, 2, function(x) length(unique(x)))
    if(!all(uniques == 1)) stop("all objects must has the same performance metrics")
    pNames <- unique(as.vector(perfs))

    
    if(is.null(modelNames)) modelNames <- paste("Model", seq(along = x))
    for(i in seq(along = x))
      {

        ## TODO check for returnResamp in control object and select appropriate
        ## data for train
        if(class(x[[i]])[1] == "rfe" && x[[i]]$control$returnResamp == "all")
          {
            x[[i]]$resample <- subset(x[[i]]$resample, Variables == x[[i]]$bestSubset)
          }
##        if(class(x[[i]])[1] == "train" && x[[i]]$control$returnResamp == "all")
##          {
##            x[[i]]$resample <- merge(x[[i]]$resample, x[[i]]$bestTune)
##          }          
        tmp <- x[[i]]$resample[, c(pNames, "Resample"), drop = FALSE]
        names(tmp)[names(tmp) %in% pNames] <- paste(modelNames[i], names(tmp)[names(tmp) %in% pNames], sep = "~")
        out <- if(i == 1) tmp else merge(out, tmp)
      }
    if(any(unlist(lapply(x, function(x) x$control$returnResamp)) != "final")) stop("some model did not have 'returnResamp=\"final\"")

    timings <- do.call("rbind", lapply(x, getTimes))
    colnames(timings) <- c("Everything", "FinalModel", "Prediction")

    
    out <- structure(
                     list(
                          call = match.call(),
                          values = out,
                          models = modelNames,
                          metrics = pNames,
                          timings = as.data.frame(timings),
                          methods = unlist(lapply(x, function(x) x$method))),
                     class = "resamples")
    
    
    out
  }


prcomp.resamples <- function(x, metric = x$metrics[1],  ...)
  {
    
    if(length(metric) != 1) stop("exactly one metric must be given")

    tmpData <- x$values[, grep(paste("~", metric, sep = ""),
                               names(x$value),
                               fixed = TRUE),
                        drop = FALSE]
    names(tmpData) <- gsub(paste("~", metric, sep = ""),
                           "",
                           names(tmpData),
                           fixed = TRUE)

    tmpData <- t(tmpData)
    out <- prcomp(tmpData)
    out$metric <- metric
    out$call <- match.call()
    class(out) <- c("prcomp.resamples", "prcomp")
    out
  }


"cluster" <- function(x, ...) UseMethod("cluster")

cluster.default <- function(x, ...) stop("only implemented for resamples objects")

cluster.resamples <- function(x, metric = x$metrics[1],  ...)
  {
    
    if(length(metric) != 1) stop("exactly one metric must be given")

    tmpData <- x$values[, grep(paste("~", metric, sep = ""),
                               names(x$value),
                               fixed = TRUE),
                        drop = FALSE]
    names(tmpData) <- gsub(paste("~", metric, sep = ""),
                           "",
                           names(tmpData),
                           fixed = TRUE)

    tmpData <- t(tmpData)
    dt <- dist(tmpData)
    out <- hclust(dt, ...)
    out$metric <- metric
    out$call <- match.call()
    class(out) <- c("cluster.resamples", "hclust")
    out
  }


plot.prcomp.resamples <- function(x, what = "scree", dims = max(2, ncol(x$rotation)), ...)
{
  if(length(what) > 1) stop("one plot at a time please")
  switch(what,
         scree =
         {
           barchart(x$sdev ~ paste("PC", seq(along = x$sdev)),
                    ylab = "Standard Deviation", ...)
         },
         cumulative =
         {
           barchart(cumsum(x$sdev^2)/sum(x$sdev^2) ~ paste("PC", seq(along = x$sdev)),
                    ylab = "Culmulative Percent of Variance", ...)
         },
         loadings =
         {

           panelRange <- extendrange(x$rotation[, 1:dims,drop = FALSE])
           if(dims > 2)
             {
               
               splom(~x$rotation[, 1:dims,drop = FALSE],
                     main = useMathSymbols(x$metric),
                     prepanel.limits = function(x) panelRange,
                     type = c("p", "g"),
                     ...)
             } else {
               xyplot(PC2~PC1, data = as.data.frame(x$rotation),
                     main = useMathSymbols(x$metric),
                     xlim = panelRange,
                     ylim = panelRange,
                     type = c("p", "g"),
                     ...)
             }

         },
         components =
         {

           panelRange <- extendrange(x$x[, 1:dims,drop = FALSE])
           if(dims > 2)
             {
               
               splom(~x$x[, 1:dims,drop = FALSE],
                     main = useMathSymbols(x$metric),
                     prepanel.limits = function(x) panelRange,
                     groups = rownames(x$x),
                     type = c("p", "g"),
                     ...)
             } else {
               xyplot(PC2~PC1, data = as.data.frame(x$x),
                     main = useMathSymbols(x$metric),
                     xlim = panelRange,
                     ylim = panelRange,
                     
                     groups = rownames(x$x),
                     type = c("p", "g"),
                     ...)
             }

         })
} 


print.prcomp.resamples <- function (x, digits = max(3, getOption("digits") - 3), print.x = FALSE, ...) 
{
  printCall(x$call)
  cat("Metric:", x$metric, "\n")


  sds <- rbind(x$sdev, cumsum(x$sdev^2)/sum(x$sdev^2))
  rownames(sds) <- c("Std. Dev. ", "Cum. Percent Var. ")
  colnames(sds) <- rep("", ncol(sds))
                     
  print(sds, digits = digits, ...)
  cat("\nRotation:\n")
  print(x$rotation, digits = digits, ...)
  if (print.x && length(x$x)) {
    cat("\nRotated variables:\n")
    print(x$x, digits = digits, ...)
  }
  invisible(x)
}

print.resamples <- function(x, ...)
  {
    printCall(x$call)
    cat("Models:", paste(x$models, collapse = ", "), "\n")
    cat("Number of resamples:", nrow(x$values), "\n")
    cat("Performance metrics:",  paste(x$metrics, collapse = ", "), "\n")

    hasTimes <- apply(x$timing, 2, function(a) !all(is.na(a)))
    if(any(hasTimes))
      {
        names(hasTimes) <- gsub("FinalModel", "final model fit", names(hasTimes))
        cat("Time estimates for:",  paste(tolower(names(hasTimes))[hasTimes], collapse = ", "), "\n")
      }
    invisible(x)
  }

summary.resamples <- function(object, ...)
{

  vals <- object$values[, names(object$values) != "Resample", drop = FALSE]

  out <- vector(mode = "list", length = length(object$metrics))
  for(i in seq(along = object$metrics))
    {
      tmpData <- vals[, grep(paste("~", object$metrics[i], sep = ""), names(vals), fixed = TRUE), drop = FALSE]
      
      out[[i]] <- do.call("rbind", lapply(tmpData, function(x) summary(x)[1:6]))
      naSum <- matrix(unlist(lapply(tmpData, function(x) sum(is.na(x)))), ncol = 1)
      colnames(naSum) <- "NA's"
      out[[i]] <- cbind(out[[i]], naSum)
      rownames(out[[i]]) <- gsub(paste("~", object$metrics[i], sep = ""),
                                 "",
                                 rownames(out[[i]]),
                                 fixed = TRUE)
    }
  
  names(out) <- object$metrics
  out <- structure(
                   list(values = vals,
                        call = match.call(),
                        statistics = out,
                        models = object$models,
                        metrics = object$metrics,
                        methods = object$methods),
                   class = "summary.resamples")
  out
}


print.summary.resamples <- function(x, ...)
  {
    printCall(x$call)
    cat("Models:", paste(x$models, collapse = ", "), "\n")
    cat("Number of resamples:", nrow(x$values), "\n")

    cat("\n")
    
    for(i in seq(along = x$statistics))
      {
        cat(names(x$statistics)[i], "\n")
        print(x$statistics[[i]])
        cat("\n")
      }
    invisible(x)
  }



xyplot.resamples <- function (x, data = NULL, what = "scatter", models = NULL, metric = x$metric[1], units = "min", ...) 
{
  if(!(units %in% c("min", "sec", "hour"))) stop("units should be 'sec', 'min' or 'hour'")
  if(!(what %in% c("scatter", "BlandAltman", "tTime", "mTime", "pTime"))) stop("the what arg should be 'scatter', 'BlandAltman', 'tTime', 'mTime' or 'pTime'")
  
  if(is.null(models)) models <- if(what %in% c("tTime", "mTime", "pTime")) x$models else x$models[1:2]
  if(length(metric) != 1) stop("exactly one metric must be given")
  if(what == "BlandAltman" & length(models) != 2) stop("exactly two model names must be given")
  if(what == "BlandAltman")
    {
      tmpData <- x$values[, paste(models, metric, sep ="~")]
      tmpData$Difference <- tmpData[,1] - tmpData[,2]
      tmpData$Average <- (tmpData[,1] + tmpData[,2])/2
      ylm <- extendrange(c(tmpData$Difference, 0))
      out <- xyplot(Difference ~ Average,
                    data = tmpData,
                    ylab = paste(models, collapse = " - "),
                    ylim = ylm,
                    main = useMathSymbols(metric),
                    panel = function(x, y, ...)
                    {
                      panel.abline(h = 0, col = "darkgrey", lty = 2)
                      panel.xyplot(x, y, ...)
                    })

    }
  if(what == "scatter")
    {
      tmpData <- x$values[, paste(models, metric, sep ="~")]
      colnames(tmpData) <- gsub(paste("~", metric, sep = ""), "", colnames(tmpData))
      
      ylm <- extendrange(c(tmpData[,1], tmpData[,2]))
      out <- xyplot(tmpData[,1] ~ tmpData[,2],
                    ylab = colnames(tmpData)[1],
                    xlab = colnames(tmpData)[2],
                    xlim = ylm, ylim = ylm,
                    main = useMathSymbols(metric),
                    panel = function(x, y, ...)
                    {
                      panel.abline(0, 1, col = "darkgrey", lty = 2)
                      panel.xyplot(x, y, ...)
                    })
    }
  if(what %in% c("tTime", "mTime", "pTime"))
    {
      ## the following is based on
      ## file.show(system.file("demo/intervals.R", package = "lattice")
      prepanel.ci <- function(x, y, lx, ux, subscripts, ...)
        {
          x <- as.numeric(x)
          lx <- as.numeric(lx[subscripts])
          ux <- as.numeric(ux[subscripts])
          list(xlim = extendrange(c(x, ux, lx)),
               ylim = extendrange(y))
        }

      panel.ci <- function(x, y, lx, ux, groups, subscripts, pch = 16, ...)
        {
          theme <- trellis.par.get()
          x <- as.numeric(x)
          y <- as.numeric(y)
          lx <- as.numeric(lx[subscripts])
          ux <- as.numeric(ux[subscripts])
          gps <- unique(groups)
          for(i in seq(along = gps))
            {
              panel.arrows(lx[groups == gps[i]],
                           y[groups == gps[i]],
                           ux[groups == gps[i]],
                           y[groups == gps[i]],
                           col = theme$superpose.line$col[i],
                           length = .01, unit = "npc",
                           angle = 90, code = 3)
              panel.xyplot(x[groups == gps[i]],
                           y[groups == gps[i]],
                           type = "p",
                           col = theme$superpose.symbol$col[i],
                           cex = theme$superpose.symbol$cex[i],
                           pch = theme$superpose.symbol$pch[i],
                           , ...)              
            }
        }
      tmpData <- apply(x$values[,-1], 2,
                       function(x)
                       {
                         tmp <- t.test(x)
                         c(tmp$conf.int[1], tmp$estimate, tmp$conf.int[2])
                       })
      rownames(tmpData) <- c("lower", "point", "upper")

      tmpData <- tmpData[, paste(models, metric, sep ="~")]
      colnames(tmpData) <- gsub(paste("~", metric, sep = ""), "", colnames(tmpData))
      tmpData <- data.frame(t(tmpData))
      tmpData$Model <- rownames(tmpData)

      tm <- switch(what,
                   tTime = x$timings[,"Everything"],
                   mTime = x$timings[,"FinalModel"],
                   pTime = x$timings[,"Prediction"])
      lab <- switch(what,
                   tTime = "Total Time (",
                   mTime = "Final Model Time (",
                   pTime = "Prediction Time (")
      lab <- paste(lab, units, ")", sep = "")
                   
      times <- data.frame(Time = tm,
                          Model = rownames(x$timings))
      plotData <- merge(times, tmpData)

      if(units == "min") plotData$Time <- plotData$Time/60
      if(units == "hour") plotData$Time <- plotData$Time/60/60
      
      out <- with(plotData,
                  xyplot(Time ~ point,
                         lx = lower, ux = upper,
                         xlab = useMathSymbols(metric),
                         ylab = lab,
                         prepanel = prepanel.ci,
                         panel = panel.ci, groups = Model,
                         ...))
    }  
  out
}
 
parallelplot.resamples <- function (x, data = NULL, models = x$models, metric = x$metric[1], ...) 
{
  if(length(metric) != 1) stop("exactly one metric must be given")

  tmpData <- x$values[, grep(paste("~", metric, sep = ""),
                             names(x$value),
                             fixed = TRUE),
                      drop = FALSE]
  names(tmpData) <- gsub(paste("~", metric, sep = ""),
                         "",
                         names(tmpData),
                         fixed = TRUE)
  rng <- range(unlist(lapply(lapply(tmpData, as.numeric), range, na.rm = TRUE)))
  prng <- pretty(rng)

  reord <- order(apply(tmpData, 2, median, na.rm = TRUE))
  tmpData <- tmpData[, reord]

  parallelplot(~tmpData,
               common.scale = TRUE,
               scales = list(x = list(at = (prng-min(rng))/diff(rng), labels = prng)),
               xlab = useMathSymbols(metric),
               ...)
  
}

splom.resamples <- function (x, data = NULL, variables = "models",
                             models = x$models,
                             metric = NULL,
                             panelRange = NULL,
                             ...) 
{

  if(variables == "models")
    {

      if(is.null(metric)) metric <- x$metric[1]
      if(length(metric) != 1) stop("exactly one metric must be given")

      tmpData <- x$values[, grep(paste("~", metric, sep = ""),
                                 names(x$value),
                                 fixed = TRUE),
                          drop = FALSE]
      names(tmpData) <- gsub(paste("~", metric, sep = ""),
                             "",
                             names(tmpData),
                             fixed = TRUE)
      if(is.null(panelRange)) panelRange <- extendrange(tmpData)
      splom(~tmpData,
            panel = function(x, y)
            {
              panel.splom(x, y, ...)
              panel.abline(0, 1, lty = 2, col = "darkgrey")

            },
            main = useMathSymbols(metric),
            prepanel.limits = function(x) panelRange,
            ...)
    } else{
      if(variables == "metrics")
        {
      if(is.null(metric)) metric <- x$metric
          if(length(metric) < 2) stop("There should be at least two metrics")
          plotData <- melt(x$values, id.vars = "Resample")
          tmp <- strsplit(as.character(plotData$variable), "~", fixed = TRUE)
          plotData$Model <- unlist(lapply(tmp, function(x) x[1]))
          plotData$Metric <- unlist(lapply(tmp, function(x) x[2]))
          plotData <- subset(plotData, Model %in% models & Metric  %in% metric)
          means <- dcast(plotData, Model ~ Metric, mean, na.rm = TRUE)
          splom(~means[, metric], groups = means$Model, ...)

        } else stop ("'variables' should be either 'models' or 'metrics'")

    }
  
}


densityplot.resamples <- function (x, data = NULL, models = x$models, metric = x$metric, ...) 
{
  plotData <- melt(x$values, id.vars = "Resample")
  tmp <- strsplit(as.character(plotData$variable), "~", fixed = TRUE)
  plotData$Model <- unlist(lapply(tmp, function(x) x[1]))
  plotData$Metric <- unlist(lapply(tmp, function(x) x[2]))
  plotData <- subset(plotData, Model %in% models & Metric  %in% metric)

  metricVals <- unique(plotData$Metric)
  plotForm <- if(length(metricVals) > 1) as.formula(~value|Metric) else as.formula(~value)
  densityplot(plotForm, data = plotData, groups = Model,
              xlab = if(length(unique(plotData$Metric)) > 1) "" else metricVals, ...)
                         
}



bwplot.resamples <- function (x, data = NULL, models = x$models, metric = x$metric, ...) 
{
  plotData <- melt(x$values, id.vars = "Resample")
  tmp <- strsplit(as.character(plotData$variable), "~", fixed = TRUE)
  plotData$Model <- unlist(lapply(tmp, function(x) x[1]))
  plotData$Metric <- unlist(lapply(tmp, function(x) x[2]))
  plotData <- subset(plotData, Model %in% models & Metric  %in% metric)
  avPerf <- ddply(subset(plotData, Metric == metric[1]),
                  .(Model),
                  function(x) c(Median = median(x$value, na.rm = TRUE)))
  avPerf <- avPerf[order(avPerf$Median),]
  plotData$Model <- factor(as.character(plotData$Model),
                           levels = avPerf$Model)
  metricVals <- unique(plotData$Metric)
  plotForm <- if(length(metricVals) > 1) as.formula(Model~value|Metric) else as.formula(Model~value)
  bwplot(plotForm, data = plotData,
         xlab = if(length(unique(plotData$Metric)) > 1) "" else metricVals, ...)
                         
}



dotplot.resamples <- function (x, data = NULL, models = x$models, metric = x$metric, conf.level = 0.95, ...) 
{
  plotData <- melt(x$values, id.vars = "Resample")
  tmp <- strsplit(as.character(plotData$variable), "~", fixed = TRUE)
  plotData$Model <- unlist(lapply(tmp, function(x) x[1]))
  plotData$Metric <- unlist(lapply(tmp, function(x) x[2]))
  plotData <- subset(plotData, Model %in% models & Metric  %in% metric)
  plotData$variable <- factor(as.character(plotData$variable))

  plotData <- split(plotData, plotData$variable)
  results <- lapply(plotData,
                    function(x, cl)
                    {
                      ttest <- try(t.test(x$value, conf.level = cl),
                                   silent = TRUE)
                      if(class(ttest)[1] == "htest")
                        {
                          out <- c(ttest$conf.int, ttest$estimate)
                          names(out) <- c("LowerLimit", "UpperLimit", "Estimate")
                        } else out <- rep(NA, 3)
                      out
                    },
                    cl = conf.level)
  results <- do.call("rbind", results)
  results <- melt(results)
  results <- results[!is.nan(results$value),]
  tmp <- strsplit(as.character(results$Var1), "~", fixed = TRUE)
  results$Model <- unlist(lapply(tmp, function(x) x[1]))
  results$Metric <- unlist(lapply(tmp, function(x) x[2]))
  ## to avoid "no visible binding for global variable 'Var2'"
  Var2 <- NULL
  avPerf <- ddply(subset(results, Metric == metric[1] & Var2 == "Estimate"),
                  .(Model),
                  function(x) c(Median = median(x$value, na.rm = TRUE)))
  avPerf <- avPerf[order(avPerf$Median),]
  results$Model <- factor(as.character(results$Model),
                           levels = avPerf$Model)
  metricVals <- unique(results$Metric)
  plotForm <- if(length(metricVals) > 1) as.formula(Model~value|Metric) else as.formula(Model~value)
  dotplot(plotForm,
         data = results,
         xlab = if(length(unique(plotData$Metric)) > 1) "" else metricVals,
         panel = function(x, y)
         {
           plotTheme <- trellis.par.get()
           y <- as.numeric(y)
           vals <- aggregate(x, list(group = y), function(x) c(min = min(x), mid = median(x), max = max(x)))
           
           panel.dotplot(vals$x[,"mid"], vals$group,
                         pch = plotTheme$plot.symbol$pch[1],
                         col = plotTheme$plot.symbol$col[1],
                         cex = plotTheme$plot.symbol$cex[1])
           panel.segments(vals$x[,"min"], vals$group, 
                          vals$x[,"max"], vals$group, 
                          lty = plotTheme$plot.line$lty[1],
                          col = plotTheme$plot.line$col[1],
                          lwd = plotTheme$plot.line$lwd[1])
           len <- .03
           panel.segments(vals$x[,"min"], vals$group+len, 
                          vals$x[,"min"], vals$group-len, 
                          lty = plotTheme$plot.line$lty[1],
                          col = plotTheme$plot.line$col[1],
                          lwd = plotTheme$plot.line$lwd[1])
           panel.segments(vals$x[,"max"], vals$group+len, 
                          vals$x[,"max"], vals$group-len, 
                          lty = plotTheme$plot.line$lty[1],
                          col = plotTheme$plot.line$col[1],
                          lwd = plotTheme$plot.line$lwd[1])           
          
         },
          sub = paste("Confidence Level:", conf.level),
         ...)
  
}


diff.resamples <- function(x,
                           models = x$models,
                           metric = x$metrics,
                           test = t.test,
                           confLevel = 0.95,
                           adjustment = "bonferroni",
                           ...)
  {

    allDif <- vector(mode = "list", length = length(metric))
    names(allDif) <- metric

    x$models <- x$models[x$models %in% models]
    p <- length(x$models)
    ncomp <- choose(p, 2)
    if(adjustment == "bonferroni") confLevel <- 1 - ((1 - confLevel)/ncomp)
    allStats <- allDif
    
    for(h in seq(along = metric))
      {
        index <- 0
        dif <- matrix(NA,
                      nrow = nrow(x$values),
                      ncol = choose(length(models), 2))
        stat <- vector(mode = "list", length = choose(length(models), 2))
        
        colnames(dif) <- paste("tmp", 1:ncol(dif), sep = "")
        for(i in seq(along = models))
          {
            for(j in seq(along = models))
              {
                if(i < j)
                  {
                    index <- index + 1
                    
                    left <- paste(models[i], metric[h], sep = "~")
                    right <- paste(models[j], metric[h], sep = "~")
                    dif[,index] <- x$values[,left] - x$values[,right]
                    colnames(dif)[index] <- paste(models[i], models[j], sep = ".diff.")
                  }
              }
          }

        stats <- apply(dif, 2, function(x, tst, ...) tst(x, ...), tst = test, conf.level = confLevel, ...)
        
        allDif[[h]] <- dif
        allStats[[h]] <- stats
      }
    out <- structure(
                     list(
                          call = match.call(),
                          difs = allDif,
                          confLevel = confLevel,
                          adjustment = adjustment,
                          statistics = allStats,
                          models = models,
                          metric = metric),
                     class = "diff.resamples")
    out
  }


densityplot.diff.resamples <- function(x, data, metric = x$metric, ...)
  {
    plotData <- lapply(x$difs,
                       function(x) stack(as.data.frame(x)))
    plotData <- do.call("rbind", plotData)
    plotData$Metric <- rep(x$metric, each = length(x$difs[[1]]))
    plotData$ind <- gsub(".diff.", " - ", plotData$ind, fixed = TRUE)
    plotData <- subset(plotData, Metric %in% metric)
    metricVals <- unique(plotData$Metric)
    plotForm <- if(length(metricVals) > 1) as.formula(~values|Metric) else as.formula(~values)
    
    densityplot(plotForm, data = plotData, groups = ind,
                xlab = if(length(unique(plotData$Metric)) > 1) "" else metricVals, ...)

  }


bwplot.diff.resamples <- function(x, data, metric = x$metric, ...)
  {
    plotData <- lapply(x$difs,
                       function(x) stack(as.data.frame(x)))
    plotData <-  do.call("rbind", plotData)
    plotData$Metric <- rep(x$metric, each = length(x$difs[[1]]))
    plotData$ind <- gsub(".diff.", " - ", plotData$ind, fixed = TRUE)
    plotData <- subset(plotData, Metric %in% metric)
    metricVals <- unique(plotData$Metric)
    plotForm <- if(length(metricVals) > 1) as.formula(ind ~ values|Metric) else as.formula(ind ~ values)
    
    bwplot(plotForm,
           data = plotData,
           xlab = if(length(unique(plotData$Metric)) > 1) "" else metricVals,
           ...)

  }

print.diff.resamples <- function(x, ...)
  {
    printCall(x$call)
    cat("Models:", paste(x$models, collapse = ", "), "\n")
    cat("Metrics:", paste(x$metric, collapse = ", "), "\n")
    cat("Number of differences:",  ncol(x$difs[[1]]), "\n")
    cat("p-value adjustment:",  x$adjustment, "\n")    
    invisible(x)
  }


summary.diff.resamples <- function(object, digits = max(3, getOption("digits") - 3), ...)
{
  comps <- ncol(object$difs[[1]])

  
  all <- vector(mode = "list", length = length(object$metric))
  names(all) <- object$metric

  for(h in seq(along = object$metric))
    {
      pvals <- matrix(NA, nrow = length(object$models), ncol = length( object$models))
      meanDiff <- pvals
      index <- 0
      for(i in seq(along = object$models))
        {
          for(j in seq(along = object$models))
            {
              
              if(i < j)
                {
                  index <- index + 1
                  meanDiff[i, j] <- object$statistics[[h]][index][[1]]$estimate
                }
            }
        }
      
      index <- 0
      for(i in seq(along = object$models))
        {
          for(j in seq(along = object$models))
            {
              
              if(i < j)
                {
                  index <- index + 1
                  pvals[j, i] <- object$statistics[[h]][index][[1]]$p.value

                }
            }
        }
      pvals[lower.tri(pvals)] <- p.adjust(pvals[lower.tri(pvals)], method = object$adjustment)
      asText <- matrix("", nrow = length(object$models), ncol = length( object$models))
      meanDiff2 <- format(meanDiff, digits = digits)
      pvals2 <- matrix(format.pval(pvals, digits = digits), nrow = length( object$models))
      asText[upper.tri(asText)] <- meanDiff2[upper.tri(meanDiff2)]
      asText[lower.tri(asText)] <- pvals2[lower.tri(pvals2)]
      diag(asText) <- ""
      colnames(asText) <- object$models
      rownames(asText) <- object$models
      all[[h]] <- asText
    }  
  
  out <- structure(
                   list(
                        call = match.call(),
                        adjustment = object$adjustment,
                        table = all),
                   class = "summary.diff.resamples")
  out
}


levelplot.diff.resamples <- function(x, data = NULL, metric = x$metric[1], what = "pvalues", ...)
{
  comps <- ncol(x$difs[[1]])
  if(length(metric) != 1) stop("exactly one metric must be given")
  
  all <- vector(mode = "list", length = length(x$metric))
  names(all) <- x$metric

  for(h in seq(along = x$metric))
    {
      temp <- matrix(NA, nrow = length(x$models), ncol = length( x$models))
      index <- 0
      for(i in seq(along = x$models))
        {
          for(j in seq(along = x$models))
            {
              
              if(i < j)
                {
                  index <- index + 1
                   temp[i, j] <-  if(what == "pvalues")
                     {
                       x$statistics[[h]][index][[1]]$p.value
                     } else x$statistics[[h]][index][[1]]$estimate
                 temp[j, i] <- temp[i, j] 
                }
            }
        }
      colnames(temp) <- x$models
      temp  <- as.data.frame(temp)
      temp$A <- x$models
      temp$Metric <- x$metric[h]
      all[[h]] <- temp
    }
  all <- do.call("rbind", all)
  all <- melt(all, measure.vars = x$models)
  names(all)[names(all) == "variable"] <- "B"
  all$A <- factor(all$A, levels = x$models)
  all$B <- factor(as.character(all$B), levels = x$models)
  
  all <- all[complete.cases(all),]
  levelplot(value ~ A + B|Metric,
            data = all,
            subset = Metric %in% metric,
            xlab = "",
            ylab = "",
            sub = ifelse(what == "pvalues", "p-values", "difference = (x axis - y axis)"),
            ...)
}




print.summary.diff.resamples <- function(x, ...)
  {
    printCall(x$call)

    if(x$adjustment != "none")
      cat("p-value adjustment:", x$adjustment, "\n")

    
    cat("Upper diagonal: estimates of the difference\n",
        "Lower diagonal: p-value for H0: difference = 0\n\n",
        sep = "")
        
    for(i in seq(along = x$table))
      {
        cat(names(x$table)[i], "\n")
        print(x$table[[i]], quote = FALSE)
        cat("\n")
      }
    invisible(x)
  }


dotplot.diff.resamples <- function(x, data = NULL, metric = x$metric[1], ...)
  {
    if(length(metric) > 1)
      {
        metric <- metric[1]
        warning("Sorry Dave, only one value of metric is allowed right now. I'll use the first value")

      }
    h <- which(x$metric == metric)
    plotData <- as.data.frame(matrix(NA, ncol = 3, nrow = ncol(x$difs[[metric]])))
    ## Get point and interval estimates on the differences
    index <- 0
    for(i in seq(along = x$models))
      {
        for(j in seq(along = x$models))
          {
            
            if(i < j)
              {
                index <- index + 1
                plotData[index, 1] <- x$statistics[[h]][index][[1]]$estimate
                plotData[index, 2:3] <- x$statistics[[h]][index][[1]]$conf.int

              }
          }
      }
    names(plotData)[1:3] <- c("Estimate", "LowerLimit", "UpperLimit")
    plotData$Difference <- gsub(".diff.", " - ", colnames(x$difs[[metric]]), fixed = TRUE)
    plotData <- melt(plotData, id.vars = "Difference")
    xText <- paste("Difference in",
                   useMathSymbols(metric),
                   "\nConfidence Level",
                   round(x$confLevel, 3),
                   ifelse(x$adjustment == "bonferroni",
                          " (multiplicity adjusted)",
                          " (no multiplicity adjustment)"))
    dotplot(Difference ~ value,
            data = plotData,
            xlab = xText,
            panel = function(x, y)
            {
              plotTheme <- trellis.par.get()
              

              middle <- aggregate(x, list(mod = y), median)
              upper <- aggregate(x, list(mod = as.numeric(y)), max)
              lower <- aggregate(x, list(mod = as.numeric(y)), min)
              panel.dotplot(middle$x, middle$mod,
                            col = plotTheme$plot.symbol$col[1],
                            pch = plotTheme$plot.symbol$pch[1],
                            cex = plotTheme$plot.symbol$cex[1])
              panel.abline(v = 0,
                           col = plotTheme$reference.line$col[1],
                           lty = plotTheme$reference.line$lty[1],
                           lwd = plotTheme$reference.line$lwd[1])
              for(i in seq(along = upper$mod))
                {
                  panel.segments(upper$x[i], upper$mod[i], lower$x[i], lower$mod[i],
                                 col = plotTheme$plot.line$col[1],
                                 lwd = plotTheme$plot.line$lwd[1],
                                 lty = plotTheme$plot.line$lty[1])
                  len <- .03
                  panel.segments(lower$x[i], upper$mod[i]+len, 
                                 lower$x[i], lower$mod[i]-len, 
                                 lty = plotTheme$plot.line$lty[1],
                                 col = plotTheme$plot.line$col[1],
                                 lwd = plotTheme$plot.line$lwd[1])
                  panel.segments(upper$x[i],upper$mod[i]+len, 
                                 upper$x[i], lower$mod[i]-len, 
                                 lty = plotTheme$plot.line$lty[1],
                                 col = plotTheme$plot.line$col[1],
                                 lwd = plotTheme$plot.line$lwd[1])
                }
            },
            ...)
  }


modelCor <- function(x, metric = x$metric[1], ...)
{
  dat <- x$values[, grep(paste("~", metric[1], sep = ""), names(x$values))]
  colnames(dat) <- gsub(paste("~", metric[1], sep = ""), "", colnames(dat))
  cor(dat, ...)
}

sort.resamples <- function(x, decreasing = FALSE, metric = x$metric[1], FUN = mean, ...) 
{
  dat <- x$values[, grep(paste("~", metric[1], sep = ""), names(x$values))]
  colnames(dat) <- gsub(paste("~", metric[1], sep = ""), "", colnames(dat))
  stats <- apply(dat, 2, FUN, ...)
  names(sort(stats, decreasing = decreasing))
}

Try the caret package in your browser

Any scripts or data that you put into this service are public.

caret documentation built on May 2, 2019, 5:47 p.m.