R/earth.cv.R

Defines functions print_cv trace.get.fold2 trace.get.fold1 trace.fold trace.fold.header trace.cv.header get.this.group get.groups check.cv.args col.means.with.special.na.handling earth.cv

# earth.cv.R: Functions for cross validation of earth models.
#             Note that earth.cv returns null unless nfold > 1.

earth.cv <- function(object, x, y, subset, weights, na.action,
    pmethod, keepxy, trace, trace.org, glm.arg, degree,
    nfold, ncross, stratify, get.oof.fit.tab, get.oof.rsq.per.subset,
    Scale.y, env, ...)
{
    get.fold.rsq.per.subset <- function(foldmod, oof.y, max.nterms, trace, must.print.dots)
    {
        wp.expanded <- wp.expanded / sum(wp.expanded)
        oof.rsq.per.subset <- infold.rsq.per.subset <- repl(0, max.nterms)
        # nrow(foldmod$dirs) is the number of terms in this fold's model before pruning
        for(nterms in 1:min(max.nterms, nrow(foldmod$dirs))) {
            trace.get.fold1(trace, must.print.dots, nterms)
            # penalty=-1 to enforce strict nprune
            # we set nprune=nterms so earth_plotmodsel shows oof.rsq
            # for all submodels even when the user specifies nprune
            # TODO with keepxy=TRUE, 70% of cv time is spent in update.earth
            # TODO check that subset arg of main call to earth is handled correctly here
            pruned.foldmod <- update.earth(foldmod, nprune=nterms,
                    penalty=-1, ponly=TRUE,
                    # glm=NULL for speed, ok because we don't need the glm submodel,
                    # except if is.bpairs must convert bpairs to yfrac in pruned.foldmod
                    glm=if(is.bpairs) glm.arg else NULL,
                    trace=max(0, trace-1))
            fit <- predict.earth(pruned.foldmod, newdata=x, type="earth")
            oof.fit    <- fit[oof.subset,    , drop=FALSE]
            infold.fit <- fit[infold.subset, , drop=FALSE]
            for(iresp in seq_len(NCOL(fit))) { # for each response
                oof.rsq.per.subset[nterms] <- oof.rsq.per.subset[nterms] +
                    wp.expanded[iresp] *
                        get.weighted.rsq(oof.y[,iresp], oof.fit[,iresp], oof.weights)
                infold.rsq.per.subset[nterms] <- infold.rsq.per.subset[nterms] +
                    wp.expanded[iresp] *
                        get.weighted.rsq(infold.y[,iresp], infold.fit[,iresp], infold.weights)
            }
            if(nrow(foldmod$dirs) < max.nterms)
                for(nterms in (nrow(foldmod$dirs)+1): max.nterms)
                    oof.rsq.per.subset[nterms] <- infold.rsq.per.subset[nterms] <- NA
        }
        trace.get.fold2(trace, must.print.dots, nterms)
        list(oof.rsq.per.subset    = oof.rsq.per.subset,
             infold.rsq.per.subset = infold.rsq.per.subset)
    }
    #--- earth.cv starts here ---
    # We called check.cv.args(nfold, ncross, varmod.method, pmethod)
    # earlier so it's safe to use those args here.
    # Likewise, subset arg was already checked in earth.fit.
    stratify <- check.boolean(stratify)
    stopifnot(nfold > 1, ncross >= 1)
    if(nfold > nrow(x))
        nfold <- nrow(x)
    is.bpairs <- !is.null(object$glm.bpairs) # response is glm binomial pairs
    nresp <- if(is.bpairs) 1 else ncol(y) # number of responses
    max.nterms <- nrow(object$dirs)
    wp <- wp.expanded <- object$wp
    if(is.null(wp))
        wp.expanded <- repl(1, nresp) # all ones vector
    cv.list <- list()                   # returned list of cross validated models
    ncases <- nrow(x)
    # ndigits aligns trace prints without too much white space
    ndigits <- ceiling(log10(ncases))
    # print pacifier dots if get.fold.rss.per.subset will be slow
    must.print.dots <- trace >= .5 && trace <= 1 &&
        (get.oof.rsq.per.subset || get.oof.fit.tab) &&
        nrow(x) * max.nterms > 50e3
    trace.cv.header(object, nresp, pmethod,
                    if(pmethod == "cv") "backward" else pmethod,
                    trace.org, must.print.dots)
    groups <- matrix(NA, nrow=ncross*ncases, ncol=2)
    colnames(groups) <- c("cross", "fold")
    for(icross in seq_len(ncross)) {
        start <- ((icross-1) * ncases) + 1
        groups[start:(start+ncases-1), 1] <- icross
        groups[start:(start+ncases-1), 2] <- get.groups(y, nfold, stratify)
    }
    is.binomial <- is.poisson <- FALSE
    if(!is.null(glm.arg)) {
        # TODO revisit the following to save memory?
        family <- get.glm.family(glm.arg$family, env=env)
        is.binomial <- is.binomial(family)
        is.poisson <- is.poisson(family)
    }
    must.get.class.rate <- !is.null(object$levels)
    # the final summary row of the tables is "mean", "all" or "max", depending on the statistic.
    if(ncross > 1)
        fold.names <- paste0(rep(paste0("fold", seq_len(ncross), "."), each=nfold),
                             rep(seq_len(nfold), times=ncross))
    else
        fold.names <- sprint("fold%d", seq_len(nfold))
    fold.names.plus.mean <- c(fold.names, "mean")
    fold.names.plus.all  <- c(fold.names, "all")
    fold.names.plus.max  <- c(fold.names, "max")
    resp.names <- if(is.bpairs) colnames(y)[1] else colnames(y)
    resp.names.plus.mean <- c(resp.names, "mean") # response names plus "mean"
    resp.names.plus.max  <- c(resp.names, "max")
    ncross.fold <- ncross * nfold
    nvars.selected.by.gcv <- double(ncross.fold+1)  # nbr of used predictors in each CV mod
    nterms.selected.by.gcv <- double(ncross.fold+1) # nbr of selected terms in each CV mod
    names(nvars.selected.by.gcv) <- fold.names.plus.mean
    names(nterms.selected.by.gcv) <- fold.names.plus.mean

    rsq.tab <- matrix(0, nrow=ncross.fold+1, ncol=1+nresp) # table of cv results, +1 for means
    colnames(rsq.tab) <- resp.names.plus.mean
    rownames(rsq.tab) <- fold.names.plus.mean

    maxerr.tab <- matrix(0, nrow=ncross.fold+1, ncol=1+nresp) # table of cv results, +1 for max
    colnames(maxerr.tab) <- resp.names.plus.max
    rownames(maxerr.tab) <- fold.names.plus.max

    deviance.tab <- calib.int.tab <- calib.slope.tab <- test.tab <- class.rate.tab <- NULL
    if(is.binomial || is.poisson) {
        deviance.tab    <- matrix(0, nrow=ncross.fold+1, ncol=1+nresp) # mean deviance
        calib.int.tab   <- matrix(0, nrow=ncross.fold+1, ncol=1+nresp)
        calib.slope.tab <- matrix(0, nrow=ncross.fold+1, ncol=1+nresp)
        test.tab        <- matrix(0, nrow=ncross.fold+1, ncol=1+nresp) # binomial auc, poisson cor
        colnames(deviance.tab) <- colnames(calib.int.tab) <-
            colnames(calib.slope.tab) <- colnames(test.tab) <- resp.names.plus.mean
        rownames(deviance.tab) <- rownames(calib.int.tab) <-
            rownames(calib.slope.tab) <- rownames(test.tab) <- fold.names.plus.mean
    }
    if(must.get.class.rate) {
        class.rate.tab <- matrix(0, nrow=ncross.fold+1, ncol=1+nresp)
        colnames(class.rate.tab) <- resp.names.plus.mean
        rownames(maxerr.tab) <- fold.names.plus.all
    }
    oof.rsq.tab <- infold.rsq.tab <- oof.fit.tab <- NULL
    if(get.oof.rsq.per.subset) {
        oof.rsq.tab <- infold.rsq.tab <- matrix(0, nrow=ncross.fold+1, ncol=max.nterms)
        colnames(oof.rsq.tab) <- colnames(infold.rsq.tab) <- paste0("nterms", 1:max.nterms)
        rownames(oof.rsq.tab) <- rownames(infold.rsq.tab) <- fold.names.plus.all
    }
    if(get.oof.fit.tab) {
        oof.fit.tab <- matrix(0, nrow=nrow(x), ncol=ncross) # preds on oof data
        colnames(oof.fit.tab) <- paste0("icross", seq_len(ncross))
    }
    for(icross in seq_len(ncross)) {
        this.group <- get.this.group(icross, ifold, ncases, groups)
        for(ifold in seq_len(nfold)) {
            icross.fold <- ((icross-1) * nfold) + ifold
            oof.subset <- seq_len(ncases)[which(this.group == ifold)]
            infold.subset <- seq_len(ncases)[-oof.subset]
            trace.fold.header(trace, ncross, icross, ifold)
            infold.x <- x[infold.subset,,drop=FALSE]
            infold.y <- y[infold.subset,,drop=FALSE]
            infold.weights <- weights[infold.subset]
            foldmod <- earth.default(x=infold.x, y=infold.y, weights=infold.weights,
                wp=wp, Scale.y=Scale.y, subset=subset,
                trace=trace, glm=glm.arg, degree=degree,
                pmethod=if(pmethod == "cv") "backward" else pmethod,
                nfold=0, ncross=0, varmod.method="none",
                keepxy=(keepxy == 2), ...)

            foldmod$icross <- icross
            foldmod$ifold <- ifold
            oof.x <- x[oof.subset,,drop=FALSE]
            oof.y <- if(is.bpairs) object$glm.yfrac else y
            oof.y <- oof.y[oof.subset,,drop=FALSE]
            oof.weights <- weights[oof.subset]
            oof.fit <- predict(foldmod, newdata=oof.x, type="earth")

            # fill in subset of entries in this icross column of oof.fit.tab
            # note that we use only the first response when there are multiple responses
            if(!is.null(oof.fit.tab))
                oof.fit.tab[oof.subset, icross] <- oof.fit[,1]
            oof.fit.resp <- NULL
            if(is.binomial || is.poisson)
                oof.fit.resp <- predict(foldmod, newdata=oof.x, type="response")
            else if(must.get.class.rate) # not glm but has binary response?
                oof.fit.resp <- oof.fit

            # fill in this fold's row in summary tabs

            for(iresp in seq_len(nresp)) {
                rsq.tab[icross.fold, iresp] <-
                    get.weighted.rsq(oof.y[,iresp], oof.fit[,iresp], oof.weights)
                if(is.binomial) {
                    deviance.tab[icross.fold, iresp] <-
                        get.binomial.deviance(oof.fit.resp[,iresp], oof.y[,iresp])
                    calib <- get.binomial.calib(oof.fit.resp[,iresp],
                                if(is.bpairs) y[oof.subset,] else oof.y[,iresp])
                    calib.int.tab[icross.fold, iresp]   <- calib[1]
                    calib.slope.tab[icross.fold, iresp] <- calib[2]
                    maxerr.tab[icross.fold, iresp] <-
                        get.maxerr(oof.y[,iresp] - oof.fit.resp[,iresp])
                    test.tab[icross.fold, iresp] <-
                        get.auc(oof.fit.resp[,iresp], oof.y[,iresp])
                }
                else if(is.poisson) {
                    deviance.tab[icross.fold, iresp] <-
                        get.poisson.deviance(oof.fit.resp[,iresp], oof.y[,iresp])
                    calib <- get.poisson.calib(oof.fit.resp[,iresp], oof.y[,iresp])
                    calib.int.tab[icross.fold, iresp]   <- calib[1]
                    calib.slope.tab[icross.fold, iresp] <- calib[2]
                    maxerr.tab[icross.fold, iresp] <-
                        get.maxerr(oof.y[,iresp] - oof.fit.resp[,iresp])
                    test.tab[icross.fold, iresp] <-
                        cor(oof.fit.resp[,iresp], oof.y[,iresp])
                }
                else
                    maxerr.tab[icross.fold, iresp] <-
                        get.maxerr(oof.y[,iresp] - oof.fit[,iresp])
            } # end for iresp

            nvars.selected.by.gcv[icross.fold] <-
                get.nused.preds.per.subset(foldmod$dirs, foldmod$selected.terms)
            nterms.selected.by.gcv[icross.fold] <-
                length(foldmod$selected.terms)
            if(must.get.class.rate)
                class.rate.tab[icross.fold, ] <- get.class.rate(oof.fit.resp, oof.y, object$levels)
            if(get.oof.rsq.per.subset) {
                ret <- get.fold.rsq.per.subset(foldmod, oof.y, max.nterms,
                                               trace, must.print.dots)
                    oof.rsq.tab[icross.fold,]    <- ret$oof.rsq.per.subset
                    infold.rsq.tab[icross.fold,] <- ret$infold.rsq.per.subset
            }
            # init last column of summary tables
            ilast.col <- nresp+1 # index of final (summary) column in tables
            rsq.tab[icross.fold, ilast.col]    <- weighted.mean(rsq.tab[icross.fold, -ilast.col], wp.expanded)
            maxerr.tab[icross.fold, ilast.col] <- get.maxerr(maxerr.tab[icross.fold, -ilast.col])
            if(is.binomial || is.poisson) {
                deviance.tab[icross.fold, ilast.col]    <- weighted.mean(deviance.tab   [icross.fold, -ilast.col], wp.expanded)
                calib.int.tab[icross.fold, ilast.col]   <- weighted.mean(calib.int.tab  [icross.fold, -ilast.col], wp.expanded)
                calib.slope.tab[icross.fold, ilast.col] <- weighted.mean(calib.slope.tab[icross.fold, -ilast.col], wp.expanded)
                test.tab[icross.fold, ilast.col]        <- weighted.mean(test.tab       [icross.fold, -ilast.col], wp.expanded)
            }
            if(!keepxy) # reduce memory by getting rid of big fields
                foldmod$bx <- foldmod$residuals <- foldmod$prune.terms <- NULL
            trace.fold(icross, ifold, trace, y, infold.subset, oof.subset, ncross,
                       ndigits, rsq.tab[icross.fold,], must.print.dots)
            cv.list[[icross.fold]] <- foldmod
        } # end for ifold
    } # end for icross

    # init last row of summary tables

    ilast <- ncross.fold+1 # index of last row in tables
    nvars.selected.by.gcv [ilast]  <- mean(nvars.selected.by.gcv[-ilast])
    nterms.selected.by.gcv[ilast]  <- mean(nterms.selected.by.gcv[-ilast])

    rsq.tab   [ilast,] <- colMeans(rsq.tab     [-ilast,])
    maxerr.tab[ilast,] <- get.maxerr(maxerr.tab[-ilast,])
    if(is.binomial || is.poisson) {
        deviance.tab   [ilast,] <- colMeans(deviance.tab   [-ilast,])
        calib.int.tab  [ilast,] <- colMeans(calib.int.tab  [-ilast,])
        calib.slope.tab[ilast,] <- colMeans(calib.slope.tab[-ilast,])
        test.tab       [ilast,] <- colMeans(test.tab       [-ilast,])
    }
    if(must.get.class.rate)
        class.rate.tab[ilast,]  <- colMeans(class.rate.tab [-ilast,])

    oof.rsq.per.subset <- NULL
    if(get.oof.rsq.per.subset) {
        # there will be NAs in oof.rsq.tab if max terms in a fold is
        # less than max terms in full model
        oof.rsq.tab[ilast,]    <-
            col.means.with.special.na.handling(oof.rsq.tab[-ilast,])
        infold.rsq.tab[ilast,] <-
            col.means.with.special.na.handling(infold.rsq.tab[-ilast,])
    }
    trace1(trace, "\n")
    trace.fold(icross, -1, trace, y, TRUE, TRUE, ncross,
               ndigits, rsq.tab[ilast,], must.print.dots)
    if(trace == .5 && nresp > 1 && pmethod == "cv")
        cat("\n")
    trace1("\n")
    names(cv.list) <- fold.names
    rv <- list(
        cv.list = cv.list, # list of earth models built during cross validation
        nterms.selected.by.gcv = nterms.selected.by.gcv,
        nvars.selected.by.gcv  = nvars.selected.by.gcv,
        groups          = groups, # groups used for cross validation
        rsq.tab         = rsq.tab,
        maxerr.tab      = maxerr.tab,
        class.rate.tab  = class.rate.tab,
        auc.tab         = if(is.binomial) test.tab else NULL,
        cor.tab         = if(is.poisson)  test.tab else NULL,
        deviance.tab    = deviance.tab,
        calib.int.tab   = calib.int.tab,
        calib.slope.tab = calib.slope.tab,
        oof.fit.tab     = oof.fit.tab,
        infold.rsq.tab  = infold.rsq.tab,
        oof.rsq.tab     = oof.rsq.tab)
    rv
}

# Return the mean of each column in tab.
# NAs are ignored, except that the column mean is NA
# for columns in which over half the entries are NA.

col.means.with.special.na.handling <- function(tab)
{
    means <- colMeans(tab, na.rm=TRUE)
    nna <- colSums(is.na(tab)) # number of NAs in each column
    means[nna > nrow(tab) / 2] <- NA
    means # a vector of column means, some entries may be NA
}
check.cv.args <- function(nfold, ncross, pmethod, varmod.method)
{
    check.integer.scalar(nfold, min=0)
    # if(nfold > 10000) # 10000 is arbitrary
    #     stop0("nfold ", nfold, " is too big")

    check.integer.scalar(ncross, min=0)
    if(ncross > 1000) # 1000 is arbitrary
        stop0("ncross ", ncross, " is too big (max allowed is 1000)")

    if(ncross > 1 && nfold < 2)
        stop0("ncross=", ncross, " yet nfold=", nfold)
    if(ncross < 1 && nfold > 1)
        stop0("ncross=", ncross, " yet nfold=", nfold)

    if(varmod.method != "none") {
        if(nfold <= 1)
            stop0("varmod.method=\"", varmod.method, "\" requires nfold greater than 1")
        if(ncross < 3)
            stop0("ncross=", ncross,
                  " but should be larger when varmod.method is used\n",
                  "       (suggest at least 30, for debugging 3 is ok)")
    }
}
get.groups <- function(y, nfold, stratify)
{
    groups <- sample(repl(seq_len(nfold), nrow(y)))
    if(stratify) {
        # Get (roughly) equal number of folds for each non-zero entry in each y column
        # If y was originally a factor before expansion to multiple columns, this is
        # equivalent to having the same numbers of each factor level in each fold.
        for(iresp in seq_len(ncol(y))) {
            yset <- y[,iresp] != 0
            groups[yset] <- sample(repl(seq_len(nfold), sum(yset)))
        }
    }
    if(any(table(groups) == 0))
        stop0("Not enough data to do ", nfold,
              " fold cross validation (an out-of-fold set is empty)")
    groups
}
get.this.group <- function(icross, ifold, ncases, groups)
{
    start <- ((icross-1) * ncases) + 1
    groups[start:(start+ncases-1), 2]
}
trace.cv.header <- function(object, nresp, pmethod, pmethod1, trace.org, must.print.dots)
{
    if(trace.org == .5) {
        if(must.print.dots || nresp > 1)
            cat("\n")
        printf("%s with pmethod=\"%s\": GRSq %.3f RSq %.3f nterms %d\n",
                if(pmethod == "cv") "Preliminary model" else "Model",
                pmethod1, object$grsq , object$rsq, length(object$selected.terms))
        if(must.print.dots || nresp > 1)
            cat("\n")
    }
    else if(trace.org >= 1)
    {
        printf("\n")
        if(must.print.dots || nresp > 1)
            cat("\n")
    }
}
trace.fold.header <- function(trace, ncross, icross, ifold)
{
    if(trace >= .5 && trace < 1) {
        if(ncross > 1)
            printf("CV fold %2d.%-2d ", icross, ifold)
        else
            printf("CV fold %-2d ", ifold)
    } else if(trace >= 1) { # newline etc. to distinguish this from other trace prints
        if(ncross > 1)
            printf("\nCV fold %d.%d -----------------------------%s", icross, ifold,
                   "---------------------------------------\n")
        else
            printf("\nCV fold %d -----------------------------%s", ifold,
                   "---------------------------------------\n")
    }
}
# print results for the current fold (ifold=-1 means "all")

trace.fold <- function(icross, ifold, trace, y, infold.subset, oof.subset, ncross,
                       ndigits, rsq.row, must.print.dots)
{
    if(trace < .5)
        return()
    if(ifold < 0) {
        icross <- if(ncross > 1) "   " else ""
        printf("%s%s", "CV all     ", icross)
    } else if(trace >= 1) {
        if(ncross > 1)
            icross <- sprint("%2d.", icross)
        else
            icross <- ""
        printf("CV fold %s%-2d ", icross, ifold)
    }
    printf("CVRSq % -6.3f   ", rsq.row[length(rsq.row)])
    nresp <- length(rsq.row) - 1 # -1 for "all"
    if(nresp > 1) {
        cat("Per response CVRSq ")
        for(iresp in seq_len(nresp))
            printf("% -6.3f  ", rsq.row[iresp])
    }
    if(nresp > 1) {
        if(ncross > 1)
            cat("\n                            ")
        else
            cat("\n                         ")
    }
    if(ifold < 0)
        printf("       %.*s      ", ndigits, "                             ")
    else
        printf("n.oof %*.0f %2.0f%%   ", ndigits, length(infold.subset),
               100 * (length(y[, 1]) - length(infold.subset)) / length(y[, 1]))
    cat("n.infold.nz ")
    if(nresp == 1)
        printf("%*.0f %2.0f%%", ndigits, sum(y[infold.subset, 1] != 0),
               100 * sum(y[infold.subset, 1] != 0) / length(y[infold.subset, 1]))
    else for(iresp in seq_len(nresp))
        printf("%*.0f", ndigits, sum(y[infold.subset, iresp] != 0))

    if(ifold >= 0) {
        cat("   n.oof.nz ")
        if(nresp == 1)
            printf("%*.0f %2.0f%%", ndigits, sum(y[oof.subset, 1] != 0),
                   100 * sum(y[oof.subset, 1] != 0) / length(y[oof.subset, 1]))
        else for(iresp in seq_len(nresp))
            printf("%*.0f", ndigits, sum(y[oof.subset, iresp] != 0))
        if(nresp > 1)
            cat("\n")
    }
    if(must.print.dots && trace == .5 && ifold > 0)
        cat("\n")
    cat("\n")
}
trace.get.fold1 <- function(trace, must.print.dots, nterms)
{
    if(trace >= 2)
        cat0("\nget.fold.rss.per.subset nterms=", nterms, "\n")
    else if(must.print.dots) {
        cat0(".")
        if(nterms %% 40 == 0) {
            cat0("\n")
            if(trace == .5)
                cat("            ")
        }
        flush.console()
    }
}
trace.get.fold2 <- function(trace, must.print.dots, nterms)
{
    if(must.print.dots && nterms %% 40) { # nterms %% 40 avoids double newline
        cat0("\n")
        if(trace == .5)
            cat("            ")
    }
}
print_cv <- function(x) # called from print.earth for cross validated models
{
    cv.field <- function(field) round(x[[field]][ilast,], 3)

    cv.sd <- function(field) round(apply(x[[field]][-ilast,], 2, sd), 3)

    #--- print_cv starts here ---
    stopifnot(!is.null(x$cv.list), x$pmethod != "cv")
    ilast <- nrow(x$cv.rsq.tab) # index of "all" row in summary tables
    cat("\nNote: the cross-validation sd's below are standard deviations across folds\n\n")

    printf("Cross validation:   nterms %.2f sd %.2f    nvars %.2f sd %.2f\n\n",
        x$cv.nterms.selected.by.gcv[ilast],
        sd(x$cv.nterms.selected.by.gcv[-ilast]),
        x$cv.nvars.selected.by.gcv[ilast],
        sd(x$cv.nvars.selected.by.gcv[-ilast]))

    # if printing little then use wide spacing
    wide.spacing <- is.null(x$cv.deviance.tab)

    # create a data.frame and print that
    tab <- if(wide.spacing)
               data.frame("    CVRSq"=cv.field("cv.rsq.tab"), check.names = FALSE)
           else
               data.frame("CVRSq"  =cv.field("cv.rsq.tab"))

    tab$sd.1=cv.sd("cv.rsq.tab")

    if(!is.null(x$cv.class.rate.tab)) {
        if(wide.spacing) tab$"    ClassRate" <- cv.field("cv.class.rate.tab")
        else             tab$"ClassRate"   <- cv.field("cv.class.rate.tab")

        tab$sd.2 <- cv.sd("cv.class.rate.tab")
    }
    if(wide.spacing)
        tab$"    MaxErr" <- x$cv.maxerr.tab[ilast,]
    else
        tab$"MaxErr"   <- x$cv.maxerr.tab[ilast,]

    tab$sd <- apply(x$cv.maxerr.tab[-ilast,], 2, sd)

    if(!is.null(x$cv.auc.tab)) {
        tab$AUC  <- cv.field("cv.auc.tab")
        tab$sd.3 <- cv.sd("cv.auc.tab")
    }
    if(!is.null(x$cv.cor.tab)) {
        tab$cor.tab <- cv.field("cv.cor.tab")
        tab$sd.4    <- cv.sd("cv.cor.tab")
    }
    if(!is.null(x$cv.deviance.tab)) {
        tab$MeanDev    <- x$cv.deviance.tab[ilast,]
        tab$sd.5       <- apply(x$cv.deviance.tab[-ilast,], 2, sd)
        tab$CalibInt   <- cv.field("cv.calib.int.tab")
        tab$sd.6       <- cv.sd("cv.calib.int.tab")
        tab$CalibSlope <- cv.field("cv.calib.slope.tab")
        tab$sd.7       <- cv.sd("cv.calib.slope.tab")
    }
    # change "sd.N" to plain "sd"
    names <- names(tab)
    names[grep("sd", names)] <- "sd"
    names(tab) <- names

    rownames(tab) <- c(colnames(x$fitted.values), "All")

    digits <- min(getOption("digits"), 3)

    if(NCOL(x$coefficients) == 1)   # single response model?
        print(tab[1,,drop=FALSE], digits=digits, row.names=FALSE) # skip "All" row
    else                            # multiple response model
        print(tab, digits=digits)
}

Try the earth package in your browser

Any scripts or data that you put into this service are public.

earth documentation built on Feb. 16, 2023, 6:07 p.m.