R/plotresids.R

Defines functions possible.plotres.legend get.id.indices get.isubset trans.resids trans.versus my.log10 draw.resids.info draw.rlm.line draw.density.along.the.bottom draw.oof.meanfit derive.main derive.ylab derive.xlab draw.smooth draw.bad.leverage.as.star draw.cook.levels draw.pint.resids get.resids.ylim get.resids.xlim get.versus.info get.id.n get.plotres.data plotresids

# plotresids.R

plotresids <- function(
    object,
    which,
    info,
    standardize,
    level,
    versus1,

    id.n,
    smooth.col,
    grid.col,
    jitter,

    npoints,
    center,

    type,

    fitted,
    rinfo,
    rsq,
    iresids,
    nversus,
    colname.versus1,
    force.auto.resids.xlim,
    force.auto.resids.ylim,

    SHOWCALL=NA, # this is here to absorb SHOWCALL from dots

    ...)
{
    stopifnot(length(which) == 1)
    info <- check.boolean(info)

    ok <- which %in% c(W3RESID,W5ABS:W9LOGLOG)
    if(!all(ok))
        stop0("which=", which[!ok][1], " is not allowed")

    # id.n has already been checked in plotres.data
    id.indices.specified <- FALSE
    if(which %in% c(W3RESID, W4QQ:W8CUBE) && id.n != 0)
        id.indices.specified <- TRUE
    level <- check.level.arg(level, zero.ok=TRUE)
    if(which %in% (W5ABS:W9LOGLOG))
        level <- 0 # no pints

    pints <- NULL
    cints <- NULL
    level.shade  <- dota("level.shade  shade.pints", DEF="mistyrose2", ...)
    level.shade2 <- dota("level.shade2 shade.cints", DEF="mistyrose4", ...)
    if(which == W3RESID && is.specified(level)) {
        p <- plotmo.pint(object, newdata=NULL, type, level, trace=0)
        if(!is.null(p$fit) && max(abs(p$fit - fitted)) != 0) {
            # TODO $$ happens with test.unusual.vars.R:earth.glm.spaced.bx
            warning0("Internal inconsistency: p$fit != fitted",
                if(inherits(object, "earth"))
                    "\n         Workaround: no 'glm' arg in call to earth, or no 'level' arg n call to plotres"
                else
                    "")
            fitted <- p$fit # hack
        }
        if(is.specified(level.shade) && !is.null(p$upr)) {
            pints <- data.frame(upr=rinfo$scale * (p$upr - fitted),
                                lwr=rinfo$scale * (p$lwr - fitted))
            colnames(pints) <- c("upr", "lwr")
        }
        if(is.specified(level.shade2) && !is.null(p$cint.upr)) {
            cints <- data.frame(upr=rinfo$scale * (p$cint.upr - fitted),
                                lwr=rinfo$scale * (p$cint.lwr - fitted))
            colnames(cints) <- c("upr", "lwr")
        }
    }
    if(is.null(pints) && is.null(cints))
        level <- 0

    resids <- rinfo$scale * rinfo$resids

    if((which %in% W7VLOG:W9LOGLOG))
        check.that.most.are.positive(
            versus1, "fitted", sprint("which=%d", which), "nonpositive")

    # TODO following is redundant after above check?
    # abs(resids) must be nonnegative to take their log
    if(which %in% W7VLOG:W9LOGLOG)
        check.that.most.are.positive(
            abs(resids), "abs(residuals)", sprint("which=%d", which), "zero")

    trans.versus <- trans.versus(versus1[iresids], which)
    trans.resids <- trans.resids(resids[iresids], which)
    x <- if(nversus == V2INDEX) 1:length(trans.versus) else trans.versus
    jitter <- as.numeric(check.numeric.scalar(jitter, logical.ok=TRUE))
    stopifnot(jitter >= 0, jitter <= 10) # 10 is arbitrary
    jittered.x <- x
    jittered.trans.resids <- trans.resids
    if(jitter > 0) {
        # we use amount=0 (same as S) which seems to work better in this context
        jittered.x            <- jitter(x,            factor=jitter, amount=0)
        jittered.trans.resids <- jitter(trans.resids, factor=jitter, amount=0)
    }
    derived.xlab <- derive.xlab(dota("xlab", DEF=NULL, ...),
                                which, colname.versus1, nversus)
    derived.ylab <- derive.ylab(dota("ylab", DEF=NULL, ...), which, rinfo$name)
    main <- derive.main(main=dota("main", DEF=NULL, ...),
                               derived.xlab, derived.ylab, level,
                               attr(object, "plotmo.s"))

    # allow col.response as an argname for compat with old plotmo
    pt.col <- dota("col.response col.resp", DEF=1, ...)
    pt.col <- dota("pt.col col.points col.point col.residuals col.resid col",
                   EX=c(0,1,1,1,1,1), DEF=pt.col, NEW=1, ...)
    # recycle
    pt.col <- repl(pt.col, length(resids))

    pt.cex <- dota("response.cex cex.response", DEF=1, ...)
    pt.cex <- dota("pt.cex cex.points cex.point cex.residuals cex",
                   EX=c(0,1,1,1,1), DEF=pt.cex, NEW=1, ...)
    pt.cex <- pt.cex * pt.cex(length(x), npoints)
    pt.cex <- repl(pt.cex, length(resids))

    pt.pch <- dota("response.pch pch.response", DEF=20, ...)
    pt.pch <- dota("pt.pch pch.points pch.point pch.residuals pch",
                   EX=c(0,1,1,1,1), DEF=pt.pch, NEW=1, ...)
    pt.pch <- repl(pt.pch, length(resids))

    ylim <- get.resids.ylim(ylim=dota("ylim", ...), force.auto.resids.ylim,
                            object, fitted, trans.resids, which,
                            info, standardize, id.indices.specified, center,
                            pints, cints, rinfo$scale, nversus)

    xlim <- get.resids.xlim(xlim=dota("xlim", ...), force.auto.resids.xlim,
                            which, x, trans.versus, ylim, nversus,
                            id.indices.specified)
    id.indices <- NULL
    if(id.indices.specified)
        id.indices <-
            get.id.indices(rinfo$scale * rinfo$resids, id.n,
                           if(nversus == V4LEVER)
                               hatvalues1(object, sprint("versus=%g", V4LEVER))
                           else
                               NULL)

    call.plot(graphics::plot.default, PREFIX="pt.",
         force.x    = x,
         force.y    = jittered.trans.resids,
         force.main = main,
         force.xlab = derived.xlab,
         force.ylab = derived.ylab,
         force.xlim = xlim,
         force.ylim = ylim,
         force.col  = NA, # no points will actually be plotted at this stage
         ...)
    if(is.specified(grid.col))
        grid(col=grid.col, lty=1)
    else if(which != W9LOGLOG)
        abline(h=0, lty=1, col="lightgray") # axis

    if(level && nversus != V4LEVER) {
        if(is.specified(level.shade))
            draw.pint.resids(pints=pints, x=versus1,
                             shade=level.shade, nversus=nversus, ...)
        if(is.specified(level.shade2))
            draw.pint.resids(pints=cints, x=versus1,
                             shade=level.shade2, nversus=nversus, ...)
    }
    if(nversus == V4LEVER) {
        # vertical line at mean leverage
        mean <- mean(x, na.rm=TRUE)
        abline(v=mean, col="gray")
        # add label "mean"
        if(which == W3RESID) { # not for others otherwise put text over the points
            usr <- par("usr") # xmin, xmax, ymin, ymax
            text(mean,
                 if(info) usr[3] + .1 * (usr[4] - usr[3]) # beyond density plot
                 else     usr[3] + .02 * (usr[4] - usr[3]),
                 "mean",
                 adj=c(0, -.2), cex=.8, srt=90)
        }
        if(standardize && inherits(object, "lm"))
            draw.cook.levels(object, ...)
    }
    call.plot(graphics::points, PREFIX="pt.",
              force.x   = jittered.x,
              force.y   = jittered.trans.resids,
              force.col = pt.col[iresids],
              force.cex = pt.cex[iresids],
              force.pch = pt.pch[iresids],
              ...)
    box()

    # plot points with unity leverage as stars
    draw.bad.leverage.as.star(jittered.x, rinfo, iresids, pt.cex, smooth.col)

    coef.rlm <- NULL
    if(info && nversus != V4LEVER && (which == W5ABS || which == W9LOGLOG))
        coef.rlm <- draw.rlm.line(which, versus1, resids, nversus, ...)

    if(which != W9LOGLOG)
        draw.smooth(x, trans.resids, rinfo$scale[iresids], smooth.col, ...)

    col.cv <- dota("col.cv", ...)
    oof.meanfit.was.plotted <- FALSE
    if(level && !is.null(object$cv.oof.fit.tab) && is.specified(col.cv))
    {
        draw.oof.meanfit(object$cv.oof.fit.tab, fitted, versus1, rinfo,
                         which, col.cv, nversus)
        oof.meanfit.was.plotted <- TRUE
    }
    # TODO implement id.indices for nversus=V2INDEX
    if(id.indices.specified && nversus != V2INDEX) {
        # TODO as.numeric is needed if versus1 is a factor
        # is.na test needed for which=7 (if some are negative?)
        x1 <- as.numeric(trans.versus(versus1, which)[id.indices])
        if(!anyNA(x1))
            plotrix::thigmophobe.labels(x=x1,
                # TODO labels should take into account jitter
                y=trans.resids(resids, which)[id.indices],
                labels=rinfo$labs[id.indices],
                offset=.33, xpd=NA,
                font=dota("label.font", DEF=1, ...)[1],
                cex=.8 * dota("label.cex", DEF=1, ...)[1],
                col=dota("label.col",
                        DEF=if(is.specified(smooth.col)) smooth.col else 2, ...)[1])
    }
    if(info)
        draw.resids.info(which, info, versus1, resids, nversus, rsq, coef.rlm, ...)
    else
        possible.plotres.legend(which=which,
            level=level, smooth.col=smooth.col,
            oof.meanfit.was.plotted=oof.meanfit.was.plotted, ...)

    list(x=x, y=trans.resids) # does not include jittering

}
get.plotres.data <- function(object, object.name, which, standardize, delever,
                             level, versus, id.n, labels.id,
                             trace, npoints, type, nresponse, ..., must.get.rsq)
{
    # the dot argument FORCEPREDICT is to check compat with old plot.earth
    meta <- plotmo_meta(object, type, nresponse, trace,
                        avoid.predict=!dota("FORCEPREDICT", DEF=FALSE, ...), ...)

      nresponse <- meta$nresponse  # column index
      resp.name <- meta$resp.name  # used only in automatic caption, may be NULL
      type      <- meta$type       # always a string (converted from NULL if necessary)
      residtype <- meta$residtype  # ditto

    # we get rsq only if necessary, because error reporting if we can't get it
    # is weak (because of nested try blocks, here and in do.call.trace)
    rsq <- NA
    if(must.get.rsq) {
        rsq <- try(plotmo_rsq1(object=object, newdata=NULL,
                               trace=if(trace == 1) -1 else trace, meta=meta, ...),
                   silent=trace < 2)
        if(is.try.err(rsq)) {
            trace0(trace, "Cannot get training rsq (%s)\n", cleantry(rsq))
            rsq <- NA
        }
    }
    # get the residuals and fitted info
    rinfo <- plotmo_rinfo(object=object, type=type, residtype=residtype,
                nresponse=nresponse,
                standardize=standardize, delever=delever, trace=trace,
                leverage.msg=
                    if(any(which %in% c(W3RESID,W5ABS:W9LOGLOG)))
                        "plotted as a star"
                    else
                        "ignored",
                expected.levs=meta$resp.levs, labels.id=labels.id, ...)

    fitted <- rinfo$fitted # n x 1 numeric matrix
    rinfo$fitted <- NA     # prevent accidental use of rinfo$fitted later

    stopifnot(NCOL(fitted) == 1)
    stopifnot(length(dim(fitted)) == 2)
    colnames(fitted) <- "Fitted" # colname will be used in labels in plots

    # get the values we will plot against (by default the fitted values)
    vinfo <- get.versus.info(which, versus, object, fitted, nresponse, trace)
    stopifnot(nrow(fitted) == length(rinfo$resids))

    ncases <- length(rinfo$resids)
    id.n <- get.id.n(id.n, ncases) # convert special values of id.n

    # convert special values of npoints
    check.integer.scalar(npoints, min=-1, null.ok=TRUE, logical.ok=TRUE)
    npoints.was.neg <- FALSE
    if(is.null(npoints))
        npoints <- 0
    else if(is.logical(npoints))
        npoints <- if(npoints) ncases else 0
    else if(npoints == -1) {
        npoints.was.neg <- TRUE
        npoints <- ncases
    } else if(npoints > ncases)
        npoints <- ncases

    # Use a maximum of NMAX residuals (unless npoints is bigger or negative).
    # Allows plotres to be fast even on models with millions of cases.
    NMAX <- 1e4
    nmax <- max(NMAX, npoints)
    if(!npoints.was.neg && nrow(fitted) > nmax) {
        if(trace >= 1)
            printf("using %g of %g residuals%s\n",
                nmax, ncases,
                if(id.n > 0)
                    ", forcing id.n=0 because of that (implementation restriction)"
                else
                    "")
        # see comment in plotres for use of V4LEVER here
        isubset <- get.isubset(rinfo$resids, nmax, id.n,
                     use.all=(vinfo$nversus == V4LEVER), rinfo$scale)
        fitted           <- fitted      [isubset, , drop=FALSE]
        rinfo$resids     <- rinfo$resids[isubset, , drop=FALSE]
        rinfo$scale      <- rinfo$scale [isubset]
        vinfo$versus.mat <- vinfo$versus.mat  [isubset, , drop=FALSE]
        # Can no longer draw point labels because row numbers are different.
        # TODO Come up with a solution so it doesn't have to be that way.
        id.n <- 0
    }
    list(nresponse  = nresponse, # col index in the response (converted from NA if necessary)
         resp.name  = resp.name, # used only in automatic caption, may be NULL
         type       = type,      # always a string (converted from NULL if necessary)
         rinfo      = rinfo,     # resids, scale, name, etc.
         vinfo      = vinfo,     # versus.mat, icolumns, nversus, etc.
         fitted     = fitted,    # n x 1 numeric matrix, colname is "Fitted"
         id.n       = id.n,      # forced to zero if row indexing changed
         npoints    = npoints,   # special values have been converted
         rsq        = rsq)
}
get.id.n <- function(id.n, ncases) # convert special values of id.n
{
    check.integer.scalar(id.n, null.ok=TRUE, logical.ok=TRUE)
    if(is.null(id.n))
        id.n <- 0
    else if(is.logical(id.n)) {
        id.n <- if(id.n) ncases else 0
    } else if(id.n == -1)
        id.n <- ncases
    else if(abs(id.n) > ncases)
        id.n <- ncases
    id.n
}
get.versus.info <- function(which, versus, object, fitted, nresponse, trace=0)
{
    versus.mat <- fitted
    icolumns   <- 1
    trim.which <- FALSE
    got.versus <- FALSE
    nversus    <- versus
    if(is.numeric(versus)) {
        got.versus <- TRUE
        trim.which <- TRUE
        if(length(versus) != 1)
            stop0(
"illegal 'versus' argument (length of 'versus' must be 1 when 'versus' is numeric)")
        if(floor(versus) != versus)
            versus.err()
        if(versus == V1FITTED)
            trim.which <- FALSE
        else if(versus == V2INDEX)
            NULL
        else if(versus == V3RESPONSE) {
            versus.mat <- plotmo_y(object, nresponse, trace,
                            expected.len=NROW(fitted), object$levels)$y
            colnames(versus.mat) <- "Response"
        } else if(versus == V4LEVER) {
            # TODO handle constant leverages for factors in the same way as plot.lm
            versus.mat <-
                matrix(hatvalues1(object, sprint("versus=%g", V4LEVER)), ncol=1)
            colnames(versus.mat) <- "Leverage"
        } else
            versus.err()
    }
    else if(!is.character(versus))
        versus.err()
    else if(length(versus) == 1 && nchar(versus) >= 2 &&
            (substr(versus, 1, 2) == "b:" || substr(versus, 1, 2) == "B:")) {
        # use the basis matrix
        got.versus <- TRUE
        trim.which <- TRUE
        nversus <- 0
        plotmo_bx <- plotmo_bx(object, trace,
                               versus=substring(versus, 3)) # substring drops "bx:"
            versus.mat <- plotmo_bx$bx
            icolumns   <- plotmo_bx$icolumns
    }
    if(!got.versus) { # user specified x variables
        trim.which <- TRUE
        prefix <- substr(versus, 1, 1)
        nversus <- 0
        # following are needed if versus is a vector
        if(any(prefix == "*"))
            stop0("\"*\" is not allowed in this context in the 'versus' argument\n",
                  "      Your 'versus' argument is ", quote.with.c(versus))
        versus.mat <- plotmo_x(object, trace)
        versus.mat <- as.matrix(versus.mat)
        colnames(versus.mat) <- gen.colnames(versus.mat, "x", "x", trace)
        icolumns <- check.index(versus, "versus", seq_len(NCOL(versus.mat)),
                                colnames=colnames(versus.mat))
    }
    if(trim.which) {
        # remove all entries from which except standard resid and abs resid plots
        org.which <- which
        which <- which[which %in% c(W3RESID,W5ABS)]
        if(length(which) == 0)
            warnf(
                "which=%s is now empty because plots were removed because versus=%s",
                paste.c(org.which, maxlen=50), paste.c(versus, maxlen=30))
    }
    list(which      = which,      # which after possibly removing some plots
         versus.mat = versus.mat, # either fitted, response, x, or bx
         icolumns   = icolumns,   # desired column indices in versus.mat
         nversus    = nversus)    # versus as a number, 0 if versus is character
}
get.resids.xlim <- function(xlim, force.auto.resids.xlim,
                            which, x, trans.versus, ylim, nversus,
                            id.indices.specified)
{
    if(force.auto.resids.xlim || !is.specified(xlim)) { # auto xlim?
        if(which == W9LOGLOG) {
            # don't show lower 5% of points
            quant <- quantile(trans.versus, probs=c(.05, 1), names=FALSE)
            min <- quant[1]
            max <- quant[2]
            # extra left margin so slope of linear fit not flattened
            if(min > .2 * ylim[1])
                min <- .2 * ylim[1]
            xlim <- c(min, max)
        } else if(nversus == V4LEVER) # room for labels on high leverage points
            xlim <- c(0, 1.1 * max(x, na.rm=TRUE))
        else
            xlim <- range1(x, na.rm=TRUE)
        range <- xlim[2] - xlim[1]
        if(id.indices.specified) # space for point labels
            xlim <- c(xlim[1] - .04 * range, xlim[2] + .04 * range)
    }
    stopifnot(is.numeric(xlim), length(xlim) == 2)
    fix.lim(xlim)
}
get.resids.ylim <- function(ylim, force.auto.resids.ylim,
                            object, fitted, resids, which,
                            info, standardize, id.indices.specified, center,
                            pints, cints, scale, nversus)
{
    if(force.auto.resids.ylim || !is.specified(ylim)) { # auto ylim?
        if(!is.null(pints)) {
            min <- min(resids, pints$lwr, na.rm=TRUE)
            max <- max(resids, pints$upr, na.rm=TRUE)
        } else if(!is.null(cints)) {
            min <- min(resids, cints$lwr, na.rm=TRUE)
            max <- max(resids, cints$upr, na.rm=TRUE)
        } else {
            min <- min(resids, na.rm=TRUE)
            max <- max(resids, na.rm=TRUE)
        }
        maxa <- mina <- 0 # adjustments to max and min
        if(which %in% (W5ABS:W8CUBE))
            min <- 0
        else if(which == W3RESID && center) {
            # want symmetric ylim so can more easily see asymmetry
            if(abs(min) > abs(max))
                max <- -min
            else if(abs(max) > abs(min))
                min <- -max
        } else if(which == W9LOGLOG)
            maxa <- .5  # more space on top, looks better
        range <- abs(max - min)
        if(id.indices.specified) { # space for point labels
            # TODO only do this if point labels are near the edges
            mina <- max(mina, .03 * range)
            maxa <- max(maxa, .03 * range)
        }
        if(nversus == V4LEVER && standardize && inherits(object, "lm")) {
            maxa <- max(maxa, maxa + .2 * range) # space for cook distance legend
            mina <- max(mina, mina + .1 * range) # space for "mean" label
        }
        if(info) { # space for extra text (on top) and density plot (in the bottom)
            maxa <- maxa + max * if(id.indices.specified) .2 else .1
            mina <- mina + max * if(id.indices.specified) .2 else .1
        }
        ylim <- c(min-mina, max+maxa)
    }
    fix.lim(ylim)
}
draw.pint.resids <- function(pints, x, shade, nversus, ...)
{
    if(!is.null(pints)) {
        # abscissa must be ordered for polygon to work
        order <- order(x)
        x <- x[order]
        pints <- pints[order,]
        x <- if(nversus == V2INDEX)
                c(1:length(x), length(x):1)
             else
                trans.versus(c(x, rev(x)), 0)
        call.plot(graphics::polygon, PREFIX="level.", drop.shade=1, drop.shade2=1,
            force.x    = x,
            force.y    = trans.resids(c(pints$lwr, rev(pints$upr)), 0),
            force.col  = shade,
            def.border = shade,
            def.lty    = 0,
            ...)
    }
}
# this should be used only for models with homoscedastic errors
draw.cook.levels <- function(object, ...)
{
    cook.levels <- dota("cook.levels", DEF=c(0.5, 1.0), ...)
    stopifnot(is.numeric(cook.levels), all(cook.levels > 0))
    col <- dota("cook.col", DEF="slategray4", ...)
    lty <- dota("cook.lty", DEF=1,          ...)
    lwd <- dota("cook.lwd", DEF=1,          ...)
    # based on code in stats::plot.lm.R
    leverage <- hatvalues1(object, "'standardize'")
    p <- length(coef(object))
    leverage.range <- range(leverage, na.rm=TRUE) # though should never have NA
    x <- seq.int(0, 1, length.out=101)
    for(cook.level in cook.levels) {
        cl <- sqrt(cook.level * p *(1 - x) / x)
        lines(x,  cl, col=col, lty=lty, lwd=lwd)
        lines(x, -cl, col=col, lty=lty, lwd=lwd)
    }
    # we don't use bottomleft like plot.lm because we may plot the density there
    usr <- par("usr") # xmin, xmax, ymin, ymax
    legend(usr[1]-.7 * strwidth("X"), # jam it into the corner
           usr[4]+.5 * strheight("X"),
           legend="Cook's distance", col=col, lty=lty, lwd=lwd,
           box.col="white", bg="white", x.intersp=.2, seg.len=1.5)
    xmax <- min(0.99, usr[2])
    ymult <- sqrt(p * (1 - xmax) / xmax)
    axis(4, at=c(-sqrt(rev(cook.levels)) * ymult, sqrt(cook.levels)*ymult),
         labels=paste(c(rev(cook.levels), cook.levels)),
         mgp=c(.25,.15,0), las=2, tck=0,
         cex.axis=.7, col.axis=col,
         font=2) # makes the gray labels a bit more legible
}
# Plot points with unity leverage as stars.  We plot them on
# the axis, which is arguably incorrect but still useful.
# TODO add a test for this to the test suite

draw.bad.leverage.as.star <- function(x, rinfo, iresids, pt.cex, smooth.col)
{
    which <- which(is.na(rinfo$scale[iresids]))
    if(length(which) > 0) {
        points(x[which], 0, col=1, cex=pt.cex[iresids], pch=8) # pch 8 is a star
        # add label if possible (not poss if not all points plotted, see npoints)
        if(length(iresids) == length(rinfo$scale)) {
            label <- which(is.na(rinfo$scale))
            text.on.white(x=x[which], y=0, label=label,
                          col=if(is.specified(smooth.col)) smooth.col else 2,
                          cex=.8, adj=-.5, xpd=NA)
        }
    }
}
draw.smooth <- function(x, resids, scale, smooth.col, ...)
{
    if(!is.specified(smooth.col))
        return(NULL)

    # na.rm is needed if we take logs of nonpos, see check.that.most.are.positive.
    # That's why we calculate delta explicitly instead of using lowess default.
    delta <- .01 * diff(range1(x, na.rm=TRUE))

    # Replace points with NA scale with 0 (else lowess stops at the NA).
    # Zero is appropriate because the points are 0 resids with leverage 1.
    resids[which(is.na(scale))] <- 0

    # we use lowess rather than loess because loess tends to give warnings
    smooth.f    <- dota("smooth.f loess.f", DEF=2/3, NEW=1, ...)
    smooth.iter <- dota("smooth.iter",      DEF=3,          ...)
    check.numeric.scalar(smooth.f)
    stopifnot(smooth.f > .01, smooth.f < 1)
    smooth <- lowess(x, resids, f=smooth.f, iter=smooth.iter, delta=delta)
    call.plot(graphics::lines.default, PREFIX="smooth.", drop.f=1,
          force.x   = smooth$x,
          force.y   = smooth$y,
          force.col = smooth.col,
          force.lwd = dota("smooth.lwd lwd.smooth lwd.loess",
                           EX=c(0,1,1), DEF=1, NEW=1, ...),
          force.lty = dota("smooth.lty lty.smooth",
                           EX=c(0,1), DEF=1, NEW=1, ...),
          ...)
}
derive.xlab <- function(xlab, which, colname.versus1, nversus)
{
    if(is.specified(xlab)) {
        stopifnot.string(xlab, allow.empty=TRUE)
        if(!nzchar(xlab))
            return("")
    }
    if(!is.specified(xlab))
        xlab <- colname.versus1
    stopifnot.string(xlab)
    if(which %in% (W7VLOG:W9LOGLOG))
        xlab <- sprint("Log %s", xlab)
    if(nversus == V2INDEX)
        xlab <- sprint("%s index", xlab)
    xlab
}
derive.ylab <- function(ylab, which, rinfo.name)
{
    if(is.specified(ylab)) {
        stopifnot.string(ylab, allow.empty=TRUE)
        if(!nzchar(ylab))
            return("")
    }
    if(!is.specified(ylab))
        ylab <- sprint("%ss", rinfo.name)
    if(which == W5ABS)
        ylab <- sprint("Abs %s", ylab)
    else if(which == W6SQRT)
        ylab <- sprint("Sqrt Abs %s", ylab)
    else if(which == W7VLOG)
        ylab <- sprint("Abs %s", ylab)
    else if(which == W8CUBE)
        ylab <- sprint("Cube Root Squared %s", ylab)
    else if(which == W9LOGLOG)
        ylab <- sprint("Log Abs %s", ylab)
    ylab
}
derive.main <- function(main, xlab, ylab, level, predict.s) # title of plot
{
    # TODO should really use strwidth for newline calculation
    # The "Fitted" helps with limitations of the formula below
    newline <- xlab != "Fitted" && xlab != "Fitted index" && xlab != "Response" &&
               nchar(ylab) + nchar(xlab) > 15
    if(xlab == "Leverage" && ylab == "Residuals") # special case, mainly for which=1 with lm
        newline <- FALSE
    else if(grepl("Standardized", ylab[1]) || grepl("Delevered", ylab[1]))
        newline <- TRUE

    if(!is.specified(main)) { # generate a main only if user didn't specify main
        main <- sprint("%s vs%s%s", ylab, if(newline) "\n" else " ", xlab)
        if(!is.null(predict.s)) {
            # include the s argument that is used to make the model predictions
            if(is.character(predict.s)) # "lambda.1se" or "lambda.min"
                main <- sprint("%s   (s=\"%s\")", main, predict.s)
            else if(is.numeric(predict.s)) {
                main <- sprint("%s   (s=%s)", main,
                                if(predict.s == 0) "0"
                                else               signif(predict.s,2))
            } else
                warning0("predict.s has an unexpected class ",
                         quotify(class(predict.s)))
        }
    }
    if(xlab != "Leverage" && level && !newline) # two newlines is too many
        main <- sprint("%s\n%g%% level shaded", main, 100*(level))
    main
}
# plot resids of oof meanfit with col.cv (default lightblue)

draw.oof.meanfit <- function(cv.oof.fit.tab, fitted, versus1,
                             rinfo, which, col.cv, nversus)
{
    # mean of each row of oof.fit.tab
    meanfit <- apply(cv.oof.fit.tab,  1, mean)
    meanfit <- rinfo$scale * (meanfit - fitted)
    order <- order(versus1)
    trans.versus1 <- trans.versus(versus1[order], which)
    x <- if(nversus == V2INDEX) 1:length(trans.versus1) else trans.versus1
    lines(x, trans.resids(meanfit[order], which), col=col.cv)
}
draw.density.along.the.bottom <- function(x, den.col=NULL, scale=NULL, ...)
{
    if(is.null(den.col))
        den.col <- dota("density.col", DEF="gray57", EX=0, ...)
    den <- try(density(x, adjust=dota("density.adjust", DEF=.5, EX=0, ...), na.rm=TRUE),
               silent=TRUE)
    if(is.try.err(den))
        warning0("draw.density.along.the.bottom: cannot determine density")
    else {
        usr <- par("usr") # xmin, xmax, ymin, ymax
        if(is.null(scale))
            scale <- .08 / (max(den$y) - min(den$y))
        den$y <- usr[3] + den$y * scale * (usr[4] - usr[3])
        call.plot(graphics::lines.default, PREFIX="density.", drop.adjust=1,
                  force.x=den, force.y=NULL, def.col=den.col, ...)
    }
}
draw.rlm.line <- function(which, versus1, resids, nversus, ...)
{
    trans.resids <- trans.resids(resids, which)
    trans.versus <- trans.versus(versus1, which)
    x <- if(nversus == V2INDEX) 1:length(trans.versus) else trans.versus
    if(which == W9LOGLOG) {
        # ignore lower 10% of points (very small residuals i.e. very neg logs)
        quant <- quantile(trans.versus, probs=.1, na.rm=TRUE, names=FALSE)
        ok <- which(x > quant)
        x <- x[ok]
        trans.resids <- trans.resids[ok]
    }
    # # regression on 10 bootstrap samples so we can see variance of versus1
    # for(i in 1:10) {
    #   j <- sample.int(length(x), replace=TRUE)
    #   trans.resids1 <- trans.resids[j]
    #   trimmed.trans.fit1 <- x[j]
    #   rlm <- MASS::rlm(trans.resids1~trimmed.trans.fit1,
    #                    method="MM", na.action="na.omit")
    #   if(draw)
    #       abline(coef(rlm), col="lightgray", lwd=.6)
    # }

    # robust linear regression of trans.resids on x
    # na.omit is needed if some versus1 (or resids) were nonpos so log(versus1) is NA
    rlm <- MASS::rlm(trans.resids~x, method="MM", na.action="na.omit")
    call.dots(abline,
        force.coef = coef(rlm),
        force.col  = "lightblue",
        force.lwd  = dota("smooth.lwd lwd.smooth lwd.loess",
                          EX=c(0,1,1), DEF=1, NEW=1, ...) + 1,
        ...)
    box() # abline overplots the box
    coef(rlm)
}
draw.resids.info <- function(which, info, versus1, resids, nversus, rsq, coef.rlm, ...)
{
    trans.versus <- trans.versus(versus1, which)
    x <- if(nversus == V2INDEX) 1:length(trans.versus) else trans.versus
    # TODO consider also drawing the density along the right side
    draw.density.along.the.bottom(x, ...)
    if(nversus != V4LEVER) {
        lm.text <- ""
        slope.text <- ""
        if(which == W5ABS || which == W9LOGLOG) { # added linear regression line?
            stopifnot(length(coef.rlm) == 2)
            slope.text <- sprint("    slope %.2g", coef.rlm[2])
        }
        # exact=FALSE else get warning "Cannot compute exact p-value with ties"
        cor.abs <- cor.test(versus1, abs(resids), method="spearman", exact=FALSE)
        if(nversus == V3RESPONSE) {
            cor <- cor.test(versus1, resids, method="spearman", exact=FALSE)
            text <- sprint("spearman abs  %.2f   resids %.2f\n%s",
                             cor.abs$estimate, cor$estimate, slope.text)
        } else if(which == W3RESID && nversus == V1FITTED)
            text <- sprint("rsq  %.2f     spearman abs  %.2f",
                            rsq, cor.abs$estimate)
        else
            text <- sprint("spearman abs  %.2f%s", cor.abs$estimate, slope.text)
        cex <- .9
        usr <- par("usr") # xmin, xmax, ymin, ymax
        text.on.white(x     = usr[1] + strwidth("x", font=1),
                      y     = usr[4] - cex * (strheight(text, font=1) +
                                  .5 * strheight("X", font=1)),
                      label = text,
                      cex   = cex, adj=c(0, 0), font=1, col=1, xpd=NA)
    }
}
my.log10 <- function(x) # log of very small values is silently set to NA
{
    i <- which(x < max(x) / 1e6)
    x[i] <- 1
    x <- log10(x)
    x[i] <- NA
    x
}
trans.versus <- function(versus, which)
{
    if(which %in% (W7VLOG:W9LOGLOG))
        my.log10(versus)
    else
        versus
}
trans.resids <- function(resid, which) # transform the residuals
{
    if(which == W5ABS)
        abs(resid)
    else if(which == W6SQRT)
        sqrt(abs(resid))
    else if(which == W7VLOG)
        abs(resid)
    else if(which == W8CUBE) {
        # do it in two steps so no problem with negative numbers
        resid <- resid^2
        resid^(1/3)
    } else if(which == W9LOGLOG)
        my.log10(abs(resid))
    else
        resid
}
# Get a subset of x.  Size of subset is nsubset.  Returns an index vector.
# The subset includes the 20 biggest absolute values in x.
# If you want more than the 20 biggest values, set nbiggest.

get.isubset <- function(x, nsubset, nbiggest=0, use.all=FALSE, scale=NULL)
{
    check.vec(x, "x passed to get.isubset", length(x))
    ix <- seq_len(length(x))
    if(nsubset > 0 && nsubset < length(x) && !use.all) { # TODO move this into caller
        # take a sample, but make sure it includes the 20 biggest absolute values
        nsubset <- min(nsubset, length(x))
        nbiggest <- min(max(20, nbiggest), nsubset)
        isorted <- order(abs(x), decreasing=TRUE) # expensive
        ikeep <- seq_len(nbiggest)
        if(nsubset > nbiggest)
            ikeep <- c(ikeep, seq(from=nbiggest + 1, to=length(x),
                                  length.out=nsubset - nbiggest))
        ix <- isorted[ikeep]
        # force in points with unity leverage
        if(!is.null(scale)) {
            which <- which(is.na(scale))
            if(length(which) > 0)
                ix <- sort.unique(c(which, ix))
        }
    }
    ix # index vector
}
# get the indices of the id.n biggest resids (requires a sort)

get.id.indices <- function(resids, id.n, hatvalues=NULL)
{
    # id.n has already been checked in plotres.data
    if(id.n == 0)
        return(NULL)
    if(id.n > 0) { # same as plot.lm
        i <- sort.list(abs(resids), decreasing=TRUE, na.last=NA)
        if(length(i) > id.n)
            i <- i[1:id.n]
    } else { # id.n is negative: most positive and most negative residuals
        id.n <- -id.n
        i <- sort.list(resids, decreasing=TRUE, na.last=NA)
        if(length(i) > id.n)
            i <- i[c(1:id.n, length(i) + 1 - (1:id.n))]
    }
    if(!is.null(hatvalues)) {
        # add the worst hatvalues i.e. the worst leverages
        i <- unique(c(i, order(hatvalues, decreasing=TRUE)[1:id.n]))
    }
    i
}
possible.plotres.legend <- function(which, level, smooth.col,
                                    oof.meanfit.was.plotted, ...)
{
    # add legend, else red and blue may confuse the user
    legend.pos <- dota("legend.pos", DEF=NULL, ...)
    if(level && oof.meanfit.was.plotted &&
           (is.null(legend.pos) || !all(is.na(legend.pos)))) {
        if(is.null(legend.pos)) { # auto?
            legend.x <- "bottomleft"
            legend.y <- NULL
        } else { # user specified legend position
            legend.x <- legend.pos[1]
            legend.y <- if(length(legend.pos) > 1) legend.pos[2] else NULL
        }
        legend.txt <- NULL
        legend.col <- NULL
        legend.lwd <- NULL
        legend.lty <- NULL
        if(which != W9LOGLOG && is.specified(smooth.col)) { # smooth plotted?
            legend.txt <- "smooth"
            legend.col <- smooth.col
            legend.lwd <- dota("smooth.lwd lwd.smooth lwd.loess",
                               EX=c(0,1,1), DEF=1, NEW=1, ...)
            legend.lty <- 1
        }
        if(oof.meanfit.was.plotted) {
            legend.txt <- c(legend.txt, "cross validated oof fit")
            legend.col <- c(legend.col, dota("col.cv", ...))
            legend.lwd <- c(legend.lwd, 1)
            legend.lty <- c(legend.lty, 1)
        }
        if(!is.null(legend.txt))
            call.dots(graphics::legend,
                DROP="*", KEEP="PREFIX",
                force.x      = legend.x,
                force.y      = legend.y,
                force.legend = legend.txt,
                def.col      = legend.col,
                def.lwd      = legend.lwd,
                def.lty      = legend.lty,
                def.bg       = "white",
                def.cex      = .8,
                ...)
    }
}

Try the plotmo package in your browser

Any scripts or data that you put into this service are public.

plotmo documentation built on May 22, 2022, 1:05 a.m.