R/lib.R
In rpart.plot: Plot 'rpart' Models: An Enhanced Version of 'plot.rpart'

# lib.R: miscellaneous functions for plotmo and related packages
#        functions in this file are in alphabetical order

any1 <- function(x)
{
    any(x != 0) # like any but no warning if x not logical
}
cat0 <- function(...) # cat with no added spaces
{
    cat(..., sep="")
}
check <- function(object, object.name, check.name, check.func, na.ok=FALSE)
{
    any <- check.func(object)
    if(na.ok)
        any <- any[!is.na(any)]
    else {
        which.na <- which(is.na(any))
        if(length(which.na)) {
            stopf("NA in %s\n       %s[%d] is %g",
                  object.name, object.name,
                  which.na[1], object[which.na[1]])
        }
    }
    if(any(any)) {
        which <- which(check.func(object))
        stopifnot(length(which) > 0)
        stopf("%s in %s\n       %s[%d] is %g",
              check.name, object.name, object.name,
              which[1], object[which[1]])
    }
}
# TODO commented out the following because it is too slow for big data
#      (the as.character is very slow)
#
# # The args argument is assumed to be a list of arguments for do.call.
# # An argument in args will be an unforced promise if it couldn't be
# # evaluated earlier e.g. if call.plot was invoked with arg=nonesuch.
# # If an argument is such an unforced promise, issue an error message now
# # to prevent very confusing error messages later.  To do this, we have to
# # determine if the arg is a promise, which we do with the if statement
# # below.
# # This makes me nervous, because the R language manual says "There is
# # generally no way in R code to check whether an object is a promise or not".
#
# check.do.call.args <- function(func, args, fname)
# {
#     stopifnot(is.list(args))
#     for(i in seq_along(args)) {
#         if(length(args[i]) == 1 && !is.na(args[i]) &&
#            substr(as.character(args[i]), 1, 2) == "..") {
#             printf("\n")
#             s <- paste0(strwrap(list.as.char(args),
#                         width=getOption("width"), exdent=7), collapse="\n")
#             stop0("cannot evaluate ", quotify(names(args)[i], "'"),
#                   " in\n       ", fname, "(", s, ")")
#         }
#     }
# }

# mostly for checking user arguments (so error wording is for that)
# but also occasionally used for other sanity checking

check.boolean <- function(b) # b==0 or b==1 is also ok
{
    if(length(b) != 1)
        stop0("the ", short.deparse(substitute(b), "given"),
              " argument is not FALSE, TRUE, 0, or 1")
    if(!(is.logical(b) || is.numeric(b)) || is.na(b) || !(b == 0 || b == 1))
        stop0(short.deparse(substitute(b), "the argument"), "=", as.char(b),
            " but it should be FALSE, TRUE, 0, or 1")
    b != 0 # convert to logical
}
is.boolean <- function(b) # b==NA or b==0 or b==1
{
    length(b) == 1 &&
    (is.logical(b) || is.numeric(b)) &&
    (is.na(b) || b == 0 || b == 1)
}
check.classname <- function(object, substituted.object, allowed.classnames)
{
    expected.classname <- quotify(allowed.classnames)
    if(length(allowed.classnames) > 1)
        expected.classname <- sprint("one of\n%s", expected.classname)
    if(is.null(object))
        stopf("object is NULL but expected an object of class of %s",
              expected.classname)
    if(!inherits(object, allowed.classnames)) {
        stopf("the class of %s is \"%s\" but expected the class to be %s",
              quotify(paste.trunc(substituted.object, maxlen=30)),
              class(object)[1], expected.classname)
    }
}
# adjust name so e.g. error message is "argument is NULL" not "NULL is NULL"
tweak.name <- function(name, quote=TRUE)
{
    quoted.name <- quotify(name, quote="'")
    if(name %in% c("NULL", "NA") ||
           (substr(name[1], 1, 1) %in% c("+", "-")) ||
           grepl("[0-9]", substr(name[1], 1, 1))) {
        quoted.name <- name <- "argument"
    }
    if(quote) quoted.name else name
}
check.integer.scalar <- function(object, min=NA, max=NA, null.ok=FALSE,
                                 na.ok=FALSE, logical.ok=TRUE,
                                 char.ok=FALSE,
                                 object.name=short.deparse(substitute(object)))
{
    stop.msg <- function(s)
    {
        s.null    <- if(null.ok)    ", or NULL"          else ""
        s.na      <- if(na.ok)      ", or NA"            else ""
        s.logical <- if(logical.ok) ", or TRUE or FALSE" else ""
        s.char    <- if(char.ok)    ", or a string"      else ""
        stop0(s,
              " but it should be an integer",
              s.null, s.na, s.logical, s.char)
    }
    if(is.character(object)) {
        if(!char.ok || length(object) != 1)
            stop.msg(paste0(tweak.name(object.name), " is a string"))
    } else {
        check.numeric.scalar(object, min, max, null.ok, na.ok, logical.ok,
                             char.ok.msg=char.ok, object.name=object.name)
        if(!is.null(object) && !is.na(object) && object != floor(object))
            stop.msg(paste0(tweak.name(object.name, quote=FALSE), "=", object[1]))
    }
    object
}
check.level.arg <- function(level, zero.ok)
{
    if(anyNA(level) || is.null(level)) # treat NA and NULL as 0
        level <- 0
    check.numeric.scalar(level)
    if(!((zero.ok && level == 0) || level >= .5 || level < 1)) {
        stop0("level=", level, " but it should be ",
              if(zero.ok) "zero or " else "", "between 0.5 and 1")
    }
    level
}
check.no.na.in.mat <- function(object)
{
    if(anyNA(object)) { # quick initial check
        # detailed check for detailed error message
        for(icol in seq_along(ncol(object))) {
            check.name <-
                if(!is.null(colnames(object)))
                    colnames(object)[icol]
                else
                    sprint("%s[,%d]",
                        short.deparse(substitute(object), "matrix"), icol)

            check(object[,icol], check.name, "NA", is.na, na.ok=FALSE)
        }
    }
}
# x can be a data.frame or matrix
check.df.numeric.or.logical <- function(x, xname=trunc.deparse(substitute(x)))
{
    stopifnot(!is.null(x), length(dim(x)) == 2)
    for(icol in seq_len(NCOL(x))) {
        if(!is.numeric(x[,icol]) && !is.logical(x[,icol]))
            stopf("the class of %s is \"%s\" (expected numeric or logical)",
                  colname(x, icol, xname), class(x[,icol]))
        is.na <- is.na(x[,icol])
        if(any(is.na))
            stopf("%s[%g] is NA", colname(x, icol, xname), which(is.na)[1])
        is.infinite <- !is.finite(x[,icol])
        if(any(is.infinite))
            stopf("%s[%g] is Inf", colname(x, icol, xname), which(is.infinite)[1])
    }
}
check.numeric.scalar <- function(object, min=NA, max=NA, null.ok=FALSE,
                                 na.ok=FALSE, logical.ok=FALSE,
                                 char.ok.msg=FALSE, # only affects error msg
                                 object.name=short.deparse(substitute(object)))
{
    s.logical <- if(logical.ok) ", or TRUE or FALSE" else ""
    if(na.ok)
        logical.ok <- TRUE # needed because NA is a logical
    any.na <- !is.null(object) &&
                # following needed because anyNA gives error on some objects
                (is.numeric(object) || is.logical(object) ||
                    is.list(object) || is.character(object)) &&
                anyNA(object)
    if(is.null(object)) {
        if(!null.ok)
            stop0(tweak.name(object.name), " is NULL")
    } else if(any.na && !na.ok)
        stop0(tweak.name(object.name), " is NA")
    else if(!is.numeric(object) && !(is.logical(object) && logical.ok)) {
        s.na   <- if(na.ok)       ", or NA"       else ""
        s.null <- if(null.ok)     ", or NULL"     else ""
        s.char <- if(char.ok.msg) ", or a string" else ""
        stopf("%s must be numeric%s%s%s%s (whereas its current class is %s)",
              tweak.name(object.name),
              s.null, s.na, s.char, s.logical,
              class.as.char(object, quotify=TRUE))
    } else if(length(object) != 1)
        stopf("the length of %s must be 1 (whereas its current length is %d)",
              tweak.name(object.name), length(object))
    if(!is.null(object) && !any.na) {
        if(!is.na(min) && !is.na(max) && (object < min || object > max)) {
            stop0(tweak.name(object.name, quote=FALSE), "=", object,
                  " but it should be between ", min, " and ", max)
        }
        if(!is.na(min) && object < min) {
            stop0(tweak.name(object.name, quote=FALSE), "=", object,
                  " but it should be at least ", min)
        }
        if(!is.na(max) && object > max) {
            stop0(tweak.name(object.name, quote=FALSE), "=", object,
                  " but it should not be greater than ", max)
        }
    }
    object
}
# We allow 20% of x to be nonpositive, useful if the response is essentially
# positive, but the predicted response has a few nonpositive values at the extremes.
# Needed for example if we will later take log(x) or sqrt(x).

check.that.most.are.positive <- function(x, xname, user.arg, non.positive.msg, frac.allowed=.2)
{
    check.numeric.scalar(frac.allowed)
    stopifnot(frac.allowed >= 0, frac.allowed <= 1)
    nonpos <- x <= 0
    if(sum(nonpos, na.rm=TRUE) > frac.allowed * length(x)) { # more than frac.allowed nonpos?
        ifirst <- which(nonpos)[1]
        stop0(sprint(
                "%s is not allowed because too many %ss are %s\n",
                user.arg, unquote(xname), non.positive.msg),
              sprint(
                "       %.2g%% are %s (%g%% is allowed)\n",
                100 * sum(nonpos) / length(x), non.positive.msg, 100 * frac.allowed),
               sprint("       e.g. %s[%d] is %g", unquote(xname), ifirst, x[ifirst]))
    }
}
check.vec <- function(object, object.name, expected.len=NA, logical.ok=TRUE, na.ok=FALSE)
{
    if(!(NROW(object) == 1 || NCOL(object) == 1))
        stop0(tweak.name(object.name), " is not a vector\n       ",
              "It has dimensions ", NROW(object), " by ", NCOL(object))
    if(!((logical.ok && is.logical(object)) || is.numeric(object)))
        stop0(tweak.name(object.name), " is not numeric")
    if(!is.na(expected.len) && length(object) != expected.len)
        stop0(tweak.name(object.name), " has the wrong length ",
              length(object), ", expected ", expected.len)
    if(na.ok)
        object[is.na(object)] <- 1 # prevent check is.finite from complaining
    else
        check(object, object.name, "NA", is.na)
    check(object, object.name, "non-finite value", function(object) {!is.finite(object)})
}
cleantry <- function(err) # clean up a try.err (remove "Error: " etc.)
{
    stopifnot(is.try.err(err))
    attributes(err) <- NULL
    err <- gsub("^[^:]*: *", "", err) # remove "Error: " (actually everything up to the first colon)
    err <- gsub("\n", " ", err, fixed=TRUE)  # remove newlines
    err <- gsub("  +", " ", err)             # multiple spaces to single spaces
    gsub(" $", "", err)                      # remove trailing space
}
# returns the column name, if that is not possible then something like x[,1]
colname <- function(object, i, object.name=trunc.deparse(substitute(object)))
{
    check.numeric.scalar(i)
    check.index(i, object.name, object, is.col.index=TRUE, allow.negatives=FALSE)
    colnames <- safe.colnames(object)
    if(!is.null(colnames))
        colnames[i]
    else if(NCOL(object) > 1)
        sprint("%s[,%g]", object.name, i)
    else
        sprint(object.name)
}
# if trace>0 or the func fails, then print the call to func

do.call.trace <- function(func, args,
                          fname=short.deparse(deparse(func), "FUNC"), trace=0)
{
    stopifnot(is.logical(trace) || is.numeric(trace), length(trace) == 1)
    # TODO commented out the following because it is too slow for big data
    # check.do.call.args(func, args, fname)
    trace <- as.numeric(trace)
    if(trace > 0)
        printf.wrap("%s(%s)\n", fname, list.as.char(args))
    try <- try(do.call(what=func, args=args), silent=TRUE)
    if(is.try.err(try)) {
        if(trace == 0) # didn't print call above? then print it now
            printf.wrap("\n%s(%s)\n\n", fname, list.as.char(args))
        else if(trace >= 2) # TODO is this best?
            printf("\n")
        # Re-call func so user can do a traceback within the function.  Note that
        # if do.call.trace was called with try, this will be caught by that try.
        # TODO is there a better way to achieve this, perhaps using tryCatch
        #      this could be confusing if func has side effects (unlikely)
        do.call(what=func, args=args)
        # should never get here
        stop0("second do.call(", fname,
              ", ...) did not give the expected error: ", try[1])
    }
    invisible(try) # TODO is invisible necessary?
}
# identical to base::eval() but has trace and expr.name arguments
eval.trace <- function(
    expr,
    envir     = parent.frame(),
    enclos    = if(is.list(envir) || is.pairlist(envir))
                   parent.frame()
                else
                   baseenv(),
    trace     = 0,
    expr.name = NULL)
{
    stopifnot(is.environment(envir))
    stopifnot(is.environment(enclos))
    if(trace >= 2)
        printf("eval(%s, %s)\n",
            if(is.null(expr.name)) trunc.deparse(substitute(expr))
            else                   expr.name,
            environment.as.char(envir))

    eval(expr, envir, enclos)
}
exp10 <- function(x) # e.g. exp10(-3) = 1e-3
{
    exp(x * log(10))
}
# This function is used for checking both xlim and ylim.
# This checks that lim is is a 2 element numeric vector.
# Also, if xlim[1] == xlim[2], then plot() issues a confusing message.
# We don't want that, so use this function to make sure xlim[2]
# is different to xlim[1].

fix.lim <- function(lim)
{
    if(!is.null(lim) && !inherits(lim, "Date")) {
        stopifnot(is.numeric(lim), length(lim) == 2)
        # constants below are arbitrary
        small <- max(1e-6, .01 * abs(lim[1] - lim[2]))
        if(abs(lim[2] - lim[1]) < small) # illegal lim?
            lim <- c(lim[1] - small, lim[2] + small)
    }
    lim
}
# Ensure all columns of x have column names.  Won't overwrite existing column names.

gen.colnames <- function(x, prefix="x", alt.prefix=prefix, trace=0)
{
    if(NCOL(x) == 0)
        return(NULL)

    # If prefix is long and has characters like ( or [ then use the
    # alternate prefix.  This is sometimes necessary when prefix is
    # generated using deparse and the arg is something like
    # "cbind(trees$Volume,trees$Volume+100)"

    if(any(nchar(prefix) > 30) && grepany("[([,]", prefix)) {
        trace2(trace, "using alt.prefix \"%s\" instead of prefix \"%s\"\n",
               alt.prefix, prefix)
        prefix <- alt.prefix
    }
    stopifnot(length(prefix) <= NCOL(x))
    prefix <- substr(prefix, 1, 60)
    new.colnames <-
        if(NCOL(x) == length(prefix))
            prefix
        else if(grepany("\\[", prefix))
            new.colnames <- paste0(prefix, "[", seq_len(NCOL(x)), "]")
        else
            new.colnames <- paste0(prefix, seq_len(NCOL(x)))
    colnames <- org.colnames <- colnames(x)
    if(is.null(colnames))
        colnames <- new.colnames
    else {
        missing <- !nzchar(colnames)
        if(any(missing))
            colnames[missing] <- new.colnames[missing]
    }
    if(length(unique(colnames)) != length(colnames))
        stop0("Duplicate colname in ", paste.trunc(prefix),
              " (colnames are ",
              paste.with.quotes(colnames, maxlen=60), ")")
    if(trace >= 2 && !identical(org.colnames, colnames))
        trace2(trace, "colname%s %s now %s\n",
            if(length(colnames) > 1) "s were" else " was",
            if(is.null(org.colnames)) "NULL"
            else paste.trunc(quotify(org.colnames)),
            paste.trunc(quotify(colnames)))
    colnames
}
get.mean.rsq <- function(rss, tss, wp)
{
    if(is.null(wp))
        wp <- repl(1, length(rss))
    stopifnot(length(rss) == length(tss), length(wp) == length(tss))
    total.rsq <- 0
    for(iresp in seq_along(rss))
        total.rsq <- total.rsq + wp[iresp] * get.rsq(rss[iresp], tss[iresp])
    sum(total.rsq) / sum(wp)
}
# Get the environment for evaluating the model data:
#   1. Return the environment in which the model function
#      was originally called.
#   2. Else if the model already has an attribute .Environment, use that.
#   3. Else return the environment in which the caller of this function
#      was called (e.g. return the environment of plotmo's caller).

get.model.env <- function(object, object.name="object", trace=0, use.submodel=FALSE)
{
    # check args, because this func is called very early in plotmo (and friends)
    check.numeric.scalar(trace, logical.ok=TRUE)
    if(trace >= 2) {
        callers.name <- callers.name()
        my.call <- call.as.char(n=2)
        printf.wrap("%s trace %g: %s\n", callers.name, trace, my.call)
        printf("--get.model.env for object with class %s\n", class.as.char(object))
    }
    stopifnot.string(object.name)
    if(is.null(object))
        stopf("argument %s is NULL", object.name)
    if(!is.list(object))
        stopf("%s is not an S3 model", object.name)
    if(class(object)[1] == "list") # some packages build models without a specific class
        stopf("%s is a plain list, not an S3 model", object.name)
    obj <- object

    # Special handling for parsnip models. Their class is like c("_earth", "model_fit").
    # For these models, use the env if any saved with the submod (e.g. earth)
    # (We don't do this for caret models because caret models have a terms field.)
    #
    # TODO this code is preliminary (works with parsnip 0.1.3)
    # and only works if model saves the data (e.g. lm, earth(keepxy=TRUE), not rpart
    if(use.submodel && inherits(object, "model_fit")) { # parsnip
        trace2(trace, "plotmo parsnip model: will plot %s$fit, not %s itself\n",
               gsub("'", "", object.name), object.name)
        obj <- object[["fit"]]
        if(!is.list(obj)) # sanity check
            stopf("plotmo parsnip model: %s$fit is not an S3 model",
                  gsub("'", "", object.name))
        # TODO following is temporary, hopefully
        if(inherits(obj, "rpart") && is.null(obj$model))
            stop0(
                "Cannot plot parsnip rpart model: need model=TRUE in call to rpart\n",
                "       Do it like this: set_engine(\"rpart\", model=TRUE)")
    }
    if(trace >= 2) {
        call <- getCall(obj)
        if(is.null(call))
            printf("object has no call field (it's class is %s)\n", class.as.char(object))
        else
            printf.wrap("object call is %s\n", strip.deparse(call), maxlen=120)
    }
    terms <- try(terms(obj), silent=trace < 3)

    # Following will fail (correctly) for non-formula models because they have no terms.
    #
    # TODO Also, if use.submodel, don't use terms (because the term env was
    #      inside the parsnip func that created the submodel)
    #      But that also fails later when we eval the formula because
    #      eval will use GlobalEnv instead of the data passed to the model

    if(!is.null(terms) && !is.try.err(terms)) {
        model.env <- attr(terms, ".Environment")
        if(is.null(model.env)) {
            if(inherits(obj, "glmnet.formula") ||  # glmnetUtils package
               inherits(obj, "cv.glmnet.formula"))
                if(inherits(obj, "glmnet.formula"))
                    stop0(
"for this plot, glmnet.formula must be called with use.model.frame=TRUE")
                if(inherits(obj, "cv.glmnet.formula"))
                    stop0(
"for this plot, cv.glmnet.formula must be called with use.model.frame=TRUE")
            stop0("attr(terms, \".Environment\") is NULL")
        }
        if(!is.environment(model.env))
            stop0("attr(terms, \".Environment\") is not an environment")
        else {
            trace2(trace, "using the environment saved in $terms of the %s model: %s\n",
                   class.as.char(obj), environment.as.char(model.env))
            return(model.env)
        }
    }
    model.env <- attr(obj, ".Environment")
    if(is.environment(model.env)) {
        trace2(trace, "using attr(obj,\".Environment\") saved with %s model: %s\n",
               class.as.char(obj), environment.as.char(model.env))
        return(model.env)
    }
    if(!is.null(model.env))
        stop0("attr(obj, \".Environment\") is not an environment")

    # n=2 is the caller of the function that called get.model.env
    # for plotmo it will be the caller of plotmo, typically R_GlobalEnv
    model.env <- parent.frame(n=2)

    trace2(trace,
           "assuming the environment of the %s model is that of %s's caller: %s\n",
           class.as.char(obj), callers.name, environment.as.char(model.env))

    model.env
}
get.rsq <- function(rss, tss)
{
    rsq <- 1 - rss / tss
    # following makes testing easier across machines in presence of numerical error
    rsq[rsq > -1e-5 & rsq < 1e-5] <- 0
    rsq
}
get.weighted.rsq <- function(y, yhat, w=NULL) # NAs will be dropped before calc
{
    stopifnot(length(y) > 0, length(y) == length(yhat))
    if(is.null(w)) {
        is.na <- is.na(y) | is.na(yhat)
        y    <- y[!is.na]
        yhat <- yhat[!is.na]
        if(length(y) == 0)
            stop0("length(y) == 0 after deleting NAs in y or yhat")
        rss <- sos(y - yhat)
        tss <- sos(y - mean(y))
    } else {
        stopifnot(length(w) == length(yhat))
        is.na <- is.na(y) | is.na(yhat) | is.na(w)
        y    <- y[!is.na]
        yhat <- yhat[!is.na]
        w    <- w[!is.na]
        if(length(y) == 0)
            stop0("length(y) == 0 after deleting NAs in y or yhat or w")
        rss <- sos(y - yhat, w)
        tss <- sos(y - weighted.mean(y, w), w)
    }
    get.rsq(rss, tss)
}
# TRUE if pattern is in any of the strings in x
grepany <- function(pattern, x, ignore.case=FALSE, ...)
{
    any(grepl(pattern, x, ignore.case=ignore.case, ...))
}
# scalar form of ifelse, with short name :-)
# only evaluates the "no" argument if necessary
ife <- function(ife.test, ife.yes, ife.no)
{
    ife.test <- check.boolean(ife.test)
    stopifnot(!missing(ife.yes))
    stopifnot(!missing(ife.no))
    if(ife.test) ife.yes else ife.no
}
# returns an index, choices is a vector of strings
imatch.choices <- function(arg, choices,
            argname=short.deparse(substitute(arg), "function"),
            errmsg.has.index=FALSE, # TRUE if integer "arg" is legal elsewhere
            errmsg="",              # error message, "" for automatic
            errmsg.ext="")          # extension to error message
{
    errmsg.ext <- paste0(
               if(errmsg.has.index) " an integer index or" else "",
               if(nchar(errmsg.ext)) paste0(" ", errmsg.ext, " or") else "")
    if(nchar(errmsg) == 0)
        errmsg <- sprint("Choose%s one of: %s", errmsg.ext, quotify(choices))
    if(!is.character(arg) || length(arg) != 1 || !nzchar(arg))
         stopf("illegal %s argument\n%s", quotify(argname, "'"), errmsg)
    if(argname %in% c("NULL", "NA"))
        argname <- "argument"
    imatch <- pmatch(arg, choices)
    if(anyNA(imatch)) {
        imatch <- NULL
        for(i in seq_along(choices))
            if(pmatch(arg, choices[i], nomatch=0))
                imatch <- c(i, imatch)
        if(length(imatch) == 0) {
            if(length(choices) == 1)
                stopf("%s=\"%s\" is not allowed\n       Only%s %s is allowed",
                      argname, paste(arg), errmsg.ext, quotify(choices))
            else
                stopf("%s=\"%s\" is not allowed\n%s",
                      argname, paste(arg), errmsg)
        }
        if(length(imatch) > 1)
            stopf("%s=\"%s\" is ambiguous\n%s",
                  argname, paste(arg), errmsg)
    }
    imatch
}
# TRUE if all values in object are integers, ignoring NAs
# assumes object is numeric or logical (check this before call this function)
is.integral <- function(object)
{
    object <- object[!is.na(object)]

    length(object) > 0 &&
    is.null(dim(object)) && # prevent error in floor for e.g. survival objects
    all(floor(object) == object)
}
# is.specified's main purpose is to see if a plot component should be
# drawn, i.e., to see if the component "has a color"

is.specified <- function(object)
{
    try <-
      try(!is.null(object) && !anyNA(object) && !is.zero(object) &&
          # following needed for e.g. col=c("red", 0) because 0 is converted to string
          !identical(object, "0") && !identical(object, "0L") &&
          !identical(object, "NA"), silent=FALSE)
    if(is.try.err(try)) {
        # this occurs if object is say a closure and anyNA fails
        # anyNA was introduced in R 3.1.0
        printf("\n") # separate from any message printed by try() above
        stop0(deparse(substitute(object)), ": illegal value")
    }
    try
}
is.try.err <- function(object)
{
    class(object)[1] == "try-error"
}
is.zero <- function(object) # needed because identical(object, 0) fails if object is 0L
{
    identical(object, 0) || identical(object, 0L)
}
# Lighten color by amount 0 ... 1 where 1 is white.
# If amount is negative, then darken the color, -1 is black.

lighten <- function(col, lighten.amount, alpha=1)
{
    # stopifnot.scalar(lighten.amount)
    # stopifnot(lighten.amount >= -1 && lighten.amount <= 1)
    rgb <- col2rgb(col) / 255
    # empirically, sqrt makes visual effect of lighten.amount more linear
    lighten.amount2 <- sqrt(abs(lighten.amount))
    rgb <-
        if(lighten.amount > 0)
            rgb + lighten.amount2 * (c(1,1,1) - rgb) # move each r,g,b towards 1
        else # darken
            rgb - lighten.amount2 * rgb              # move each r,g,b towards 0
    rgb[rgb < 0] <- 0 # clamp
    rgb[rgb > 1] <- 1
    if(alpha == 1)
        rgb(rgb[1,], rgb[2,], rgb[3,])
    else
        rgb(rgb[1,], rgb[2,], rgb[3,], alpha)
}
# returns the expanded arg (error msg if arg is not an allowed choice in calling func)
match.arg1 <- function(arg, argname=deparse(substitute(arg)))
{
    formal.args <- formals(sys.function(sys.parent()))
    formal.argnames <- eval(formal.args[[argname]])
    formal.argnames[imatch.choices(arg[1], formal.argnames, argname)]
}
# returns a string, choices is a vector of strings
# error msg if arg is not an allowed choice
match.choices <- function(arg,
            choices,
            argname=deparse(substitute(arg)),
            errmsg="",              # error message ("" for automatic)
            errmsg.ext="")          # extension to error message
{
    choices[imatch.choices(arg, choices, argname,
                           errmsg=errmsg, errmsg.ext=errmsg.ext)]
}
# This uses the object's .Environment attribute, which was
# pre-assigned to the object via get.model.env
# If this gives an error saying that class(model.env) is "NULL"
# then that pre-assignment wasn't done.

model.env <- function(object)
{
    model.env <- attr(object, ".Environment")
    if(!is.environment(model.env))
        stopf("class(model.env) is \"%s\"", class(model.env)[1])
    model.env
}
# Like as.data.frame() but retains the original colnames, if any, and can
# handle matrices from the Matrix etc. packages, if as.matrix() works for
# them.  Also it has a stringsAsFactors argument which works even if x is
# already a data.frame.

my.data.frame <- function(x, trace, stringsAsFactors=TRUE)
{
    if(is.data.frame(x)) {
        if(stringsAsFactors) {
            # Convert any character columns to factors.  Note as.data.frame
            # won't do this for us when x is already a data.frame.
            # We don't have a levels argument to pass to factor()
            # but I believe that this will not be a problem in the
            # context in which we use my.data.frame (plotmo_x).
            for(i in seq_len(length(x)))
                if(is.character(x[[i]]))
                    x[[i]] <- factor(x[[i]])
            }
       return(x)
    }
    df <- try(as.data.frame(x, stringsAsFactors=stringsAsFactors), silent=TRUE)
    if(is.try.err(df)) {
        # come here for sparse matrices from the Matrix package
        df <- try(as.matrix(x))
        if(is.try.err(df))
            stopf("Cannot convert %s object to a data.frame or matrix",
                  quotify(class(x)[1]))
        df <- as.data.frame(df, stringsAsFactors=stringsAsFactors)
        trace2(trace, "converted %s object to data.frame\n", class(x)[1])
    }
    colnames(df) <- safe.colnames(x) # restore original column names
    df
}
# default min.nrow=3 to use fixed point only if more than intercept and one other term
my.fixed.point <- function(x, digits, min.nrow=3)
{
    if(is.null(dim(x)))
        x <- as.matrix(x)
    if(NROW(x) >= min.nrow)
        x <- apply(x, 2, zapsmall, digits+1)
    x
}
# If s is a string vector s, return the number of lines in
# the element that has the most lines
# Examples: nlines(c(" ", " \n ") is 2
#           nlines(c(" ", " \n")  is 2
#           nlines(" ")           is 1
#           nlines("")            is 0  (special case)

nlines <- function(s)
{
    if(!nzchar(s[1])) # special case, caption="" is not printed
        0
    else if(anyNA(s))
        0
    else
        length(strsplit(s, "\n")[[1]])
}
paste.c <- function(object, maxlen=16) # return 'x1' or 'c(x1, x2)'
{
    if(length(object) == 1)
        paste.trunc(object)
    else
        paste0("c(", paste.trunc(object, collapse=",", maxlen=maxlen), ")")
}
paste.with.quotes <- function(object, maxlen=16) # return '"x1"' or '"x1", "x2"'
{
    if(is.null(object[1]))
        "NULL"
    else if(length(object) == 0)
        "EMPTY"
    else
        paste0(paste.trunc("\"", object, "\"", collapse=", ", sep="", maxlen=maxlen))
}
paste.collapse <- function(...)
{
    paste(..., collapse=" ")
}
# collapse, and truncate if strings in ... are too long
paste.trunc <- function(..., sep=" ", collapse=" ", maxlen=60)
{
    s <- paste(..., sep=sep, collapse=collapse)
    if(nchar(s) > maxlen) {
        stopifnot(maxlen > 3)
        s <- paste0(substr(s, 1, maxlen-3),
                    if(substr(s, maxlen-3, maxlen-3) == ".") ".." # avoid 4 dots
                    else                                     "...")
    }
    s
}
pastef <- function(s, fmt, ...) # paste the printf style args to s
{
    paste0(s, sprint(fmt, ...))
}
print_first_few_elements_of_vector <- function(x, trace, name=NULL)
{
    try(cat(" min", min(x), "max", max(x)), silent=TRUE)
    spaces <- "               "
    if(!is.null(name))
        spaces <- sprint("%*s", nchar(name), " ")  # nchar spaces
    cat0("\n", spaces, " value")
    len <- if(trace >= 4) length(x)
           else           min(if(is.logical(x)) 20 else 10, length(x))
    if(is.logical(x))
        for(i in 1:len)
            cat0(if(x[i]) " T" else " F")
    else
        for(i in 1:len)
            cat0(" ", x[i])
    if(length(x) > len)
        cat(" ...")
   cat("\n")
    if(trace >= 4) {
        cat("\n")
        print(summary(x))
    }
}
# A safe version of sprintf.
# Like sprintf except that %s on NULL prints "NULL" rather than
# preventing the entire string from being printed
#
# e.g. sprintf("abc %s def", NULL) returns an empty string -- a silent failure!
# but   sprint("abc %s def", NULL) returns "abc NULL def"
#
# e.g. sprintf("abc %d def", NULL) returns an empty string!
# but   sprint("abc %d def", NULL) causes an error msg (not a silent failure)

sprint <- function(fmt, ...)
{
    dots <- list(...)
    dots <- lapply(dots, function(e) if(is.null(e)) "NULL" else e)
    do.call(sprintf, c(fmt, dots))
}
printf <- function(fmt, ...) # like c printf
{
    cat(sprint(fmt, ...), sep="")
}
# like printf but wrap at terminal width
# exdent=NULL for automatic determination of xdent (line up to func opening paren)
# TODO maxlen seems to be ignored, strwrap truncates before that?
printf.wrap <- function(fmt, ..., exdent=NULL, maxlen=2000)
{
    s <- paste.trunc(paste.collapse(sprint(fmt, ...)), maxlen=maxlen)
    if(is.null(exdent)) {
        # align to opening paren of func call e.g. "graphics::par(xxx)" or "foo$method("
        # TODO this doesn't account for leading newlines if any
        exdent <- 4
        igrep <- gregexpr("[ ._$:[:alnum:]]+\\(", s)[[1]]
        if(igrep[1] == 1) {
            len <- attr(igrep, "match.length")[1]
            exdent <- min(25, len)
        }
    }
    # strwrap doesn't preserve newlines in the input string, so do it manually :(
    for(i in seq_len(nchar(s))) # print leading newlines
        if(substr(s, i, i) == "\n") cat0("\n") else break

    cat(paste0(strwrap(s, width=getOption("width"), exdent=exdent),
               collapse="\n"))

    if(nchar(s) > i) for(j in nchar(s):i) # print trailing newlines
        if(substr(s, j, j) == "\n") cat0("\n") else break
}
pt.cex <- function(ncases, npoints=ncases)
{
    n <- if(npoints > 0) min(npoints, ncases) else ncases

    if     (n >= 20000) .2
    else if(n >=  5000) .3
    else if(n >=  3000) .4
    else if(n >=  1000) .6
    else if(n >=  300)  .8
    else if(n >=  30)   1
    else                1.2
}
# like short.deparse but quotify the deparsed obj (unless the alternative is used)
quote.deparse <- function(object, alternative="object")
{
    s <- strip.deparse(object)
    if(nchar(s) > 60)
        alternative
    else
        quotify(s, quote="'")
}
quote.with.c <- function(names) # return "x" or c("x1", "x2")
{
    if(length(names) == 1)
        sprint("\"%s\"", names)
    else
        sprint("c(%s)", paste0("\"", paste(names, collapse="\", \""), "\""))
}
quotify <- function(s, quote="\"") # add quotes and collapse to a single string
{                                  # called quotify because quote is taken
    if(is.null(s))
        "NULL"
    else if(length(s) == 0)
        paste0(quote, quote)
    else if(substr(s[1], 1, 1) == "'" || substr(s[1], 1, 1) == "\"")
        paste.collapse(s) # already has quotes
    else
        paste0(quote, paste(s, collapse=paste0(quote, " ", quote)), quote)
}
# like quotify, but use the alternative name if s is too long
quotify.short <- function(s, alternative="object", quote="\"")
{
    stopifnot(is.character(s))
    s <- paste0(s, collapse="")
    if(nchar(s) > 60) # 60 is arb but seems ok for plot titles etc
        alternative
    else
        quotify(s, quote)
}
quotify.trunc <- function(s, quote="\"", maxlen=60)
{
    stopifnot(is.character(s))
    s <- quotify(s, quote)
    if(nchar(s) > maxlen) {
        stopifnot(maxlen > 3)
        paste0(substr(s, 1, maxlen-3), "...")
    } else
        s
}
range1 <- function(object, ...)
{
    stopifnot(length(dim(object)) <= 2)
    if(!is.null(dim(object)))
        object <- object[,1]
    if(is.factor(object))
        c(1, nlevels(object))
    else if(inherits(object, "Date")) # Sep 2020, R 4.0.2: range no longer works with Date objects
        c(min(object), max(object))
    else
        range(object, finite=TRUE, ...)
}
recycle <- function(object, ref.object)
{
    repl(object, length.out=length(ref.object))
}
repl <- function(object, length.out)
{
    # following "if" added for R-2.15.3 otherwise
    # get warning: 'x' is NULL so the result will be NULL
    if(is.null(object))
        return(NULL)
    check.numeric.scalar(length.out)
    stopifnot(floor(length.out) == length.out)
    stopifnot(length.out > 0)
    rep(object, length.out=length.out)
}
# the standard colnames() can crash for certain objects
# TODO figure out when and why

safe.colnames <- function(object)
{
    colnames <- try(colnames(object), silent=TRUE)
    if(is.try.err(colnames))
        NULL
    else
        colnames
}
# if deparse(object) is too long, return the alternative
short.deparse <- function(object, alternative="object")
{
    s <- strip.deparse(object)
    if(nchar(s) > 60)
        alternative
    else
        s
}
# Remove duplicates in x, then sort (smallest first).
# Also works for Dates.
sort_unique <- function(x)
{
    sort(unique(x), na.last=NA) # na.last=NA drops NAs
}
sos <- function(x, weights=NULL) # sum of squares
{
    if(is.null(weights))
        sum(as.vector(x^2))
    else {
        stopifnot(length(weights) == length(x))
        sum(weights * as.vector(x^2))
    }
}
stop0 <- function(...)
{
    stop(..., call.=FALSE)
}
stopf <- function(fmt, ...) # args like printf
{
    stop(sprint(fmt, ...), call.=FALSE)
}
# stop if s is not a one element character vector
stopifnot.string <- function(s, name=short.deparse(substitute(s)),
                             null.ok=FALSE, allow.empty=FALSE)
{
    if(name %in% c("NULL", "NA"))
        name <- "argument"
    if(is.null(s)) {
        if(null.ok)
            return()
        else
            stop0(quotify(name, "'"), " is NULL (it should be a string)")
    }
    if(!is.character(s))
        stop0(quotify(name, "'"), " is not a character variable (class(",
              name, ") is \"", class(s), "\")")
    if(length(s) == 0)
        stop0(quotify(name, "'"), " is empty (it has no elements)")
    if(length(s) != 1)
        stop0(quotify(name, "'"), " has more than one element\n       ",
              name, " = c(", paste.trunc("\"", s, "\"", sep=""), ")")
    if(!allow.empty && !nzchar(s))
        stop0(quotify(name, "'"), " is an empty string")
}
strip.deparse <- function(object) # deparse, collapse, remove most white space
{
    s <- strip.space.collapse(deparse(object))
    gsub(",", ", ", s) # put back space after commas
}
strip.space <- function(s)
{
    gsub("[ \t\n]", "", s)
}
strip.space.collapse <- function(s) # returns a single string
{
    gsub("[ \t\n]", "", paste(s, collapse="")) # paste converts vec to single
}
# like text, but with a white background
# TODO sign of adj is backwards?

text.on.white <- function(x, y, label,
                          cex=1, adj=.5, font=1, xmar=.3, srt=0, white="white", ...)
{
    stopifnot(length(label) == 1)
    if(length(adj) == 1)
        adj <- c(adj, .5)
    width       <- strwidth(label,  cex=cex, font=font)
    char.width  <- strwidth("X",    cex=cex, font=font)
    height      <- strheight(label, cex=cex, font=font)
    char.height <- strheight("X",   cex=cex, font=font)
    if(srt == 0) {
        if(is.specified(label))
            rect(x - adj[1]     * width  - xmar * char.width,
                 y - adj[2]     * height - .3   * char.height, # .3 for extra space at bottom
                 x + (1-adj[1]) * width  + xmar * char.width,
                 y + (1-adj[2]) * height + .1   * char.height,
                 col=white, border=NA)
        text(x=x, y=y, labels=label, cex=cex, adj=adj, font=font, ...)
    } else if(srt == 90 || srt == -90) {
        # width and height are in usr coords, adjust these for flip of coords
        usr <- par("usr") # xmin, xmax, ymin, ymax
        xrange <- abs(usr[2] - usr[1])
        yrange <- abs(usr[4] - usr[3])
        height <- xrange / yrange * height
        width  <- yrange / xrange * width
        char.height <- xrange / yrange * char.height
        char.width  <- yrange / xrange * char.width
        if(is.specified(label))
            rect(x + (1-adj[1]) * height,                    # left
                 y + (1-adj[2]) * width + xmar * char.width, # bottom
                 x - adj[1]     * height,                    # right
                 y - adj[2]     * width - xmar * char.width, # top
                 col=white, border=NA)
        text(x=x, y=y, labels=label, cex=cex, adj=adj, font=font, srt=srt, ...)
    }
    else
        stop0("srt=", srt, " is not allowed (only 0, 90, and -90 are supported)")
}
to.logical <- function(object, len) # object can be a boolean or numeric vector
{
    xlogical <- repl(FALSE, len)
    xlogical[object] <- TRUE
    xlogical
}
trace0 <- function(trace, fmt, ...)
{
    stopifnot(!(is.numeric(trace) && is.logical(trace)))
    if(trace >= 0)
        cat(sprint(fmt, ...), sep="")
}
trace1 <- function(trace, fmt, ...)
{
    stopifnot(!(is.numeric(trace) && is.logical(trace)))
    if(trace >= 1)
        cat(sprint(fmt, ...), sep="")
}
trace2 <- function(trace, fmt, ...)
{
    stopifnot(is.numeric(trace))
    if(trace >= 2)
        cat(sprint(fmt, ...), sep="")
}
# Truncate deparse(object) if it is too long.
# Necessary because deparse(substitute(x)) might return something very
# long, like c(1000, 1001, 1002, 1003, 1004, 1005, 1006, 1008, 1009, etc.)
# Return a one element character vector.

trunc.deparse <- function(object, maxlen=60)
{
    s <- strip.deparse(object)
    if(nchar(s) > maxlen) {
        stopifnot(maxlen > 3)
        paste0(substr(s, 1, maxlen-3), "...")
    } else
        s
}
# Return the number of lines in s (where lines are separated by \n).
try.eval <- function(
    expr,
    envir     = parent.frame(),
    trace     = 0,
    expr.name = NULL,
    silent    = trace < 2)
{
    if(trace && is.null(expr.name))
        expr.name <- trunc.deparse(substitute(expr))
     try(eval.trace(expr, envir, trace=trace, expr.name=expr.name), silent=silent)
}
unquote <- function(s) # remove leading and trailing quotes, if any
{
    if(is.character(s)) {
        s <- gsub("^\"|^'", "", s)  # leading quotes
        s <- gsub("\"$|'$", "", s)  # trailing quotes
    }
    s
}
# warn.if.not.all.finite helps preempt confusing message from code later.
# Return TRUE if warning issued.

warn.if.not.all.finite <- function(object, text="unknown")
{
    is.factors <- sapply(object, is.factor)
    if(any(is.factors)) {
        if(NCOL(object) == 1 || all(is.factors)) # TODO suspect
            return(FALSE)
        object <- object[, !is.factors]  # remove factor columns before is.finite check
    }
    if(any(sapply(object, is.na))) {
        warning0("NA in ", text)
        return(TRUE)
    }
    if(!all(sapply(object, is.finite))) {
        warning0("non finite value in ", text)
        return(TRUE)
    }
    FALSE
}
warnf <- function(fmt, ...) # args like printf
{
    warning(sprint(fmt, ...), call.=FALSE)
}
warning0 <- function(...)
{
    warning(..., call.=FALSE)
}
# Binomial pairs response: fraction true for each row.
#
# This function is used by both earth and plotmo.
# If you change it here, change it there too.
#
# The first column of y is considered to be "true", the second "false".
#
# Example y:
#   survived died
#      1        1
#      0        0   # both values zero
#      3        4
#
# becomes:
#   survived
#         .5        # 1 / (1 + 1)
#          0        # special case (both survived and died equal to 0)
#        .43        # 3 / (3 + 4)

bpairs.yfrac <- function(y, trace)
{
    stopifnot(NCOL(y) == 2)
    both.zero <- (y[,1] == 0) & (y[,2] == 0)
    y[both.zero, 2] <- 1 # so zero rows will be translated to 0 in next line
    yfrac <- y[, 1, drop=FALSE] / (y[,1] + y[,2]) # fraction true
    trace.bpairs.yfrac(yfrac, trace)
    yfrac
}
trace.bpairs.yfrac <- function(yfrac, trace)
{
    # based on code in print.earth.fit.args
    if(trace >= 4)
        cat("\n")
    if(trace >= 1 && trace < 7) { # don't print matrices when doing very detailed earth.c tracing
        tracex <- if(trace >= 5) 4 else 2 # adjust trace for print_summary
        details <- if(trace >= 4) 2 else if(trace >= 1) -1 else 0
        print_summary(yfrac, "yfrac", tracex, details=details)
        if(details > 1) printf("\n")
    }
}