Nothing
# xy.R: get a model's x or y (the plotmo_x and plotmo_y functions)
#
# Tracing is verbose and error messages are detailed throughout this
# file, to facilitate diagnosis when a model doesn't work with plotmo.
#------------------------------------------------------------------------------
# Return the "x" matrix for a model. This returns a data.frame which
# always has column names. It tries hard to get x regardless of the model.
# It can be used for models without a formula, provided that getCall(object)
# or model$x is available.
#
# The returned columns are for the "naked" predictors e.g. "x3" instead of
# "ns(x3,4)". Column names are manufactured when necessary, as "x1",
# "x2", etc. This is needed for example for rpart(x,y) where x does not
# have column names.
#
# It can handle sparse matrices from the Matrix package. These get
# returned as a (non sparse) data.frame.
#
# If stringsAsFactors=FALSE, strings do not get converted to factors.
plotmo_x <- function(object, trace, stringsAsFactors=TRUE)
{
trace2(trace, "--plotmo_x for %s object\n", class.as.char(object))
x <- plotmo.x(object, trace)
do.subset <- TRUE
# plotmo.x.default returns list(field, do.subset), so handle that
if(is.list(x) && !is.data.frame(x) && !is.null(x$do.subset)) {
do.subset <- check.boolean(x$do.subset)
x <- x$field
}
# Following are mainly for when plotmo.x didn't invoke plotmo.x.default.
# It shouldn't be needed but is included here to make sure.
x <- cleanup.x.or.y(object, x, "x", trace, check.naked=FALSE)
stopifnot(is.good.data(x, "plotmo_x", check.colnames=FALSE))
x <- my.data.frame(x, trace, stringsAsFactors)
if(do.subset) {
subset <- get.and.check.subset(x, object, trace)
if(!is.null(subset)) {
trace2(trace, "subset applied to x[%d,%d] ", NROW(x), NCOL(x))
x <- x[subset, , drop=FALSE]
trace2(trace, "to yield x[%d,%d]\n", NROW(x), NCOL(x))
}
}
colnames(x) <- gen.colnames(x, "x", "x", trace)
print_summary(x, "plotmo_x returned", trace)
x
}
plotmo.x <- function(object, trace, ...)
{
# returns x or list(field=x, do.subset=do.subset)
UseMethod("plotmo.x")
}
plotmo.x.default <- function(object, trace, ...)
{
# returns list(field=x, do.subset=do.subset)
get.x.or.y(object, "x", trace, naked=TRUE)
}
# plotmo_y is similar to model.response but can handle models
# that were created without a formula.
#
# For more details on the args and return value, see process.y.
# If nresponse is not NULL we return the naked response variables
# e.g. Volume not log(Volume).
#
# If convert.glm.response=TRUE and the model is a glm model we may
# convert the response. See convert.glm.response() for details.
plotmo_y <- function(object, nresponse=NULL, trace=0,
expected.len=NULL, resp.levs=NULL,
convert.glm.response=!is.null(nresponse))
{
trace2(trace, "--plotmo_y with nresponse=%s for %s object\n",
if(is.null(nresponse)) "NULL" else format(nresponse),
class.as.char(object))
y <- plotmo.y(object, trace, naked=FALSE, expected.len, nresponse)
do.subset <- TRUE
# plotmo.y.default returns list(field, do.subset), so handle that
if(is.list(y) && !is.data.frame(y) && !is.null(y$do.subset)) {
do.subset <- check.boolean(y$do.subset)
y <- y$field
}
if(convert.glm.response)
y <- convert.glm.response(object, y, trace)
if(do.subset) {
subset <- get.and.check.subset(y, object, trace)
if(!is.null(subset)) {
trace2(trace, "subset applied to y[%d,%d] ", NROW(y), NCOL(y))
y <- if(is.null(dim(y))) y[subset] else y[subset, , drop=FALSE]
trace2(trace, "to yield y[%d,%d]\n", NROW(y), NCOL(y))
}
}
process.y(y, object, type="response", nresponse,
expected.len, resp.levs, trace, "plotmo_y")
}
# Note that the naked argument is irrelevant unless the response was
# specified with a wrapper function like log(Volume) instead of plain Volume.
#
# The default for nresponse allows this to work with old versions of earth
# (old plotmo.y.earth doesn't have a nresponse argument).
plotmo.y <- function(object, trace, naked, expected.len, nresponse=1, ...)
{
# returns y or list(field=y, do.subset=do.subset)
UseMethod("plotmo.y")
}
plotmo.y.default <- function(object, trace, naked, expected.len, ...)
{
# returns list(field=y, do.subset=do.subset)
get.x.or.y(object, "y", trace, try.object.x.or.y=TRUE,
argn=2, nrows.argn=expected.len, naked)
}
# Get x or y from the given model object
# Returns list(field=x, do.subset=do.subset) where x is "x" or "y".
get.x.or.y <- function(
object, # the model
field, # "x" or "y"
trace,
try.object.x.or.y=TRUE, # FALSE if object[[field]] should be ignored
argn=0, # if nonzero, consider argument nbr argn of the model call
nrows.argn=NULL, # expected NROWS of argument argn
naked=TRUE) # TRUE to return colnames like "x3" not "ns(x3,4)"
{
ret.good.field <- function(x, do.subset=TRUE, source)
{
if(trace.call.global >= 1 && field == "y") {
field <- if(field == "x") "predictors" else "response"
if(grepl("model.frame(", source, fixed=TRUE))
source <- sub(",", # insert newline after first comma
if(field == "response")
",\n "
else
",\n ", source)
printf("got model %s from %s\n", field, source)
}
list(field=x, do.subset=do.subset)
}
stopifnot(is.list(object))
stopifnot(field == "x" || field == "y")
# try using object$x (where x is actually x or y throughout this file)
object.x <- get.object.x.or.y.field(object, field, trace, try.object.x.or.y, naked)
# object.x is object$x or NULL or an err msg
if(is.good.data(object.x))
return(ret.good.field(object.x, FALSE, sprint("object$%s", field)))
call <- getCall(object)
if(!is.null(call))
trace2(trace, "\nobject call is %s\n", trunc.deparse(call, maxlen=80))
# try getting x or y from the model formula and model frame
temp <- get.x.or.y.from.model.frame(object, field, trace, naked)
model.frame.x <- temp$x
do.subset <- temp$do.subset # TRUE when newdata is NULL
source <- temp$source
# model.frame.x is now x or y or NULL or an err msg
if(is.good.data(model.frame.x)) {
formula.as.char <- paste.collapse(format(temp$formula))
if(naked && grepl("\`", formula.as.char)) { # exception for hinge funcs etc
trace2(trace, "setting check.naked=FALSE because backtick in formula\n")
naked <- FALSE
}
model.frame.x <- cleanup.x.or.y(object, model.frame.x, field, trace,
check.naked=naked && field != "y")
if(!is.errmsg(model.frame.x))
return(ret.good.field(model.frame.x, do.subset, source))
}
# try getCall(object)$x
call.x <- get.data.from.object.call.field(object, field, trace)
# call.x is getCall(object)$x or an error message
if(is.good.data(call.x))
return(ret.good.field(call.x, TRUE, sprint("getCall(object)$%s", field)))
# else { # TODO may not want to do this if x is ok except for no colnames
# # try getCall(object)$X (note upper case "X")
# upfield <- toupper(field)
# call.x <- get.data.from.object.call.field(object, upfield, trace)
# # call.x is getCall(object)$X or an error message
# if(is.good.data(call.x)) {
# # paranoia, check that argument number is correct
# ifield <- if(field == "x") 2 else 3
# ok <- names(getCall(object))[ifield] == upfield
# if(!is.na(ok) && length(ok == 1) && ok)
# return(ret.good.field(call.x, TRUE,
# sprint("getCall(object)$%s", upfield)))
# else if(trace >= 2)
# printf("ignoring getCall(object)$%s because it isn't arg number %d\n",
# upfield, ifield)
# }
# }
trace2(trace, "\n")
# consider argument number argn of the model call (ignoring its name)
temp <- get.argn.from.call(argn, object, field, trace, nrows.argn)
argn.x <- temp$x
argn <- temp$argn # may clear argn (for uncluttered errmsg later)
# argn.x is the evaluated n'th arg or NULL or an err msg
argn.name <- sprint("argument %g of the model call", argn)
if(is.good.data(argn.x))
return(ret.good.field(argn.x, TRUE, argn.name))
# We don't have an x with colnames, so see if we have one without colnames.
# We re-call is.errmsg() below to prevent re-issuing messages
# in is.good.data() which we have already issued previously.
if(try.object.x.or.y &&
!is.errmsg(object.x) &&
is.good.data(object.x, sprint("object$%s", field),
trace, check.colnames=FALSE))
return(ret.good.field(object.x, FALSE, sprint("object$%s", field)))
if(!is.errmsg(call.x) &&
is.good.data(call.x, sprint("call$%s", field),
trace, check.colnames=FALSE))
return(ret.good.field(call.x, TRUE, sprint("getCall(object)$%s", field)))
if(argn && !is.errmsg(argn.x) &&
is.good.data(argn.x, argn.name, trace, check.colnames=FALSE))
return(ret.good.field(argn.x, TRUE, sprint("object$%s", field)))
# unsuccessful
errmsg.for.get.x.or.y(field, trace,
try.object.x.or.y, argn, object.x,
model.frame.x, call.x, argn.x)
is.earth.cv.model <- is.null(object.x) &&
!is.null(object$ifold) &&
inherits(object, "earth")
stopf("cannot get the original model %s%s",
if(field == "x") "predictors" else "response",
if(is.earth.cv.model) " (use keepxy=2 in the call to earth)" else "")
}
is.errmsg <- function(x)
{
is.try.err(x) || (is.character(x) && length(x) == 1)
}
# Is the x argument a valid x or y for a model?
# This returns TRUE or FALSE, silently unless trace >= 2.
is.good.data <- function(x, xname="field", trace=0, check.colnames=TRUE)
{
good <- !is.null(x) && !is.try.err(x) && NROW(x) >= 3
has.colnames <- good && !is.null(colnames(x)) && !any(colnames(x) == "")
if(trace >= 2)
trace.data(good, has.colnames, x, xname, trace, check.colnames)
good && (!check.colnames || has.colnames)
}
trace.data <- function(good, has.colnames,
x, xname, trace, check.colnames)
{
stopifnot.string(xname)
colnames.msg <-
if(good && has.colnames) {
sprint(" and has column name%s %s",
if(length(colnames(x)) == 1) "" else "s",
paste.trunc(colnames(x), maxlen=100))
} else if(good)
sprint(" but without colnames %s",
if(check.colnames) "so we will keep on searching"
else "but we will use it anyway")
else
""
if(good)
printf("%s is usable%s\n", xname, colnames.msg)
else if(is.null(x))
printf("%s is NULL%s\n", xname,
if(check.colnames) " (and it has no colnames)" else "")
else if(!is.character(x) && NROW(x) < 3)
printf("%s has less than three rows\n", xname,
if(check.colnames) " (and it has no colnames)" else "")
else
printf("%s is not usable%s\n", xname, colnames.msg)
# print bad data, but only on the first go around for this data
# (use check.colnames as an indicator of first go around)
if(!is.null(x) && check.colnames) {
if(!good)
printf("%s:%s\n", xname, format.err.field(x, xname, trace))
else if(trace >= 4) {
printf("trace>=4: ")
print_summary(x, xname, trace=2)
}
}
}
errmsg.for.get.x.or.y <- function(field, trace, try.object.x.or.y,
argn, object.x, model.frame.x, call.x, argn.x)
{
printf("\nLooked unsuccessfully for the original %s in the following places:\n",
if(field == "x") "predictors" else "response")
ifield <- 1
if(try.object.x.or.y) {
printf("\n(%d) object$%s:%s\n",
ifield, field, format.err.field(object.x, field, trace))
ifield <- ifield + 1
}
printf("\n(%d) model.frame:%s\n",
ifield, format.err.field(model.frame.x, field, trace))
ifield <- ifield + 1
printf("\n(%d) getCall(object)$%s:%s\n",
ifield, field, format.err.field(call.x, field, trace))
ifield <- ifield + 1
if(argn)
printf("\n(%d) argument %d of the model call:%s\n",
ifield, argn+1, format.err.field(argn.x, field, trace))
printf("\n")
}
format.err.field <- function(x, xname, trace=0)
{
if(is.try.err(x)) {
errmsg <- sub(".* : *", "", x[1]) # strip prefix "Error in xxx : "
errmsg <- gsub("\n *\\^", "", errmsg) # strip " ^" in some err msgs
errmsg <- gsub("[\n\t ]+", " ", errmsg) # collapse newlines and multiple spaces
errmsg <- gsub("^ *| *$", "", errmsg) # delete remaining leading and trailing space
sprint(" %s", errmsg)
} else if(is.errmsg(x))
sprint(" %s", x)
else if(is.null(x))
sprint(" NULL")
else if(NROW(x) < 3)
sprint(" less than three rows")
else if(!is.null(dim(x))) {
print_summary(x, xname, trace=2)
sprint(" is not usable (see above)")
} else
sprint(" class \"%s\" with value %s",
class(x), try(paste.trunc(format(x))[1]))
}
# Get object$x or object$y from the model.
# Return x (or y) or NULL or an error message.
#
# The approach taken in all helper routines for get.x.or.y
# (such as get.object.x.or.y.field) is that we issue trace messages
# here in the helper routine, and the caller silently checks
# the returned value for good data.
#
# For a model with a formula, the standard path is to apply the
# naked formula to the data using model.frame().
# Example with argument field="x":
#
# formula(object) resp~num + sqrt(num) + bool + ord:num + fac
# naked formula resp~num + bool + ord + fac
# data colnames resp bool ord fac str num nx int date
# returned colnames num bool ord fac
get.object.x.or.y.field <- function( # get object$x or object$y
object, # the model
field, # "x" or "y"
trace,
try.object.x.or.y=TRUE, # FALSE if object[[field]] should be ignored
naked=TRUE) # TRUE for columns like "x3" not "ns(x3,4)"
{
trace2(trace, "\nget.object.%s:\n", field)
x <- NULL
xname <- sprint("object$%s", field) # for tracing
if(!try.object.x.or.y) # e.g. we must ignore object$x for mda::mars models
trace2(trace, "ignoring %s for this %s object\n", xname, class.as.char(object))
else {
# note we use object[["x"]] rather than object$x to prevent partial
# matching (but the error messages use object$x for readability)
x <- object[[field]]
if(is.good.data(x, xname, trace))
x <- cleanup.x.or.y(object, x, field, trace,
check.naked=naked && field != "y")
else if(!is.null(x) && !is.good.data(x, check.colnames=FALSE)) {
# Issue a warning because predict.lm will probably crash
# later when it internally accceses object$x.
# We call is.good.data(check.colnames=FALSE) above to check if the
# prior call to is.good.data() failed merely because of a colname
# issue (if it's just a colname issue then don't issue warning).
warnf("object$%s may be corrupt", field)
}
}
x # return x or NULL or an error message
}
# Get getCall(object)$x (or similar) from the model's call field.
# Return x (or similar) or NULL or an error message.
get.data.from.object.call.field <- function(object, field, trace,
check.is.good.data=TRUE)
{
trace2(trace, "\nget.data.from.object.call.field:\n")
x <- NULL
xname <- sprint("getCall(object)$%s", field)
call <- getCall(object)
if(is.null(call))
trace2(trace, "getCall(object) is NULL so cannot get %s\n", xname)
else if(!is.call(call))
trace2(trace, "getCall(object) is not actually a call so cannot get %s", xname)
else {
x <- try.eval(call[[field]], model.env(object), trace=trace, expr.name=xname)
if(is.errmsg(x))
trace2(trace, "%s\n", x)
else if(check.is.good.data) # invoke is.good.data purely for issuing trace messages
is.good.data(x, xname, trace)
}
x
}
# Get the n'th arg in the call to the model function.
#
# This is for those model functions whose second argument is the
# response (what we call "y"), although that argument's name is
# not "y". For example, argn=2 will select the "grouping" arg in
# qda(x=lcush[,2:3], grouping=lcush[,1]).
#
# Returns list(argn.x, argn)
# where argn.x is the evaluated n'th argument or NULL or an error message.
# and argn will be set 0 if routine processing says we should ignore argn.
get.argn.from.call <- function(argn, object, field, trace, nrows.argn)
{
x <- NULL
if(argn) {
temp <- get.argn.from.call.aux(argn, object, field, trace, nrows.argn)
x <- temp$x
argn <- temp$argn
if(is.errmsg(x))
trace2(trace, "%s\n", x)
else # invoke is.good.data purely for issuing trace messages
is.good.data(x, sprint("argument %d of the model call", argn), trace)
}
list(x=x, argn=argn)
}
# auxilary function for get.argn.from.call
get.argn.from.call.aux <- function(argn, object, field, trace, nrows.argn)
{
ret <- function(x, argn)
{
list(x=x, argn=argn)
}
#--- get.argn.from.call.x starts here
stopifnot(argn > 0)
call <- getCall(object)
if(is.null(call))
return(ret("getCall(object) is NULL so cannot use argn", argn))
if(!is.call(call))
return(ret("getCall(object) is not actually a call so cannot use argn", argn))
if(length(call) <= argn)
return(ret(sprint(
"cannot use argn %d because getCall(object) does not have %d arguments",
argn, argn), argn))
names.call <- names(call) # some names may be ""
trace2(trace, "names(call) is %s\n", quotify(names.call))
# If argn is field (i.e. "x" or "y"), don't process it here because
# we process call$x and call$y elsewhere (in get.data.from.object.call.field).
# This is a common case, so we clear argn for uncluttered message
# later in errmsg.for.get.x.or.y.
# If the arg name is "" in getCall(object) this won't work, not serious.
if(identical(names.call[argn+1], field))
return(ret(sprint(
"the name of argument %d is \"%s\" so we will not process it with argn",
argn, field),
argn=0))
# If an argument of the call is "formula" then return, because
# any arg named "x" or "y" is unlikely to be model data.
# This is a a common case, so clear argn.
if(pmatch("formula", names.call[2], 0))
return(ret(sprint(
"ignoring argn %g because there is a formula argument", argn),
argn=0))
x <- try.eval(call[[argn+1]], model.env(object), trace=trace,
sprint("argument %d of the model call", argn))
if(is.data.frame(x))
x <- x[[1]]
if(!(is.numeric(x[1]) || is.logical(x[1]) || is.factor(x[1])))
return(ret(sprint(
"cannot use argn %d because it is not numeric, logical, or a factor",
argn), argn))
if(is.null(nrows.argn)) # should never happen
stop0("cannot use argn because the expected number of rows is unspecified")
if(NROW(x) != nrows.argn)
return(ret(sprint(
"cannot use argn %g because it has %g rows but expected %g rows",
argn, NROW(x), nrows.argn), argn))
list(x=x, argn=argn)
}
# If object has a formula, use that formula to get x or y (field is "x" or "y").
# Returns list(x, do.subset, form.as.char, source) where x may be an err msg and source
# is a string describing where we got the data from (only used if no err msg).
get.x.or.y.from.model.frame <- function(object, field, trace, naked,
na.action="auto", newdata=NULL)
{
ret <- function(...) # ... is an err msg in printf form
{
errmsg <- sprint(...)
trace2(trace, "%s\n", errmsg)
list(x=errmsg, do.subset=FALSE, formula=NULL, source="model frame")
}
#--- get.x.or.y.from.model.frame starts here
stopifnot(field == "x" || field == "y")
trace2(trace, "\nget.%s.from.model.frame:\n", field)
mf <- get.model.frame(object, field, trace, naked, na.action, newdata)
if(!is.good.data(mf$x))
return(mf)
model.frame <- mf$x
if(field == "x") {
# Check if any vars have $ in their name, this confuses predict() later.
# They cause "Error in model.frame.default: variable lengths differ"
# or "newdata had 50 rows but variables found have 330 rows"
ibad <- grep("[._[:alnum:]]\\$", colnames(model.frame))
if(any(ibad)) {
warnf("%s: \"$\" in colnames(model.frame) is not supported by plotmo, %s",
colnames(model.frame)[ibad[1]],
"will try to get the data elsewhere")
return(ret("\"$\" in colnames(model.frame)"))
}
}
# got the model.frame, now get the column index(s) of the response in the model.frame
iresponse.col <- get.iresponse.col(object, model.frame, mf$isFormula,
trace=if(field=="y") trace else 0) # reduce number of msgs
if(field == "x") {
# drop the response column(s)
x <- model.frame[, -iresponse.col, drop=FALSE]
if(!is.good.data(x, sprint("x=model.frame[,-%s]", paste.c(iresponse.col)), trace))
return(ret("invalid model.frame[,-iresponse]"))
} else { # field == "y"
# select the response column(s)
# we don't use model.response() here because that drops the column name
x <- model.frame[, iresponse.col, drop=FALSE]
if(!is.good.data(x, sprint("y=model.frame[,%s]", paste.c(iresponse.col)), trace))
return(ret("invalid model.frame[,iresponse]"))
}
list(x=x, do.subset=mf$do.subset, formula=mf$formula, source=mf$source)
}
# The following is derived from stats::model.frame.default but tries to
# also handle models that didn't save the terms etc. in a standard way.
# It never uses parent.frame (as some model.frame methods do).
#
# We will use the given na.action. But if na.action="auto" then get
# na.action from the model itself, and do a little special handling.
#
# Returns list(x, do.subste, formula, source, isFormula)
# where x may be an err msg
# source s a string describing where we got the data from (only used if no err msg)
get.model.frame <- function(object, field, trace, naked,
na.action="auto", newdata=NULL)
{
ret <- function(x, do.subset=FALSE, formula=NULL,
source="model frame", isFormula=FALSE)
{
list(x=x, do.subset=do.subset, formula=formula,
source=source, isFormula=isFormula)
}
#--- get.model.frame starts here
# get.model.formula returns a Formula or formula with an environment, or an error string
modform <- get.model.formula(object, trace, naked)
formula <- modform$formula
if(is.errmsg(formula))
return(ret(formula)) # return errmsg
isFormula <- inherits(formula, "Formula") # Formula vs formula
trace2(trace, "formula is valid, now looking for data for the model.frame\n")
if(!is.null(newdata)) {
if(!is.good.data(newdata, "newdata", trace))
return(ret("bad newdata")) # return errmsg
data <- newdata
data.source <- "newdata"
} else {
# use object$model if possible (e.g. lm)
# TODO the following code really belongs in get.data.for.model.frame?
x <- object[["model"]]
if(is.good.data(x, "object$model", trace)) {
# Drop column named "(weights)" created by lm() if called with weights
# (must drop else x will be rejected because non-naked colname).
x <- x[, which(colnames(x) != "(weights)"), drop=FALSE]
if(trace >= 3)
print_summary(x, "model.frame", trace)
# Note that we call check.naked even when the naked=FALSE.
# Not essential, but gives more consistency so we select the same object$x,
# getCall(object), or etc. regardless of whether naked is set or clear.
if(is.null(check.naked(x, "object$model", trace))) # good object$model?
return(ret(x, FALSE, formula, "object$model", isFormula))
}
temp <- get.data.for.model.frame(object, trace)
data <- temp$data
data.source <- temp$source
if(!is.good.data(data)) {
# data is not usable (could be NULL)
# following is for when no data argument when model was built
data <- model.env(object)
data.source <- "model.env(object)"
}
}
if(is.character(na.action) && length(na.action) == 1 && na.action == "auto") {
na.action <- na.action(object)
class.na.action <- class(na.action)
# following is for rpart's and ctree's (special but useful) NA handling
if(is.null(na.action))
na.action <-
if(inherits(object, "rpart") || inherits(object, "party_plotmo"))
"na.pass"
else
"na.fail"
else if(length(class.na.action) == 2 && class.na.action[1] == "na.rpart")
na.action <- paste0("na.", class(na.action)[2])
else if(class.na.action[1] %in% c("exclude", "fail", "omit", "pass"))
na.action <- paste0("na.", class(na.action)[1])
trace2(trace, "na.action(object) is %s\n", as.char(na.action))
}
if(!is.function(na.action) && !is.character(na.action)) {
errmsg <- sprint("bad na.action: %s", as.char(na.action))
trace2(trace, "%s\n", errmsg)
return(ret(errmsg))
}
if(trace >= 3) {
printf("model.env is %s\n", environment.as.char(model.env(object)))
print_summary(data, "data", trace)
}
data.source <-
if(is.environment(data)) environment.as.char(data)
else if(is.null(data)) "NULL"
else data.source
mfcall.as.char <- sprint("model.frame(%s, data=%s, na.action=%s)",
paste.trunc(modform$form.as.char, maxlen=40),
data.source, trunc.deparse(na.action))
trace2(trace, "stats::%s\n", mfcall.as.char)
x <- try(do.call(stats::model.frame, # calls model.frame.default
args=list(formula=formula, data=data, na.action=na.action)),
silent=trace < 2)
if(trace >= 3)
print_summary(x, "model.frame returned", trace)
ret(x, if(is.null(newdata)) TRUE else FALSE, formula, mfcall.as.char, isFormula)
}
get.data.for.model.frame <- function(object, trace)
{
ret <- function(errmsg, data=NULL, source="model frame")
{
if(!is.null(errmsg))
trace2(trace, "%s\n", errmsg)
list(data=data, source=source)
}
# try object$data e.g. earth models with formula and keepxy=T
# the inherits check is becauses party objects for e.g. "medv ~ log(lstat) + rm^2"
# save "log(lstat)" not "lstat" in object data, that confuses model.frame.default
if(!inherits(object, "party_plotmo")) {
data <- object[["data"]]
if(is.good.data(data, "object$data", trace))
return(ret(NULL, data, "object$data"))
}
# look for the data in getCall(object)
call <- object[["call"]]
if(is.null(call))
return(ret("getCall(object) is NULL so cannot get the data from the call"))
if(!is.call(call))
return(ret("getCall(object) is not actually a call so cannot get the data from the call"))
data <- NULL
argname <- "NULL"
# try getCall(object)$data
idata <- match(c("data"), names(call), 0)[1]
if(idata > 0) {
trace2(trace, "argument %g of the call is 'data'\n", idata-1)
argname <- "call$data"
# Mar 2019: TODO this doesn't work (if model was built internally to another
# function?) because it tries to get data from .RGlobalEnv (which in that
# environment is a function "data"). Perhaps failure is because terms(mf) seems
# to generate a terms field ".GlobalEnv" regardless of where the mf was evaluated.
# Workaround for earth models: use keepxy=TRUE (to avoid this code)
data <- try(eval.trace(call[[idata]], model.env(object),
trace=trace, expr.name=argname),
silent=FALSE) # so user can see what went wrong
is.good.data(data, argname, trace) # purely for tracing
} else {
# no getCall(object)$data, search for an arg that looks like good data
trace2(trace,
"getCall(object) has no arg named 'data', will search for an arg that looks like data\n")
if(length(call) >= 3) { # start at 3 to ignore fname and first arg (the formula)
for(icall in 3:length(call)) {
arg <- call[[icall]]
if(class(arg)[1] == "name") { # paranoia, will always be true?
argname <- sprint("call$%s", quotify(as.character(arg)))
data <- eval.trace(arg, model.env(object), trace=trace, expr.name=argname)
if(is.good.data(data, argname, trace=trace)) {
trace2(trace, "%s appears to be the model data\n", argname)
idata <- icall
break
} else {
trace2(trace, "%s is not the model data\n", argname)
data <- NULL
}
}
}
}
}
if(is.good.data(data, argname)) {
# following needed for e.g. nnet(O3~., data=scale(ozone1), size=2)
# Else get Error in model.frame.default: 'data' must be a data.frame.
if(!is.data.frame(data)) {
data <- try(my.data.frame(data, trace))
# invoke is.good.data purely for issuing trace messages
is.good.data(data, sprint(
"%s converted from \"%s\" to \"data.frame\"",
argname, class(data)[1]), trace)
}
}
ret(NULL, data, argname)
}
# get the column index(s) of the response in the model.frame, return 1 if can't (best guess is 1)
get.iresponse.col <- function(object, model.frame, isFormula, trace)
{
assuming <- sprint("assuming \"%s\" in the model.frame is the response, because",
gen.colnames(model.frame, prefix="model.frame", trace=trace)[1])
iresponse.col <- 1
terms <- try(terms(object), silent=TRUE)
if(is.null(terms)) { # e.g. bagEarth.formula and nn
trace1(trace, "%s terms(object) is NULL\n", assuming)
return(1) # assume iresponse.col is 1
}
if(is.try.err(terms)) {
trace1(trace, "%s terms(object) did not return the terms\n", assuming)
return(1)
}
# object seems to have a valid terms field
iresponse.col <- attr(terms, "response")
if(is.null(iresponse.col) || !is.numeric(iresponse.col) || length(iresponse.col) != 1) {
trace1(trace, "%s attr(terms, \"response\") is invalid\n", assuming)
return(1)
}
if(iresponse.col != 0) {
if(isFormula) {
trace1(trace, "%s object used Formula (not formula) yet attr(terms, \"response\") is nonzero\n", assuming)
return(1)
}
iresponse.col <- try(check.index(iresponse.col,
"attr(terms, \"response\")", model.frame,
is.col.index=TRUE, allow.negatives=FALSE))
}
else { # iresponse.col == 0
if(!isFormula) {
trace1(trace, "%s attr(terms, \"response\") is 0\n", assuming)
return(1)
}
# isFormula
iresponse.col <- attr(terms, "Response")
if(is.null(iresponse.col)) {
# will happen for any model that uses Formula (not formula), except earth
trace1(trace, "%s the model was built with Formula (not formula)\n", assuming)
return(1)
}
if(is.null(iresponse.col) || !is.numeric(iresponse.col)) {
trace1(trace, "%s attr(terms, \"Response\") is invalid\n", assuming)
return(1)
}
iresponse.col <- try(check.index(iresponse.col,
"attr(terms, \"Response\")", model.frame,
is.col.index=TRUE, allow.negatives=FALSE))
}
if(is.try.err(iresponse.col)) {
trace1(trace, "%s calculated index was invalid\n", assuming)
iresponse.col <- 1
}
iresponse.col
}
isa.formula <- function(x)
{
(typeof(x) == "language" && as.list(x)[[1]] == "~") ||
(is.character(x) && length(x) == 1 && grepany("~", x))
}
get.index.of.formula.arg.in.call <- function(call, trace)
{
iform <- match(c("formula"), names(call), 0)
if(iform)
return(iform)
# no arg named "formula" in call, so look for a formula elsewhere in call
# TODO for which model was this code added? I think it's needed if formula arg is unnamed?
call <- as.list(call)
# start at 2 to skip call[1] which is the function name
for(iform in 2:length(call)) {
if(isa.formula(call[[iform]])) {
# warning0("the formula in the model call is not named 'formula'")
trace2(trace, "argument %d in getCall(object) is a formula\n", iform)
return(iform) # note return
}
}
0 # no formula
}
# return a Formula or formula with an environment, or an error string
get.model.formula <- function(object, trace, naked)
{
ret <- function(...) # ... is an err msg in printf form
{
errmsg <- sprint(...)
trace2(trace, "%s\n", errmsg)
list(formula=errmsg, form.as.char="formula")
}
#--- get.model.formula starts here
# try getting the formula from the terms field (object used formula)
terms <- try(terms(object), silent=TRUE)
if(is.null(terms))
trace2(trace, "terms(object) is NULL, will look for the formula elsewhere\n")
else if(is.try.err(terms))
trace2(trace, "terms(object) did not return the terms, will look for the formula elsewhere\n")
else { # object has a valid terms field
# TODO Sep 2020 ask Formula package people to extend
# (currently only earth supports attr(terms, "Formula") and "Response"
form <- attr(terms, "Formula")
isFormula <- !is.null(form) # "Formula" vs "formula"
if(isFormula) {
trace1(trace, "object created with Formula (not formula): using attr(terms, \"Formula\")\n")
form <- formula.as.char.with.check(form, "attr(terms, \"Formula\")", trace)
} else {
form <- try(formula(terms), silent=TRUE)
form <- formula.as.char.with.check(form, "formula(object)", trace)
}
if(!is.null(form$form.as.char))
return(process.formula(object, form$form.as.char, isFormula, trace, naked))
# if there was a $ in the form.as.char there is no point in looking at the call
# formula, so to avoid issuing the same warning twice, we return
# immediately here
if(grepl("\"$\"", form$errmsg, fixed=TRUE))
return(ret(form$errmsg))
}
# try getting the formula from getCall(object)
call <- object[["call"]]
if(is.null(call))
return(ret("getCall(object) is NULL so cannot get the formula from the call"))
if(!is.call(call))
return(ret("getCall(object) is not actually a call so cannot get the formula from the call"))
iform <- get.index.of.formula.arg.in.call(call, trace)
if(iform == 0) # no formula?
return(ret("no formula in getCall(object)"))
# nasty name change, else model.frame.default: invalid type (language)
# TODO clean this up, this won't work because it doesn't change the calling obj
# names.call <- names(call)
# names.call[iform] <- "formula"
# names(call) <- names.call # note <<- not <-
form.name <- sprint("model call argument %d", iform-1)
form <- eval(call[[iform]], model.env(object))
form <- formula.as.char.with.check(form, form.name, trace)
if(is.null(form$form.as.char))
return(ret(form$errmsg))
# TODO More classes could be added to the following assignment to isFormula
# (and remember we can only get here if object doesn't have a terms field,
# and I believe the objects below do in fact have a terms field)
isFormula <- inherits(object, c("pre"))
process.formula(object, form$form.as.char, isFormula=isFormula, trace, naked)
}
# convert the formula to character, and also check it
formula.as.char.with.check <- function(form, form.name, trace)
{
ret.null <- function(...) # ... is an err msg in printf form
{
errmsg <- sprint(...)
trace2(trace, "%s\n", errmsg)
list(form.as.char=NULL, errmsg=errmsg)
}
if(is.try.err(form))
return(ret.null("%s did not return a formula", form.name))
if(is.null(form))
return(ret.null("%s is NULL", form.name))
if(class(form)[1] != "formula" && !class(form)[1] == "Formula" &&
!(is.character(form) && length(form) == 1))
return(ret.null("%s is not a formula or Formula (its class is \"%s\")",
form.name, class(form)[1]))
form.as.char <- paste.collapse(format(form))
trace2(trace, "%s is %s\n", form.name, paste.trunc(form.as.char))
if(!grepl("[^ \t]+.*~", form.as.char))
return(ret.null("%s has no response", form.name))
# Check if any vars have $ in their name, this confuses predict() later.
# TODO Following comments are no longer accurate?
# We do this check in get.x.or.y.from.model.frame but pre-emptively also here
# (where we have the formula) for a slightly more informative error message.
# (The other message kicks in if we get the model.frame from object$model.)
rhs <- gsub(".*~ *", "", form.as.char)
if(grepany("[._[:alnum:]]\\$", rhs)) { # check for "ident$"
warnf("\"$\" in the formula is not supported by plotmo, %s\n formula: %s",
"will try to get the data elsewhere",
rhs)
return(ret.null("%s: \"$\" in formula is not allowed", form.name))
}
list(form.as.char=form.as.char, errmsg=NULL)
}
# Return a formula with an environment. Also process naked.
# TODO this includes Height in Volume~Girth-Height, it shouldn't
process.formula <- function(object, form.as.char, isFormula, trace, naked)
{
stopifnot(is.character(form.as.char))
stopifnot(length(form.as.char) == 1)
if(naked)
form.as.char <- naken.formula.string(form.as.char, trace)
form <- try(formula(form.as.char, env=model.env(object)), silent=TRUE)
if(isFormula && !is.try.err(form))
form <- try(Formula::Formula(form))
if(is.try.err(form)) {
# prepend "formula(%s) failed" for a clearer msg in format.err.field later
form <- sprint("%s(%s) failed%s",
if(isFormula) "Formula" else "formula",
quotify(form.as.char),
# only append err msg if tracing because err msgs can be obscure
if(trace >= 1) sprint("(%s)", cleantry(form)) else "")
trace2(trace, "%s\n", form)
form <- sprint("Error : %s", form)
}
list(formula=form, form.as.char=form.as.char)
}
# Given a formula (as string), return a string with the "naked" predictors.
# This is used for getting the data to pass to predict.
#
# Example: log(y) ~ x9+ns(x2,4) + s(x3,x4,df=4) + x5:sqrt(x6)
# becomes: log(y) ~ x9 + x2 + x3 + x4 + x5 + x6
# which will later result in a model.matrix with columns x9 x2 x3 x4 x5 x6.
#
# Note that we don't naken the response (so for
# example in the above log(y) remains unchanged).
#
# This routine is not infallible but works for the commonly used formulas.
# It's a hack that relies on regular expressions.
naken.formula.string <- function(form.as.char, trace)
{
stopifnot(is.character(form.as.char))
form.as.char <- paste.collapse(form.as.char)
old.form.as.char <- form.as.char
naked <- gsub(".*~", "", form.as.char) # extract everything after ~
naked <- naken.collapse(naked, warn.if.minus=TRUE)
if(grepl("~", form.as.char)) {
response <- gsub("~.*", "", form.as.char) # extract up to the ~
response <- gsub("^ +| +$", "", response) # trim leading and trailing spaces
if(nchar(response))
response <- paste0(response, " ~")
naked <- paste.collapse(response, naked)
}
trace2(trace,
if(strip.space(naked) == strip.space(old.form.as.char))
"naked formula is the same%.0s\n" # e.g. O3~vh+wind
else
"naked formula is %s\n", naked)
naked
}
is.naked <- function(colnames) # returns a logical vector
{
naked <- logical(length(colnames))
for(i in seq_len(length(colnames))) {
colname <- strip.space(colnames[i])
naked[i] <- colname == naken.collapse(colname)
}
naked
}
# Return an err msg if colnames(x) is not "naked".
# Return NULL if everything is ok.
#
# Example: in lm(Volume~poly(Height, degree=3), data=trees, x=T),
# object$x, object$data, and object$model have
# colnames like "poly(Height, degree = 3)1"
# where plotmo (actually model.frame.default) gives "Error: object 'x1' not found"
# unless we preempt that obscure error message here.
check.naked <- function(x, xname, trace)
{
errmsg <- NULL
colnames <- colnames(x)
# column name "(Intercept)" must be considered naked
colnames <- sub("(Intercept)", "Intercept", colnames, fixed=TRUE)
is.naked <- is.naked(colnames)
if(any(!is.naked)) {
# e.g. lm(formula=log(doy)~vh, ...)
errmsg <- sprint(
"%s cannot be used because it has%s non-naked column name%s %s",
xname,
if(sum(!is.naked) > 1) "" else " a",
if(sum(!is.naked) > 1) "s" else "",
quotify.trunc(colnames[!is.naked]))
trace2(trace, "%s\n", errmsg)
}
errmsg
}
# Returns x or an error message (currrently an error message
# is returned only if naked=TRUE but colnames are not naked).
cleanup.x.or.y <- function(object, x, field, trace, check.naked)
{
x <- handle.nonvector.vars(object, x, field, trace)
# remove column "(Intercept)" e.g. object$x for lm(y~x1+x2, x=TRUE)
if(!is.na(i <- match("(Intercept)", colnames(x)))) {
trace2(trace, "dropped \"(Intercept)\" column from %s\n", field)
x <- x[,-i, drop=FALSE]
}
if(check.naked) {
errmsg <- check.naked(x, field, trace)
if(!is.null(errmsg))
return(errmsg)
}
x
}
# This tries to clean up columns of x that are themselves matrices or data.frames.
#
# Example (where the actual values in the x and y are not important):
# x <- matrix(c(1,3,2,4,5,6,7,8,9,10,
# 2,3,4,5,6,7,8,9,8,9), ncol=2)
# colnames(x) <- c("c1", "c2")
# y <- 3:12
# a <- lm(y~x) # seems natural, but lm doesn't handle it as we might expect
# Cannot get predict to work with newdata on above lm model
# Causes for example 'newdata' had 8 rows but variables found have 10 rows
#
# Another example:
# library(ElemStatLearn); x <- mixture.example$x;
# g <- mixture.example$y; a <- lm(g ~ x)
#
# This routine also prevents a misleading error msg later in plot.degree1
# (illegal index, missing column in x) caused by the following code:
# data(gasoline, package='pls')
# plotmo(earth(octane ~ NIR, data=gasoline))
# where NIR has class "AsIs" and is a matrix.
# There appears to be no easy fix for this (July 2011).
handle.nonvector.vars <- function(object, x, field, trace)
{
if(!is.data.frame(x))
return(x)
ndims.of.each.var <- sapply(x, function(x) NCOL(x))
if(all(ndims.of.each.var == 1)) {
# we are ok: NCOL is 1 for all variables (even though some
# may not be vectors i.e. they could be single column mats)
return(x)
}
format <- paste0("%s variable on the %s side of the formula is a matrix or data.frame\n",
" plotmo often cannot process such variables")
msg <- sprint(format,
if(ncol(x) == 1) "the" else "a",
if(field == "x") "right" else "left")
if(field == "x") {
# We issue the warning only if this is the rhs, because we seem to be able
# to recover when the lhs is a non vector. Thus we correctly don't issue
# warnings for valid models like earth(cbind(O3,doy)~., data=ozone1) and
# glm(cbind(damage, 6-damage)~temp, family=binomial, data=orings).
warning0(msg)
} else if(trace >= 2) {
printf("%s\n", msg)
printf("the number of dimensions of each variable in %s is %s and ",
field, paste.trunc(ndims.of.each.var))
# details is 1 not 2 below else huge output
print_summary(x, sprint("%s is ", field), trace, details=-1)
}
# Attempt to fix the problem by replacing x with x[[1]]. However
# for the rhs this only sometimes works --- there may be downstream
# problems, typically in predict (because the column names are wrong?).
if(ndims.of.each.var[1] > 1) { # first variable is not a vector
trace2(trace, "replacing %s with %s[[1]]%s\n", field, field,
if(length(ndims.of.each.var) == 1) ""
else ", ignoring remaining columns")
org.colnames <- colnames(x)
x <- x[[1]]
# add column names (helps keep track later)
if(is.null(colnames(x))) {
safe.org.colnames <-
if(is.null(org.colnames)) # can never happen, but best to be sure
field
else
org.colnames
if(NCOL(x) > 1)
colnames(x) <- paste0(safe.org.colnames[1], "[,", 1:NCOL(x), "]")
else # e.g. glm(formula=response~temp, family="binomial", data=...)
colnames(x) <- safe.org.colnames[1]
trace2(trace, "%s colnames were %s and now %s\n",
field,
if(is.null(org.colnames)) "NULL"
else quotify.trunc(org.colnames),
quotify.trunc(colnames(x)))
}
}
x
}
# Detect if the model is a glm model, and if so possibly convert the
# response. We do this in the same way as glm() does internally:
#
# o A factor response get converted to indicator column of
# ones and zeros (first level vs the rest).
#
# o Two column binomial responses get converted to a single
# column of fractions.
#
# Note that responses for earth models are handled independently
# in plotmo.y.earth (two level factor to single numeric column,
# three of more level factors to three or more indicator columns).
convert.glm.response <- function(object, y, trace)
{
# check if y is is factor, or first column of y is a factor
is.factor <- is.factor(y) ||
(length(dim(y) == 2) && ncol(y) == 1 && is.factor(y[,1]))
if(is.factor)
y <- convert.glm.response.factor(object, y, trace)
else if(NCOL(y) == 2) # possibly a two column binomial model
y <- possibly.convert.glm.two.column.response(object, y, trace)
y
}
is.nomial <- function(object)
{
is.nomial.string <- function(family) {
family[1] == "binomial" ||
family[1] == "quasibinomial" ||
family[1] == "multinomial"
}
if(!is.list(object))
return(FALSE)
family <- object$family
if(is.character(family)) # glmnet models
return(is.nomial.string(family))
fam <- try(family(object), silent=TRUE)
if(inherits(fam, "family")) { # lm, glm, etc models
family <- fam$family
if(is.character(family))
return(is.nomial.string(family))
}
FALSE
}
convert.glm.response.factor <- function(object, y, trace)
{
if(!is.nomial(object)) {
# e.g. rpart(formula=Kyphosis~., data=kyphosis)
trace2(trace,
"the response is a factor but could not get the family of the %s model\n",
class.as.char(object))
} else {
# e.g. glm(formula=sex~., family=binomial, data=etitanic)
if(!is.null(dim(y))) { # data.frame or matrix
levels <- levels(y[,1])
y[,1] <- y[,1] != levels[1]
} else { # vector
levels <- levels(y)
y <- y != levels[1]
y <- data.frame(y)
}
# column naming helps us keep track that we did this manipulation of x
colnames(y) <- if(length(levels) > 1) paste0("is", levels[2])
else paste0("not", levels[1])
trace2(trace, "generated indicator column \"%s\" from levels %s\n",
colnames(y)[1], paste.trunc(levels))
}
y
}
possibly.convert.glm.two.column.response <- function(object, y, trace)
{
if(is.nomial(object)) {
# following are sanity checks
# note also that here we treat a two column multinom model as a binom model
stopifnot(NCOL(y) == 2)
if(!is.numeric(y[,1]) || !is.numeric(y[,2]))
warning0("non-numeric two column response for a binomial model")
else if(any(y[,1] < 0) || any(y[,2] < 0))
warning0("negative values in the two column response ",
"for a binomial model")
# example 1 glm(formula=response~temp, family="binomial", data=orings)
# example 2 glm(formula=cbind(damage,6-damage)~temp, family="bi...)
org.colnames <- colnames(y)
y <- bpairs.yfrac(y[,1:2], trace=(trace!=0))
y <- data.frame(y)
# column naming helps us keep track that we did this manipulation of x
if(!is.null(org.colnames)) {
colnames(y) <- # gsub deletes things like "[,2]"
paste0(gsub("\\[.*\\]", "", org.colnames[1]), ".yfrac")
trace2(trace,
"created column \"%s\" from two column binomial response\n",
colnames(y))
}
}
y
}
get.and.check.subset <- function(x, object, trace)
{
is.valid <- function(subset)
{
!is.null(subset) && (is.numeric(subset) || is.logical(subset))
}
#--- get.and.check.subset starts here
subset <- object$subset
if(is.valid(subset))
msg <- "object$subset"
else {
subset <- try(eval(getCall(object)$subset, model.env(object)), silent=TRUE)
if(is.try.err(subset))
subset <- NULL
else
msg <- "getCall(object)$subset"
}
if(!is.valid(subset))
subset <- NULL
else {
# duplicates are allowed in subsets so user can specify a bootstrap sample
check.index(subset, "subset", x, allow.dups=TRUE, allow.zeros=TRUE)
if(trace >= 2) {
cat0("got subset from ", msg, " length " , length(subset))
print_first_few_elements_of_vector(subset, trace)
}
}
subset
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.