Nothing
## regr.R Functions that are useful for regression, W. Stahel,
## ==========================================================================
regr <- function(formula, data=NULL, family=NULL,
robust = FALSE, method=NULL,
nonlinear = FALSE, start=NULL,
subset=NULL, weights=NULL, offset=NULL, ...)
{
## !!! dispersion: allow to be set.
## Purpose: fit all kinds of regression models
## -------------------------------------------------------------------------
## Arguments:
## formula, data, ... as with lm
## tit title (becomes tit attribute of result)
## calcdisp should dispersion be calculated for
## family=binomial and family=poisson
## -------------------------------------------------------------------------
## Author: Werner Stahel, Date: Jan 02
## -------------------------------------------------------------------------
## --- get arguments from regr.control
lcall <- match.call()
lac <- as.list(lcall)[-1]
laRegr <- c("formula", "data", "family", "robust", "method",
"nonlinear", "start", "subset", "weights", "offset")
laControl <- c("contrasts","factorNA", "na.action", "calcdisp", "suffmean",
"model", "x", "termtable", "vif", "testlevel", "leveragelim",
"dist", "control")
latransfer <- c("na.action", "calcdisp", "suffmean",
"model", "x", "termtable", "vif")
lextra <- setdiff(names(lac), c(laRegr, laControl))
if (length(lextra))
warning(":regr: argument(s) ",paste(lextra, collapse=", "),
" not used")
lac <- lac[names(lac)%nin%lextra]
lcl <- c(list(quote(regr.control)),
lac[match(laControl,names(lac), nomatch=0)])
mode(lcl) <- "call"
largs <-eval(lcl)
## ------------------------------------------------------------------
## b. === preparation:
## convert character formula to formula
lform <- lformula <- as.formula(formula)
lcall$formula <- lformula
## nonlinear: drop constants
nonlinear <- i.def(nonlinear, FALSE, TRUE, FALSE)
if (as.character(nonlinear)!="FALSE")
lform <- setdiff(all.vars(lform), names(start))
## nonlinear <- i.nonlincheck(nonlinear, lformula, ldata)
## d. === data
lextrav <- as.list(lcall)[names(lcall)%in%c("weights", "offset", "subset")]
lcgetv <- c(list(quote(getvars)), list(formula = lform, data = data),
lextrav)
mode(lcgetv) <- "call"
lallvars <- eval(lcgetv)
lextras <-
c(weights=as.name("(weights)"),offset=quote((offset)),
subset=quote((subset)))[names(lextrav)]
## !!! check errors
## lallvars <- as.data.frame(unclass(lallvars),row.names=row.names(lallvars))
## in rare cases, lallvars will store a wrong dimension
## repair names of lallvars
##- if (anyNA(names(lallvars))) {
##- lnm <- all.vars(lform)
##- if (ldot <- match(".",lnm, nomatch=0))
##- lnm <- unique(c(lnm[-ldot], names(ldata)))
##- if (ncol(lallvars)==length(lnm)) names(lallvars) <- lnm else
##- stop("!regr! bug: variables not correctly identified")
##- }
## --------------------------------------
## !!! extras: check, replace names
## f. === compose call of fitting function
lcl <- lcall
lcl[names(lextras)] <- lextras
## missing response
if (length(lform)>2) {
lvy <- all.vars(lform[1:2])
ly0 <- lallvars[,lvy]
linna <- apply(cbind(ly0), 1, is.finite)
} else linna <- TRUE
if (length(linna)>1 && sum(linna)<2)
stop("!regr! Less than 2 non-missing response values")
## --- convert character to factor, drop unused levels, generate .NA. level
lfacna <- i.def(largs$factorNA, TRUE)
lfnalabel <- if(is.character(lfacna)) lfacna else ".NA."
for (lvn in 1:ncol(lallvars)) {
lv <- lallvars[[lvn]]
if (is.logical(lv))
lallvars[[lvn]] <- as.numeric(lv) ## logical -> numeric!!!
if (is.character(lv)|is.factor(lv)) {
if (lfacna) lv <- factorNA(lv, lfnalabel)[[1]]
lallvars[[lvn]] <- factor(lv, levels=levels(factor(lv[linna])))
}
}
## g. --- check for variables with a single value
##- lv1 <- which( apply(lmodelframe, 2, function(x) all(x==x[1]) ) )
##- if (length(lv1)) { ## adjust formula
##- lfac <- attr(terms(lmodelframe),"fac")
##- lt1 <- names(which(apply(lfac[lv1,,drop=F],2,any)))
##- warning("!regr! formula contains single valued variables: ",
##- paste(row.names(lfac)[lv1], collapse=". "),
##- "\n I drop the following terms from the formula:\n ",
##- paste(lt1, collapse=", "))
##- lfupd <- as.formula( paste( ".~ .- ",paste(lt1, collapse=" - ") ) )
##- lcl$formula <- update(lcl$formula, lfupd)
##- }
## -------------------------------------------
## h. === response type
if (length(lformula)==2) { # nonlinear called with formula of type ~...
ly <- rep(0,NROW(lallvars))
lytype <- "numeric"
} else {
##- attr(formula, "response")
lyf <- model.frame(lformula[1:2], lallvars, na.action=na.pass)
## I tried to generate model.frame for x and y together. This failed
## because model.frame needs adequate method (when y is matrix)
ltrm <- attr(lyf, "terms")
lytype <- substring(attr(ltrm, "dataClasses"),1,5)
## lysimple <- lytype!="nmatr" ## not a matrix
lyy <- lyf[[1]]
lysimple <- length(dim(lyy))==0
## ly <- na.omit(lyy)
if (lysimple&&length(unique(notna(lyy)))==2 &&
all(as.numeric(lyy)%in%0:1)) ## FALSE for numeric !={0,1}
lytype <- "binary"
if (inherits(lyy,"Surv")) {
lytype <- "survival"
}
## strange variables
##- l1v <- sapply(ldta, function(x) all(x==c(x[!is.na(x)],0)[1],na.rm=TRUE) )
##- ## covers case of several or all NAs
##- if (any(l1v)) {
##- warning(paste(":regr: variable(s)", paste(lvars[l1v],collapse=", "),
##- "has (have) no distinct values")) # -> dropped.
##- }
}
## ----------------------------------------------
## k. === family and fitting function
lfam <- if (u.nuna(family)) NULL else as.character(substitute(family))[1]
if (u.nuna(lfam)) lfam <- largs$dist
lcl$dist <- NULL
if (lytype=="survival")
lfam <- c( lfam, attr(lyy,"distribution"))[1]
if (u.nuna(lfam))
lfam <- switch(substring(lytype,1,5),
numer="normal", nmatr="normal", binar="binomial",
binco="binomial", order="cumlogit",
facto="multinomial", survi="ph", "unknown")
if (substring(lfam,1,7)=="multinom") lfam <- "multinomial"
if (lfam=="multinom") lfam <- "binomial"
##
lfitfun <-
switch( lfam,
gaussian="lm", normal="lm", binomial="glm", poisson="glm",
Gamma="glm",
cumlogit="polr", multinomial="multinomial",
weibull="survreg", lognormal="survreg", loggaussian="survreg",
loglogis="survreg", loglogistic="survreg", extreme="survreg",
ph="survreg", prop.hazard="survreg",
"unknown")
if (lfitfun=="unknown") stop("!regr! Fitting function not identified")
## additional checks
if (lytype=="survival") {
if (!inherits(lyy,"Surv"))
stop("!regr! bug: convert response to Surv object")
## !!! hier machen! lallv[,1] ersetzen durch Surv davon
lfitfun <- "survreg"
}
else if (lfitfun=="glm")
lcl$control <- list(calcdisp=largs$calcdisp, suffmean=largs$suffmean,
lcl$control)
##
lfitname <- paste("i",lfitfun,sep=".")
if (!exists(lfitname)||!is.function(get(lfitname)))
stop (paste("!regr! Fitting function",lfitname, "not found"))
## -----------------------------------------------------
## m. === prepare call
lcl$fname <- lfam
## lcl$na.action <- substitute(largs$na.action)
## lcl <- c(list(quote(regr)), as.list(lcl[-1]), largs[latransfer])
lcl[latransfer] <- largs[latransfer]
lcl[[1]] <- ## hack --> eval(.) works also when call is source()d ...
switch(lfitname,
"i.lm" = quote(regr0::i.lm),
"i.glm" = quote(regr0::i.glm),
"i.multinomial" = quote(regr0::i.multinomial),
"i.polr" = quote(regr0::i.polr),
## "i.smooth" = quote(regr0::i.smooth), ## ??
"i.survreg" = quote(regr0::i.survreg),
## default:
as.name(lfitname))
if (lfitname=="i.glm") lcl$family <- lfam
## lcl[[1]] <- as.name(lfitname) ## sonst geht das debuggen nicht.
if (lfitname=="i.survreg") {
lcl$yy <- lyy
lcl$model <- TRUE ## model needed, see below
}
## --- contrasts
lcontr <- largs$contrasts
if(is.atomic(lcontr)&&length(lcontr)) {
if(!is.character(lcontr))
warning("!regr! invalid contrasts argument")
else {
loldopt <- options(contrasts=c(lcontr,getOption("contrasts")[2])[1:2])
on.exit(options(loldopt))
lcl$contrasts <- NULL
}
lcw <- lcontr==c("contr.wsum","contr.wpoly")
if (ncol(lallvars)>1) {
if (any(lcw))
lyna <- if (length(dim(lyy)))
c(0,NA)[1+apply(is.na(as.matrix(lyy)),1,any)] else lyy
for (lj in 2:ncol(lallvars)) { ## no contrasts for y {
if(lcw[1]&&class(lallvars[,lj])[1]=="factor")
attr(lallvars[,lj],"contrasts") <-
contr.wsum(lallvars[,lj], y=lyna)
if(lcw[2]&&class(lallvars[,lj])[1]=="ordered")
attr(lallvars[,lj],"contrasts") <-
contr.wpoly(lallvars[,lj], scores=NULL, y=lyna)
}
}
## if (lcontr[1]=="contr.wsum") lallvars <- contr.wsum(lallvars, y=lyy)
}
lcl$data <- lallvars ## must be evaluated!
lcall$na.action <- lcl$na.action <- largs$na.action
## ldata <- lallvars
## environment(lcl$formula) <- environment() ## !!!
## lcl$call <- as.list(lcl[-1])
mode(lcl) <- "call"
## === --------------------------------------------
##- lreg <- eval(lcl, envir=environment(formula))
lreg <- eval(lcl)
## === --------------------------------------------
if (is.null(lreg$distrname)) lreg$distrname <- lfam
if (length(lreg$AIC)==0) {
laic <- try(extractAIC(lreg), silent=TRUE)
if (class(laic)!="try-error") lreg$AIC <- laic
}
lreg$response <- lyy
lreg$allvars <- lallvars ## needed more than $model
## since $model contains transformed variables
## recover some arguments to effective function call
lfc <- lreg$call
la <- intersect(c("data", "weights", "offset", "subset","na.action"), names(lfc))
## these arguments should be restored because otherwise,
## add1 does not work if they have changed.
lfc[la] <- lcall[la]
lreg$funcall <- lfc
lcall$formula <- formula(lreg) # hope this never damages anything
lreg$call <- lcall
tit(lreg) <- if (length(largs$tit)==0) attr(data,"tit") else largs$tit
doc(lreg) <- attr(data,"doc")
if (largs$model&&length(lreg$model)==0) {
if (nonlinear) warning(":regr: no $model available for nonlinear regr.")
else lreg$model <- lm(lformula, data, method="model.frame")
}
lterms <- if (nonlinear) NULL else terms(lreg)
if ((!nonlinear) && is.null(attr(lterms, "predvars"))) ## needed for survreg
attr(lreg$terms,"predvars") <- attr(attr(lreg$model,"terms"),"predvars")
## -----------------------------------------------------------------
## r. === leverages, standardized res
## get residuals if missing (as for polr objects
if (!inherits(lreg, "multinom")) {
if ("residuals"%nin%names(lreg)) {
lres <- residuals(lreg)
if (length(lnaaction <- lreg$na.action) && class(lnaaction)=="exclude")
lres <- if (is.matrix(lres)) lres[-lnaaction,] else lres[-lnaaction]
lreg$residuals <- lres
}
lsigma <- c(lreg$sigma, lreg$scale)[1]
if (length(lsigma)==0) lsigma <- sqrt(c(lreg$dispersion,1)[1])
if (!inherits(lreg, "nls")) {
lstr <- i.stres(lreg, sigma=lsigma, leveragelim = largs$leveragelim)
lreg[names(lstr)] <- lstr ## c(..,..) destroys attributes
}
}
## --- misc
if (nonlinear) lreg$r.squared <- 1-lreg$sigma^2/var(lyy,na.rm=TRUE)
if (class(lreg)[1]=="survreg")
lreg$n.obs <- length(lreg$linear.predictor)
if (!largs$x) lreg$x <- NULL
class(lreg) <- if (class(lreg)[1]=="orig") ## nls shall not be regr
class(lreg)[-1] else c("regr",class(lreg))
## ------------------------------------------------------------------
## result of regr
lreg
}
## -----------------------------------------------------------------------
regr.control <-
function(contrasts=getUserOption("regr.contrasts"), factorNA = TRUE,
na.action=as.name("nainf.exclude"), calcdisp=NULL, suffmean=3,
dist=NULL,
model = FALSE, x = TRUE, termtable=TRUE, vif=TRUE,
testlevel = 0.05, leveragelim=c(0.99,0.5), tit=NULL,
control = NULL
)
{
list(contrasts=contrasts, factorNA=factorNA,
na.action=as.name("nainf.exclude"), calcdisp=calcdisp,
suffmean=suffmean,
dist=dist, model=TRUE, x=x, termtable=termtable, vif=vif,
testlevel=testlevel, leveragelim=leveragelim, tit=tit,
control=control
)
## flicken !!! model=T needed in i.lm_ for getting ly
}
## ===================================================================
getvars <-
function (formula, x = NULL, data = NULL, rawvars = TRUE,
jitterbinary = TRUE, ...)
{
## copy of get_all_vars , different error handling; generate is.fac
if (missing(formula)) {
if (!missing(data) && inherits(data, "data.frame") &&
length(attr(data, "terms")))
return(data)
formula <- as.formula(data)
}
else if (missing(data) && inherits(formula, "data.frame")) {
if (length(attr(formula, "terms")))
return(formula)
data <- formula
formula <- data
}
if (missing(data))
data <- environment(formula)
else if (!is.data.frame(data) && !is.environment(data) &&
!is.null(attr(data, "class")))
data <- as.data.frame(data)
else if (is.array(data))
stop("!getAllVars! 'data' must be a data.frame, not a matrix or an array")
##
if (is.character(formula)) formula <- paste("~",paste(formula,collapse="+"))
formula <- as.formula(formula)
if (!inherits(formula, "terms"))
formula <- terms(formula, data = data)
env <- parent.frame() # environment(formula)
rownames <- .row_names_info(data, 0L)
if (rawvars) {
varnames <- all.vars(formula)
xvars <- all.vars(formula(formula)[-2])
} else {
varnames <- rownames(attr(formula, "factors")) ## !!! allow for simple terms
xvars <- varnames[-1]
}
inp <- parse(text = paste("list(", paste(varnames, collapse = ","),
")"), keep.source = FALSE)
variables <- eval(inp, data, env)
names(variables) <- varnames
lvmode <- sapply(variables, mode)
if (any(li <- lvmode%nin%c("numeric","character","logical")))
stop("!getvars! variable(s) ",paste(varnames[li],collapse=", "),
" have wrong mode") ## !!! convert into 'delayed error' as below
## rownames
if (is.null(rownames) && (resp <- attr(formula, "response")) > 0) {
lhs <- variables[[resp]]
rownames <- if (is.matrix(lhs)) rownames(lhs) else names(lhs)
}
## factors, character, binary
is.fac <- NULL
if (length(variables)) {
## turn character into factor
if (any(lic <- sapply(variables, is.character)))
variables[lic] <- lapply(variables[lic], factor)
is.fac <- sapply(variables, is.factor)
## variables with only 2 values
lnv <- sapply(variables, function(x) length(unique(x)) )
if (jitterbinary) is.fac[lnv<=2] <- 2
## factors created in the formula argument of the call to regr
ltv <- attr(terms(formula),"variables")
ltfc <- ltv[grep("factor\\(",as.character(ltv))]
if (!is.null(ltfc)) {
lfc <- all.vars(formula(paste("~",paste(ltfc,collapse="+"))))
is.fac[match(lfc,names(variables),nomatch=0)] <- 2
}
} # if (length(variables))
## ---
extras <- substitute(list(...))
extranames <- names(extras[-1L])
extras <- eval(extras, data, env)
names(extras) <-
if (length(extranames)) paste("(",extranames,")",sep="") else NULL
rr <- c(variables,extras)
len <- sapply(rr,length)
rr <- rr[len>0]
messg <- NULL
len <- len[len>0]
nobs <- if(is.data.frame(data)) nrow(data) else length(rr[[1]])
if (any(li <- len%nin%c(1,nobs))) {
messg <- paste(ifelse(sum(li)==1, "Variable ","Variables "),
paste(c(varnames, extranames)[li], collapse=", "),
ifelse(sum(li)==1, " has inadequate length",
" have inadequate lengths"),sep="")
fatal <- any(li[1:length(variables)])
if (!fatal) {
messg <- paste(messg,"\n and will be ignored")
rr <- rr[!li]
rr <- setNames(as.data.frame(rr), names(rr))
if (!is.null(rownames))
attr(rr, "row.names") <- rownames
} else {
class(rr) <- "regr-error"
attr(rr,"message") <- messg
return(rr)
}
}
rr <- setNames(as.data.frame(rr), names(rr)) ## avoid modifying names
if (!is.null(rownames)) attr(rr, "row.names") <- rownames
attr(rr, "is.fac") <- is.fac
attr(rr, "xvars") <- xvars
if (!is.null(messg)) { ## warning
attr(rr, "message") <- messg
class(rr) <- c("regr-warning", class(rr))
}
rr
}
## =========================================================================
i.stres <- function(x, sigma=1, weights=x$weights, leveragelim = c(0.99, 0.5))
{
## Purpose: calculate hat and standardized residuals
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 1 Mar 2018, 15:45
if (is.null(sigma)||(!is.finite(sigma))||sigma<=0) {
warning(":i.stres: sigma is missing or <=0. I use sigma=1")
sigma <- 1
}
if (inherits(x, "nls")) {
warning(":i.stres: no leverage and standardized residuals",
" are available for a nonlinear model")
return(list(leverage = NULL, stresiduals = NULL, strratio = NULL))
}
llev <- x$leverage
if (length(llev)==0) {
lmm <- x[["x"]]
if (length(lmm)==0) lmm <- model.matrix(terms(x),x$model)
llev <- hat(lmm)
}
lres <- x$residuals
if (length(lres)==0) {
warning(":regr/i.stres: no residuals -> no standardized res.")
return( list(leverage=llev) )
}
if (length(llev)!=NROW(lres)) {
warning(":regr: no leverages available, I set them 0")
llev <- rep(0,NROW(lres))
}
names(llev) <- rownames(as.matrix(lres))
##
lstrratio <- 1/(sigma*sqrt(1-pmin(leveragelim[1],llev)))
lwgt <- weights
if (lIwgt <- length(lwgt)==NROW(lres)) lstrratio <- lstrratio * sqrt(lwgt)
if (length(llev)!=NROW(lres))
stop("!i.stres! BUG: incompatible lengths.",
" Try and eliminate NA rows and re-estimate")
lstres <-
structure(
if (inherits(lres, "condquant"))
cbind(lres[,1:4]*lstrratio, lres[,-(1:4)]) else lres*lstrratio,
weighted = lIwgt)
list(leverage = llev, stresiduals = lstres, strratio = lstrratio)
}
## ==========================================================================
i.lm <- function(formula, data, family, fname="gaussian", nonlinear=FALSE,
robust=FALSE, method=NULL, control=NULL,
vif=TRUE, termtable=TRUE, testlevel=0.05, call = NULL, ...)
{
## Purpose: internal: fit lm
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 4 Aug 2004, 11:18
## b. --- method
## NEW version feb 20
lcall <- match.call()
lmeth <- c(lcall$method,"")[1]
lfn <- if (nonlinear) {
lcall$contrasts <- NULL ## lcall[-match("contrasts",names(lcall),nomatch=0)]
"nls" } else {
if (lmeth=="lmrob") robust <- TRUE
if (robust) {
if (is.null(method)) method <- lcall$method
if (is.null(method)) method <- c("lmrob","KS")
method[1]
} else "lm"}
if (robust) {
if (lfn=="lmrob") {
## require(robustbase) ## !?!
if (length(method)>1) {
if (substr(method[2],1,2)=="KS")
method <- NULL
lcall$setting <- "KS2014"
}
lcall$x <- TRUE
}
if (lfn=="rlm") {
## require(MASS) ## !?!
lcall$method <- c(method,"MM")
lcall$x.ret <- TRUE
}
} else lcall$x <- TRUE
if (lmeth=="rq"|lmeth=="quantreg") { # quantile regression
## require(quantreg) ## !?!
lfn <- "rq"
lcall$method <- if(length(lcall$method)>1) lcall$method[-1] else NULL
lcall$x <- NULL
}
## d. --- call
lcall$method <- if (length(method)>1) method[-1] else NULL
## method[-1] produces character(0) which is not NULL!
mkFn <- function(fn) { ## hack --> eval(.) works also when call is source()d ...## ??? wieso function?
switch(fn,
lmrob = quote(robustbase::lmrob),
rlm = quote(MASS::rlm),
rq = quote(quantreg::rq),
lm = quote(stats::lm),
## default:
as.name(fn))
}
## lcall[[1]] <- mkFn(lfn)
if(lfn!="lmrob") lcall$control <- NULL
lcall <- lcall[setdiff(names(lcall),
c("fname","family","vif","nonlinear","robust",
"calcdisp","suffmean","termtable"))] #,"control"
lcl <- c(list(mkFn(lfn)),as.list(lcall)[-1])
mode(lcl) <- "call"
## --------------------------
lreg <- eval(lcl, envir=environment())
## --------------------------
## f. --- collect results
lreg$call$formula <- formula
lreg$fitfun <- lfn
lreg$distrname <- "gaussian"
lttype <- switch(lfn,
rq="Chisq",
rlm="Chisq",
"F"
)
##- ## leverage
##- if (!nonlinear) {
##- lhat <- pmax(0,hat(lreg$x))
##- if (length(lhat)==0) warning(":regr/i.lm: no leverages") ## else {
##- ##- if (length(lhat)!=NROW(lreg$stres))
##- ##- if (length(lreg[["w"]])==NROW(lreg$stres))
##- ##- lhat <- u.merge(lreg$leverage, 0, lreg[["w"]]>0)
##- ##- }
##- lreg$leverage <- lhat
##- }
## multivariate
if (class(lreg)[1]=="mlm")
return(i.mlmsum(lreg, termtable))
##
lreg1 <- summary(lreg)
lsig <- lreg1$sigma
if (is.null(lsig)) lsig <- lreg$scale ## lmrob
if (is.null(lsig)) lsig <- sd(resid(lreg)) # !!! used for rq
lreg$sigma <- lsig
##- ## standardized residuals
##- if (is.finite(lsig)&&lsig>0) {
##- lreg$stres <- lreg$residuals/lsig
##- if (length(lreg$weights)) lreg$stres <- lreg$stres*sqrt(lreg$weights)
if (class(lreg)=="lmrob") lreg1$cov.unscaled <- lreg$cov/lsig^2 ## !!!
## from summary
lcomp <- c("r.squared","fstatistic","colregelation","aliased",
"df","cov.unscaled")
lreg[lcomp] <- lreg1[lcomp]
if (lfn=="lm") lreg$AIC <- extractAIC(lreg)[2]
## degrees of freedom
if (is.null(lreg$df)) # needed for rq
lreg$df <- c(length(coef(lreg))-attr(terms(lreg),"intercept"),
length(lreg$residuals)-length(coef(lreg)))
lreg$df.residual <- ldfr <- df.residual(lreg)
if (nonlinear) {
lcftab <- lreg1$coefficients
lreg$coefficients <- lcftab[,1]
ltq <- qt(1-testlevel/2, ldfr)
lci <- lcftab[,1]*(1+outer(ltq*lcftab[,2], c(ciLow=-1,ciHigh=1)))
lreg$termtable <- data.frame(coef=lcftab[,1],se=lcftab[,2],lci)
# lreg$r.squared <- 1-(lsig/lsdy)^2
}
lreg$adj.r.squared <- 1-(1-lreg$r.squared)*(length(lreg$residuals)-1)/ldfr
## cov of estimates
lcov <- lreg$cov.unscaled*lsig^2
lreg$covariance <- lcov
lse <- sqrt(diag(lcov))
lreg$correlation <- lcov/outer(lse, lse)
## --- table of terms
if (!nonlinear) {
if(termtable) {
ly <- lreg$model[[1]]
lsdy <- sqrt(var(ly))
ltt <- i.termtable(lreg, lreg1$coef, data, lcov, lttype, lsdy=lsdy,
vif=vif, leverage=TRUE)
lreg[names(ltt)] <- ltt
} else class(lreg) <- c("orig",class(lreg))
}
## result of i.lm
lreg
}
## -----------------------------------------------------------------------
i.mlmsum <- function(object, termtable=TRUE)
{
## Purpose: internal: fit multivariate lm; called from i.lm() only
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 4 Aug 2004, 11:18
##- lreg <- lm(formula, data=data, weights=.Weights, model=model, ...)
##- lreg$call$formula <- eval(lreg$call$formula) # patch
lreg1 <- summary(object)
lform <- formula(object)
lts <- ltp <- NULL
for (ly in 1:length(lreg1)) {
lrg <- lreg1[[ly]]
lts <- cbind(lts,c(lrg[["sigma"]],lrg[["r.squared"]],
lrg[["fstatistic"]]))
ltp <- cbind(ltp,lrg[["coefficients"]][,4])
}
lmodel <- nrow(lts)>=5 # non-trivial model
if (lmodel) {
lts[4,] <- pf(lts[3,],lts[4,],lts[5,], lower.tail=FALSE)
lts <- lts[1:4,]
}
dimnames(object$coefficients)[[2]] <- as.character(lform[[2]])[-1]
dimnames(ltp) <- dimnames(object$coefficients)
dimnames(lts) <- list(rep(c("sigma","r.squared","fstatistic","p-value"),
length=nrow(lts)), dimnames(ltp)[[2]])
object$pvalues <- ltp
object$stats <- lts
object$sigma <- lsig <- lts["sigma",]
lres <- object$residuals
##- if (all(lsig>0)) {
##- object$stres <- sweep(lres,2,lsig,"/")
##- if (length(object$weights))
##- object$stres <- object$stres*sqrt(object$weights)
##- }
object$resmd <- mahalanobis(lres,0,var(lres))
ldfr <- object$df.residual
object$r.squared <- lr2 <- lts["r.squared",]
object$adj.r.squared <- 1-(1-lr2)*(nrow(object$residuals)-1)/ldfr
lcomp <- c("aliased","df","cov.unscaled")
object[lcomp] <- lreg1[[1]][lcomp]
object$drop1 <- if (lmodel) drop1.mlm(object)
## class(lreg) <- c("mregr","mlm","lm")
object
} # i.mlmsum
## -----------------------------------------------------------------------
i.glm <- function(formula, data, family, fname,
control=NULL, vif=TRUE, termtable=TRUE, call=NULL, ...)
{
## Purpose: internal: fit glm
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 4 Aug 2004, 11:18
lfamily <- get(fname)
##- environment(formula) <- environment()
lcall <- match.call()
lcall$x <- TRUE
lcall <- lcall[setdiff(names(lcall),
c("fname","vif","nonlinear","robust",
"calcdisp","suffmean","termtable","control"))]
lcall <- c(list(quote(stats::glm)),as.list(lcall[-1]))
mode(lcall) <- "call"
## ---------------
lreg <- eval(lcall, envir=environment())
## ----------------
lreg$leverage <- pmax(0,hat(lreg$x))
lreg1 <- summary(lreg)
lcoeftab <- lreg1$coef
ly <- as.numeric(lreg$model[,1])
ldisp <- lreg1$dispersion
## ---
lfcount <- fname=="binomial"|fname=="poisson"
lcalcdisp <- control$calcdisp
lsuffmean <- TRUE
if (lfcount) {
lsuffmean <- mean(ly)>control$suffmean # ,na.rm=TRUE
lcd <- lcalcdisp
if (length(lcd)==0) lcd <- lsuffmean
if (lcd) {
ldisp <- lreg1$deviance/lreg1$df.residual
if (ldisp>1||length(lcalcdisp)>0) {
lreg$distrname <- paste("quasi",fname,sep="")
lcoeftab[,2] <- lcoeftab[,2]*sqrt(ldisp)
lcoeftab[,3] <- lcoeftab[,3]/sqrt(ldisp)
##- lcoeftab[,4] <- 2*pnorm(lcoeftab[,3],lower.tail=FALSE)
}
else ldisp <- 1
}
} # else calcdisp <- FALSE
attr(ldisp,"fixed") <- ldisp==1
lreg$dispersion <- ldisp
lreg$sigma <- sqrt(ldisp)
## ---
if (ldisp>1) {
lstr <- residuals(lreg, type="pearson")/sqrt(ldisp)
lnaa <- lreg$na.action
if (class(lnaa)=="exclude") lstr <- lstr[-lnaa]
lreg$stres <- lstr
}
## bug? leverage not taken into account
lcomp <- c("deviance","AIC","df.residual","null.deviance", # "family",
"df.null","iter","deviance.resid","aliased","df","cov.unscaled")
lreg[lcomp] <- lreg1[lcomp]
## --- deviances
ltesttype <- ifelse(ldisp==1,"Chisq","F")
ldev <- unlist(lreg1[c("deviance", "null.deviance")])
ldf <- lreg1$df[1:2]-c(attr(terms(lreg),"intercept"),0)
ltbd <- cbind(deviance=c(diff(ldev),ldev), df=c(ldf,sum(ldf)),
p.value=NA)
dimnames(ltbd)[[1]] <- c("Model","Residual","Null")
ltbd[1:2,3] <- pchisq(ltbd[1:2,1], ltbd[1:2,2], lower.tail=FALSE)
if (!lsuffmean) ltbd[2,3] <- NA
lreg$devtable <- ltbd
## ---
## cov of estimates
ldisp <- lreg$dispersion
if (is.null(ldisp)) ldisp <- 1
lreg$covariance <- lcov <- lreg$cov.unscaled*ldisp
lse <- sqrt(diag(lcov))
lreg$correlation <- lcov/outer(lse, lse)
## ---
lreg$fitfun <- "glm"
if (termtable) {
ltt <- i.termtable(lreg, lcoeftab, data, lcov, ltesttype, lsdy=1, vif=vif)
lcmpn <- c("termtable","termeffects","leverage")
lreg[lcmpn[lcmpn%in%names(ltt)]] <- ltt
}
## result of i.glm
lreg
}
## -----------------------------------------------------------------------
i.multinomial <- function(formula, data, family, fname,
model=TRUE, vif=TRUE, termtable=TRUE, call=NULL, ...)
{
## Purpose: internal: fit multinom
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 4 Aug 2004, 11:18
## ltr <- control$trace
## if (length(ltr)==0) ltr <- trace
## require(nnet) ## !?!
lcall <- match.call()
lcall[[1]] <- quote(regr0::i.multinomfit)
lcall$fname <- lcall$family <- lcall$control <- lcall$vif <- NULL
lcall$trace <- FALSE
lreg <- eval(lcall, envir=environment())
## ---------------
if (length(lreg$na.action)) {
lnaact <- attr(lreg$na.action,"class")
attr(lreg$na.action,"class") <- "omit"
} else lnaact <- NULL ## summary does not work with exclude
lreg$call$formula <- formula
lreg1 <- summary(lreg)
lreg$dispersion <- lreg$sigma <- 1
lres <- lreg1$residuals
lreg$residuals <- lres
lcf <- lreg1$coefficients
lreg$coefficients <- lcf
lreg$AIC <- lreg1$AIC
ldfm <- lreg1$edf-nrow(lcf)
lreg$df <- c(ldfm,prod(dim(lres)-1)-ldfm,ldfm)
##- environment(lreg$call$formula) <- environment()
lreg$fitfun <- "multinom"
ldr1 <- if (u.debug()) drop1(lreg, test="Chisq", trace=FALSE) else
try(drop1(lreg, test="Chisq", trace=FALSE), silent=TRUE)
if (class(ldr1)[1]=="try-error") {
warning(paste(":regr/i.multinom: drop1 did not work.",
"I return the multinom object"))
class(lreg) <- c("orig",class(lreg))
return(lreg)
} else { ##xxx
ldr1 <- ldr1[-1,]}
## signif :
ldr1 <- cbind( ldr1[,1:3], sqrt(ldr1[,3]/qchisq(0.95,ldr1[,1])), ldr1[,4] )
names(ldr1) <- c("df", "AIC", "Chisq", "signif", "p.value")
lreg$termtable <- lreg$drop1 <- ldr1
if (length(lnaact)) attr(lreg$na.action,"class") <- lnaact
## result of i.multinomial
lreg
}
## -----------------------------------------------------------------------
i.polr <- function(formula, data, family, fname, weights = NULL,
model=TRUE, vif=TRUE, termtable=TRUE, call=NULL, ...)
{
## Purpose: internal: fit ordered y
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 4 Aug 2004, 11:18
## require(MASS) ## !?!
lcall <- match.call()
ladrop <- c("fname","family","vif","nonlinear","robust",
"calcdisp","suffmean","termtable")
lcl <- as.list(lcall[setdiff(names(lcall),ladrop)])
lcl[1] <- list(quote(regr0::i.polrfit))
## lcl <- c(list(quote(regr0::i.polrfit), lcl)
mode(lcl) <- "call"
lcl$Hess <- TRUE
lenv <- environment()
lcl$envir <- lenv
## ---
lreg <- eval(lcl, envir=lenv)
## lreg$call$formula <- formula
lreg$w <- data$.weights.
lreg$leverage <- hat(lreg[["x"]])
lreg1 <- if (u.debug()) summary(lreg) else
try(summary(lreg))
if (class(lreg1)[1]=="try-error") {
warning(paste(":regr/i.polr: summary did not work.",
"I return the polr object"))
## lreg$call$data <- call$data
class(lreg) <- c("orig","polr")
return(lreg)
} ## ---
## model.matrix
##- ldata <- eval(data, envir=environment(formula))
## lreg$x <- model.matrix(formula, ldata)
lcf <- lreg1$coefficients
lreg$intercepts <- lcf[(lreg1$pc+1):nrow(lcf),1:2]
lreg$stres <- NULL
## cov of estimates!
lreg$covariance <- lcov <- vcov(lreg)
lse <- sqrt(diag(lcov))
lreg$correlation <- lcov/outer(lse, lse)
## --- deviances
lreg$fitfun <- "polr"
if (termtable) {
ltt <- i.termtable(lreg, lreg1$coef, data, lcov, ltesttype="Chisq",
lsdy=1, vif=vif, leverage=TRUE)
lcmpn <- c("termtable","termeffects","leverage")
lreg[lcmpn[lcmpn%in%names(ltt)]] <- ltt
}
lreg$dispersion <- 1
## lreg$residuals <- residuals(lreg)
llp <- fitted.polr(lreg, type="link")
if (length(lnaaction <- lreg$na.action)) llp <- llp[-lnaaction]
lreg$linear.predictors <- llp
## result of i.polr
lreg
}
## -----------------------------------------------------------------------
i.survreg <-
function(formula, data, family, yy, fname="ph", method, control,
vif=TRUE, termtable=TRUE, ...)
{
## Purpose: internal: fit ordered y
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 4 Aug 2004, 11:18
## require(survival) ## !?!
lcall <- match.call()
## b. --- method
if (fname=="ph") {
lfitfun <- "coxph"
lcall[[1]] <- quote(survival::coxph)
} else {
lfitfun <- "survreg"
lcall[[1]] <- quote(survival::survreg)
lcall$dist <- fname
lcall$method <- lcall$control <- NULL
}
lyy <- lcall$yy
if (is.null(lcall$dist)) lcall$dist <- lcall$family
lcall <- lcall[names(lcall)%nin%c("yy","fname","vif","family",
"calcdisp","suffmean","termtable")]
## lcall$yy <- lcall$fname <- lcall$family <- lcall$vif <- NULL
lcall$y <- lcall$x <- TRUE
## ---
lreg <- eval(lcall, envir=environment())
## ---
class(lreg$y) <- "Surv"
attr(lreg$y, "type") <- attr(lyy, "type")
## lreg$call$formula <- formula
lreg1 <- if (u.debug()) summary(lreg) else
try(summary(lreg), silent=TRUE)
if (class(lreg1)[1]=="try-error") {
warning(paste(":regr/i.survreg: summary did not work. ",
"I return the survreg object"))
## lreg$call$data <- call$data
class(lreg) <- c("orig",class(lreg))
return(lreg)
} ## ---
lreg$resid.orig <- lreg$residuals
lreg$stres <- NULL
lcf <- lreg1$coefficients
## --- deviances
## lreg$scale
if (lfitfun=="survreg") {
attr(lreg$scale,"fixed") <- length(lcall$scale)>0
ldf <- sum(lreg$df) - lreg$idf
ldfr <- length(lreg$linear.predictors)-sum(lreg$df)
}
if (lfitfun=="coxph") {
lreg1$table <- lreg1$coefficients
ldf <- length(lreg$coefficients)
ldfr <- length(lreg$residual)-ldf-1
}
lreg$df.residual <- ldfr
lreg$AIC <- extractAIC(lreg)[2]
lreg$deviance <- -2*lreg$loglik
lchi <- 2*diff(lreg1$loglik)
ltbd <- cbind(deviance=c(lchi,-2*lreg1$loglik[2]),
df=c(ldf, ldf+ldfr),
p.value=c(pchisq(lchi,ldf,lower.tail=FALSE),NA))
dimnames(ltbd)[[1]] <- c("Model","Null")
lreg$devtable <- ltbd
lreg$covariance <- lcov <- lreg$var
lsd <- sqrt(diag(lcov))
lreg$correlation <- lcov/outer(lsd,lsd)
lreg$fitfun <- lfitfun
lres <- residuals.regr(lreg) ## includes NAs
if (length(lnaaction <- lreg$na.action)&&class(lnaaction)=="exclude")
lres <- lres[-lreg$na.action,]
ly <- lreg$y
##- lreg$n.censored <-
##- if (attr(ly,"type")%in%c("right","left"))
##- table(ly[,2])[2] else sum(ly[,2]!=1) #interval
ltype <- attr(ly,"type")
##- lreg$n.censored <- sum(lres[,"prob"]>0, na.rm=TRUE)
ltb <- table(ly[,2])
lfit <- lres[,"fit"]
llimit <- attr(ly,"limit")
lreg$n.censored <- NA
if (ltype=="left") {
lreg$n.censored <- structure(ltb[2], names="left")
if (length(llimit))
lreg$n.fitout <- structure(sum(lfit<llimit, na.rm=TRUE), names="left")
} else {
if (ltype=="right") {
lreg$n.censored <- structure(ltb[1], names="right")
if (length(llimit))
lreg$n.fitout <- structure(sum(lfit>llimit, na.rm=TRUE), names="right")
}
}
if (termtable) {
ltt <- i.termtable(lreg, lreg1$table, data, lcov, ltesttype="Chisq",
lsdy=1, vif=vif)
## log(scale): signif<-NA. no! log(scale)==0 means
## exp.distr for weibull/gumbel
lcmpn <- c("termtable","termeffects","leverage")
lreg[lcmpn[lcmpn%in%names(ltt)]] <- ltt
}
## lreg$df <- c(model=ldf, residual=ldfr, original=lreg$df) ## !!! lreg has df = ldf+object$idf !
## do not modify before calling i.termtable
lreg$distrname <- if (lfitfun=="coxph") "prop.hazard" else lreg$dist
lreg$residuals <- lres
## result of i.survreg
lreg
}
## -----------------------------------------------------------------------
Tobit <- function(data, limit=0, limhigh=NULL, transform=NULL, log=FALSE, ...)
{
## Purpose: create a Surv object for tobit regression
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 1 Jan 2010, 21:49
## require(survival) ## !?!
ltrs <- as.character(substitute(transform))
data <- pmax(data,limit)
lright <- !is.null(limhigh)
if (lright) data <- pmin(data, limhigh)
if (log[1]) { ## model.frame evaluates log in data ! Whence [1]
transform <- logst
ltrs <- "logst"
}
if (!is.null(transform)) {
if (is.character(transform)) transform <- get(transform)
if (!is.function(transform))
stop("!Tobit! argument 'transform' does not yield a function")
ldt <- transform(c(limit,limhigh,data), ...)
data <- ldt[-1]
limit <- ldt[1]
if (lright) {
limhigh <- ldt[2]
data <- data[-1]
}
}
if (sum(data<=limit,na.rm=TRUE)==0)
warning(":Tobit: no observation <= `limit`")
if (lright&&sum(data>=limhigh,na.rm=TRUE)<=1)
warning(":Tobit: no observation >= `limhigh`")
if (lright) {
rr <- survival::Surv(time = data, time2=data,
event = (data<=limit) + (data<limhigh),
type="interval")
rr[,2] <- rr[,1]
} else
rr <- survival::Surv(data, event = data>limit, type="left")
structure(rr, distribution="gaussian", transform=ltrs,
limit=c(limit,limhigh), class=c(class(rr), "matrix"))
}
## -----------------------------------------------------------------------
i.termtable <- function(lreg, lcoeftab, ldata, lcov, ltesttype="F",
lsdy, vif=TRUE, leverage=vif)
{
## Purpose: generate term table for various models
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 4 Aug 2004, 15:37
lterms <- terms(lreg)
if(length(attr(lterms,"term.labels"))==0)
return(list(termtable = data.frame(
coef=c(lreg$coef,NA)[1], se=NA, ciLow=NA, ciHigh=NA,
df=1, testst=NA, signif=NA, p.value=NA, p.symb="", stcoef=NA, R2.x=NA,
stringsAsFactors=FALSE)
))
## degrees of freedom
ldfr <- df.residual(lreg)
if (ldfr<1) {
warning(":regr/i.termtable: no degrees of freedom left.")
return(list(termtable = data.frame(
coef=c(lreg$coef,NA)[1], se=NA, ciLow=NA, ciHigh=NA,
df=1, testst=NA, signif=NA, p.value=NA, p.symb="", stcoef=NA, R2.x=NA,
stringsAsFactors=FALSE)
))
}
pvCutpoints <- c(0, 0.001, 0.01, 0.05, 0.1, 1)
pvSymbols <- c("***", "**", "*", ".", " ", "")
pvLegend <- paste(rbind(pvCutpoints,pvSymbols), collapse=" ")
## drop1
ldr1 <-
if (class(lreg)[1]%in%c("lm","lmrob")) {
if (u.debug())
drop1Wald(lreg, test=ltesttype, scope=lterms) else
try(drop1Wald(lreg, test=ltesttype, scope=lterms),
silent=TRUE) } else {
if (u.debug())
drop1(lreg, test=ltesttype, scope=lterms) else
try(drop1(lreg, test=ltesttype, scope=lterms),
silent=TRUE)
}
if (class(ldr1)[1]=="try-error") {
warning(paste(":regr: drop1 did not work. I return the table produced by ",
lreg$fitfun))
##- lsum <- summary(lreg)
##- lcft <- lsum$coef
##- if (length(lcft)==0) lcft <- lsum$parameters ## nls
##- return(list(test=lcft)) # !!! noch reparieren
return(list(termtable=lcoeftab))
}
ldr1 <- ldr1[-1,]
ldr1$RSS <- NULL # same ncol for lm and glm
if (inherits(lreg,"rlm")) ldr1[,4] <- ldr1[,2]/ldr1[,1] ## !!!
if (inherits(lreg,"mlm")||inherits(lreg,"manova"))
return(list(termtable=ldr1)) ## !!! needs much more
ltstq <- if (ltesttype=="F") qf(0.95,c(1,ldr1[,1]),ldfr) else {
if (ltesttype=="Chisq") qchisq(0.95,c(1,ldr1[,1])) else NA }
ltstq1 <- sqrt(ltstq[1]) ## 1 degree of freedom
ltstq <- ltstq[-1]
## coefficients
lcoef <- lreg$coefficients
## model.matrix
lmmt <- lreg[["x"]]
if (length(lmmt)==0)
lmmt <- model.matrix(lreg)
lasg <- attr(lmmt,"assign")[!is.na(lcoef)]
## if (class(lreg)[1]%in%c("polr")) lasg <- lasg[-1] ## ,"coxph"
## terms without factor involvement
lfactors <- attr(lterms,"factors")
lvcont <- !attr(lterms,"dataClasses")[row.names(lfactors)] %in%
c("numeric","logical") ## [...] excludes (weights) and possibly others
## terms only containing continuous variables
lcont <- which( lvcont %*% lfactors ==0 )
## licasg <- which(lasg%in%lcont)
## vif --> R2.x
lr2 <- NA
if (vif) {
lvift <- ## lterms: n of levels for each term
if (u.debug()) vif.regr(lreg, lcov, lmmt) else
try(vif.regr(lreg, lcov, lmmt), silent=TRUE)
if (class(lvift)[1]=="try-error" || length(lvift)==0) {
warning(":regr/i.termtable: error in the calculation of R2.xs")
lvif <- NA
} else lvif <- lvift[,3]^2
lr2 <- 1-1/lvif
}
## prepare table
lpvcol <- pmatch("Pr(",names(ldr1), nomatch=ncol(ldr1))
lpv <- ldr1[,lpvcol]
ltb <- data.frame(coef=NA, se=NA, ciLow=NA, ciHigh=NA,
df=ldr1[,1], testst=ldr1[,lpvcol-1], signif=NA,
p.value=lpv, p.symb="", stcoef=NA, R2.x=lr2,
stringsAsFactors=FALSE)
row.names(ltb) <- row.names(ldr1)
## intercept
ljint <- "(Intercept)"==names(lcoef)[1]
if (ljint) {
## ltstint <- # if(class(lreg)[1]%in%c("lm","nls","rlm"))
lcoeftab[1,3]^2 # else lcoeftab[1,3]
ltb <- rbind(
"(Intercept)"=
data.frame(coef=NA, se=NA, ciLow=NA, ciHigh=NA,
df=1, testst=NA, signif=NA,
p.value=NA, p.symb="", stcoef=NA, R2.x=NA,
stringsAsFactors=FALSE),
ltb)
ltstq <- c(ltstq1, ltstq)
lcont <- c(0, lcont)
}
lcont1 <- lcont+ljint # row number in dr1
## p.symb and signif
ltb$signif <- sqrt(pmax(0,ltb$testst)/ltstq)
## coefficients and statistics for terms with 1 df
if (length(lcont)) { ## lcont refers to assign
ltlb <- dimnames(ltb)[[1]]
lclb <- ltlb[lcont1] ## lcont1 is the row in the coef table of lreg1
ljc <- match(lcont,lasg) # index of coefs for cont variables
lcf <- lcoef[ljc]
## fill in
ltb$coef[lcont1] <- lcf
ltb$se[lcont1] <- lse <- lcoeftab[ljc,2]
lci <- lcf+outer(ltstq1*lse, c(-1,1))
## confint(lreg,row.names(ltb)[lcont1]) does not always work...
ltb[lcont1,c("ciLow","ciHigh")] <- lci
ltb[lcont1,"signif"] <- sign(lcf)*ltb[lcont1,"signif"]
## standardized coefficients
lstcf <- lcf[lcont>0] * # exclude intercept term
sqrt(apply(lmmt[,names(lcf[lcont>0]),drop=FALSE],2,var)) / lsdy
ltb$stcoef[lcont1[lcont>0]] <- lstcf
}
if (row.names(lcoeftab)[nrow(lcoeftab)]=="Log(scale)") { # survreg
ltsc <- lcoeftab[nrow(lcoeftab),]
lcont1 <- c(lcont1, nrow(lcoeftab))
if (!u.true(lreg$dist=="weibull")) ltsc[2:4] <- NA
ltb <- rbind(ltb,"log(scale)"=
c(ltsc[1],ltsc[2],ltsc[1]+c(-1,1)*qnorm(0.975)*ltsc[2],
1, ltsc[3], ltsc[3]/qnorm(0.975), ltsc[4], NA, NA, NA))
}
## p-symbol
lipv <- as.numeric(cut(ltb$p.value, pvCutpoints))
ltb[,"p.symb"] <- pvSymbols[lipv]
attr(ltb, "legend") <- pvLegend
## --- termeffects (dummy coef)
lallcf <- termeffects(lreg)
if (inherits(lreg,"polr")) lreg$coefficients <- c("(Intercept)" = NA, lcoef)
rr <- list(termtable=ltb, termeffects=lallcf)
if (leverage) rr <- c(rr, leverage=list(hat(lmmt)))
rr
}
## --------------------------------------------------------------------------
ciSignif <- function(estimate, se=NULL, df=Inf, testlevel=0.05) {
if (is.null(se))
if (NCOL(estimate)>1) {
se <- estimate[,2]
estimate <- estimate[,1]
} else
stop("!ciSignif! no standard errors found")
ltq <- qt(1-testlevel/2, df)
lci <- estimate+outer(ltq*se, c(ciLow=-1,ciHigh=1))
ltst <- estimate/se
lsgf <- ltst/ltq
lpv <- 2*pt(-abs(ltst), df)
lipv <- as.numeric(cut(lpv, c(0, 0.001, 0.01, 0.05, 0.1, 1)))
lsst <- c("***", "**", "*", ".", " ")[lipv]
data.frame(estimate=estimate, se=se, lci, testst=ltst,
signif=lsgf, p.value=lpv, p.symb=lsst)
}
## ==========================================================================
contr.wsumpoly <-
function (n, scores = NULL, y = NULL, w = NULL,
contrasts = TRUE, sparse = FALSE, poly = NA)
{ ## provide weighted sum contrasts
if (is.data.frame(n)) {
for (lj in 1:ncol(n))
if (is.factor(n[,lj]))
attr(n[,lj],"contrasts") <-
contr.wsumpoly(n[,lj], scores=scores, y=y,
contrasts=contrasts, sparse=sparse)
return(n)
}
## not a data.frame, but...
if (is.character(n)) n <- factor(n)
if (is.factor(n)) { ## ... a factor
if (length(y)) {
if (length(y)!=length(n)) {
warning(":contrasts.wsum: unequal lengths of arguments. ",
"I ignore argument 'y'")
## y only used to eliminate NAs in target variable
} else n <- n[apply(is.finite(cbind(y)), 1, all)]
}
w <- c(table(factor(n))) ## exclude unused levels
if (is.na(poly)) poly <- is.ordered(n)
}
nn <- length(w)
if (is.na(poly)) poly <- FALSE
if (nn<1) {
if (!(is.atomic(n)&&is.numeric(n)&&length(n)==1))
stop ("!contr.wsumpoly! Provide either 'n' or 'w'")
nn <- n
## w <- rep(1,nn)
}
contr <-
if (poly) {
scores <- if(length(scores)) scores else 1:nn
contr.poly(nn, scores = scores, contrasts=contrasts, sparse=sparse)
} else
contr.sum(nn, contrasts=contrasts, sparse=sparse)
if (is.null(w) || anyNA(w) || any(w<=0)) {
warning(":contr.wsum: weights 'w' not suitable.",
" Returning unweighted sum contrast")
return(contr)
}
if (poly) {
contr <- make.poly( nn, scores=scores, w=w)
if (contrasts) {
dn <- colnames(contr)
dn[2:min(4, nn)] <- c(".L", ".Q", ".C")[1:min(3, nn - 1)]
colnames(contr) <- dn
contr <- contr[, -1, drop = FALSE]
}
else {
contr[, 1] <- 1
contr
}
} else contr[nn,] <- - w[-nn]/w[nn]
##- if (sparse)
##- contr <- .asSparse(contr)
structure(contr, w=w)
}
## --------------------------------------------------------------------
contr.wsum <- function(n, scores = NULL, y=NULL, w = NULL, contrasts = TRUE,
sparse = FALSE) {
if (is.ordered(n)) n <- factor(n)
contr.wsumpoly (n, y=y, w=w, contrasts=contrasts, sparse=sparse, poly=FALSE)
}
## --------------------------------------------------------------------
contr.wpoly <- function(n, scores = NULL, y = NULL, w = NULL, contrasts = TRUE,
sparse = FALSE) {
if (is.factor(n)) n <- ordered(n)
contr.wsumpoly (n, scores = scores, y = y, w = w,
contrasts = contrasts, sparse = sparse, poly=TRUE)
}
## -----------------------------------------------------------------
make.poly <- function(n, scores, w) {
y <- scores - sum(scores*w)/sum(w)
X <- sqrt(w)*outer(y, seq_len(n) - 1, "^")
QR <- qr(X)
z <- QR$qr
z <- z * (row(z) == col(z))
Z <- qr.qy(QR, z) ## raw <-
##- Z <- sweep(raw, 2L, apply(raw, 2L, function(x) sqrt(sum(x^2))),
##- "/", check.margin = FALSE)
## do not standardize. WSt
Z <- Z / sqrt(w)
colnames(Z) <- paste0("^", 1L:n - 1L)
Z
}
## ===================================================================
print.regr <- function (x, call=TRUE, correlation = FALSE,
termeffects = getUserOption("show.termeffects"),
termcolumns = getUserOption("termcolumns"),
termeffcolumns = getUserOption("termeffcolumns"),
digits = max(3, getUserOption("digits")-2),
symbolic.cor = p > 4, signif.stars = getOption("show.signif.stars"),
na.print = getUserOption("na.print"),
residuals=FALSE, niterations=FALSE, ...)
{
##
if (is.null(na.print)) na.print <- "."
## doc
ldoc <- getUserOption("doc")
if (length(ldoc)==0) ldoc <- 1
if (ldoc>=1) if (length(tit(x)))
cat("\n ",tit(x),"\n")
if (ldoc>=2) if (length(doc(x)))
cat(" ",paste(doc(x),"\n "))
## mlm
if (inherits(x,"mlm"))
return(invisible(print.mregr(x, na.print=na.print, ...)))
## preparation
lItermeff <- i.def(termeffects, TRUE)
## call, fitting fn, residuals
if (call) {
if(!is.null(x$call)) {
cat("\nCall:\n")
cat(paste(deparse(x$call), sep = "\n", collapse = "\n"),"\n", sep = "")
}
cat("Fitting function: ",x$fitfun,"\n")
}
df <- x$df
rdf <- c(x$df.resid,df[2])[1]
if (residuals) {
resid <- x$residuals
##- cat(if (!is.null(x$w) && diff(range(x$w)))
##- "Weighted ", "Residuals:\n", sep = "")
cat("Residuals:\n")
if (rdf > 5) {
nam <- c("Min", "1Q", "Median", "3Q", "Max")
rq <- if (length(dim(resid)) == 2)
structure(apply(t(resid), 1, quantile),
dimnames = list(nam, dimnames(resid)[[2]]))
else structure(quantile(resid), names = nam)
print(rq, digits = digits, na.print=na.print, ...)
} else {
if (rdf > 0) print(resid, digits = digits, na.print=na.print, ...)
else cat("ALL", df[1],
"residuals are 0: no residual degrees of freedom!\n")
}
}
## coefficients
nsingular <- df[3] - df[1]
if ((!is.na(nsingular))&&nsingular>0)
cat("\nCoefficients: (", nsingular,
" not defined because of singularities)\n", sep = "")
# else {
if (!is.null(x$sigma))
if((!is.finite(x$sigma))||x$sigma<=0)
cat("\n!!! Error variance is 0 !!!")
## coef table
lttab <- x$termtable
if (length(lttab)>0) {
llttab <- TRUE
if (signif.stars) termcolumns <- union(termcolumns, "p.symb")
if(!is.null(termcolumns)) {
if (all(termcolumns=="")) llttab <- FALSE else {
ljp <- match(termcolumns,colnames(lttab), nomatch=0)
if (sum(ljp)!=0)
## warning(":print.regr: no valid columns of termtable selected") else
lttab <- lttab[,ljp,drop=FALSE]
}
}
if (llttab) {
cat("\nTerms:\n")
## round R2.x, signif, p.value
ljrp <- colnames(lttab)[notna(pmatch(c("R2","signif","p.v"),colnames(lttab)))]
if (length(ljrp)) lttab[,ljrp] <- round(as.matrix(lttab[,ljrp]),max(3,digits))
if ("signif"%in%ljrp) lttab$signif <- round(lttab$signif,last(digits)-1)
lttabf <- format(lttab, na.encode=FALSE)
lttabp <- data.frame(lapply(lttabf, function(x) sub("NA",na.print,x)),
row.names=row.names(lttab))
print(lttabp, quote=FALSE, na.print="")
if (signif.stars>=1)
cat("---\nSignif. codes: ", attr(x$termtable, "legend"),"\n", sep = "")
} ## end if(llttab)
## --- error block
} else {
if (length(x$coef)) {
cat("\nCoefficients:\n")
print(x$coef, na.print=na.print)
}
}
##- if (length(x$binlevels)>0) {
##- cat("\nFactor(s) with two levels converted to 0-1 variable(s):\n")
##- print(as.matrix(data.frame(x$binlevels,row.names=0:1)))
##- }
## cat("\n")
## special for polr
if (length(x$intercepts)) { # polr
cat("Intercepts:\n")
print(x$intercepts)
}
## error
if (length(x$sigma) && !u.true(attr(x$sigma,"fixed")))
cat("\nSt.dev.error: ", formatC(x$sigma, digits = digits),
" on", rdf, "degrees of freedom\n")
if (length(x$r.squared)&&!is.na(x$r.squared))
cat("Multiple R^2: ", formatC(x$r.squared, digits = digits),
" Adjusted R^2: ",
formatC(x$adj.r.squared, digits = digits),
if (length(lAIC <- x$AIC)&&!is.na(lAIC))
" AIC: ", formatC(lAIC, digits = log10(abs(lAIC))+3),"\n" )
if (length(x$fstatistic)>0) {
cat("F-statistic: ", formatC(x$fstatistic[1],
digits = digits), " on", x$fstatistic[2], "and", x$fstatistic[3],
"d.f., p.value:", formatC(pf(x$fstatistic[1],
x$fstatistic[2], x$fstatistic[3], lower.tail=FALSE),
digits = digits),
"\n")
}
## deviances
if (length(x$deviance)>0) {
if (length(x$devtable)) print(x$devtable, na.print=na.print)
if (length(x$n.censored)) {
lnc <- 100*x$n.censored/x$n.obs
cat(paste("\ncensored ",
paste(paste(names(lnc), "=", round(lnc,1), "%"), collapse=" ; ")))
}
if (length(x$n.fitout)) {
lnf <- 100*x$n.fitout/x$n.obs
cat(paste("\nfit outside limit",
paste(paste(names(lnf), "=", round(lnf,1), "%"), collapse=" ; ")))
}
cat("\nDistribution: ",x$distrname)
if (length(x$dispersion))
cat(". Dispersion parameter: ",
if ((!is.null(attr(x$dispersion,"fixed")))&&
attr(x$dispersion,"fixed"))
"fixed at ", format(x$dispersion))
else if (length(x$scale))
cat(". Shape parameter (`scale`): ",
if ((!is.null(attr(x$scale,"fixed")))&&
attr(x$scale,"fixed"))
"fixed at ", format(x$scale))
cat("\nAIC: ", format(x$AIC, digits = max(4, digits + 1)), "\n", sep = " ")
if (niterations&&length(x$iter)>0)
cat("Number of iterations:", x$iter, "\n")
}
## --- additional coefficients
if (x$distrname=="multinomial") {
cat("\nCoefficients:\n")
print(t(x$coefficients), na.print=na.print)
} else {
if (length(lttab)&lItermeff) {
if (lItermeff==1) {
##- lidf <- match("df",colnames(x$termtable))
##- if (is.na(lidf)) {
##- if (getOption("verbose"))
##- warning(":print.regr: df of coef not available")
##- } else { ## dummy coefficients
mterms <- row.names(x$termtable)[is.na(x$termtable[,"coef"])]
mt <- if (length(mterms)>0 & length(x$termeffects)>0) {
imt <- mterms%in%names(x$termeffects)
x$termeffects[mterms[imt]]
} else NULL
} else mt <- x$termeffects
if (length(mt)>0) {
cat("\nEffects of factor levels:\n")
print.termeffects(mt, digits=digits, na.print=na.print,
columns=getUserOption("termeffcolumns")) }
} ## else cat("\n")
}
## ---- correlation
correl <- x$correlation
if (length(correl)>0 && correlation) {
p <- NCOL(correl)
if (p > 1) {
cat("\nCorrelation of Coefficients:\n")
if (symbolic.cor) {
symbc <- symnum(correl, symbols=c(" ", ".", ",", "+", "*", "H"))
symbl <- attr(symbc,"legend")
attr(symbc,"legend") <- NULL
print(symbc)
cat("\nSymbols: ",symbl,"\n")
} else {
correl[!lower.tri(correl)] <- NA
print(correl[-1, -p, drop = FALSE], digits = digits,
na.print = na.print)
}
}
}
## cat("\n")
invisible(x)
}
## ==========================================================================
summary.regr <- function(object, ...) object ## dispersion=NULL,
## ==========================================================================
termeffects <- function (object, se = 2, # use.na = TRUE,
df = df.residual(object), ...) {
if (is.atomic(object)||is.null(terms(object)))
stop("!termeffects! inadequate first argument")
## xl <- object$xlevels
Terms <- delete.response(terms(object))
tl <- attr(Terms, "term.labels")
dcl <- attr(Terms,"dataClasses")[-1]
if (all(dcl=="numeric"))
return(as.list(coef(object)))
## result already available?
allc <- object$termeffects
if ((!is.null(allc))&&length(allc)==length(tl)&&
(is.matrix(allc[[length(allc)]])|!se)) return(allc) ## !!! check!
int <- attr(Terms, "intercept")
facs <- attr(Terms, "factors")
mf <- object$model ##! d.c used all.vars
if (is.null(mf)) mf <- model.frame(object)
xtnm <- dimnames(facs)[[1]] ## names ##! replaces vars
xtlv <- lapply(mf[,xtnm, drop=FALSE],function(x) levels(x)) ## levels
lcontr <- object$contrasts
imat <- which(substr(dcl,1,7)=="nmatrix") ## resulting from bs()
if (length(imat)) {
xtlv[imat] <-
lapply(as.list(dcl[imat]),
function(x) as.character(1:as.numeric(substr(x,9,12))))
## lcontr <- c(lcontr, structure(rep(contr.id,length(tl)), names=tl)[imat])
lctr <- list()
for (li in seq_along(imat))
lctr <- c(lctr, list(diag(length(xtlv[[li]]))))
names(lctr) <- names(dcl)[imat]
lcontr <- c(lcontr, lctr)
}
xtnl <- pmax(sapply(xtlv,length),1) ## number of levels
termnl <- apply(facs, 2L, function(x) prod(xtnl[x > 0])) ##! lterms
nl <- sum(termnl)
## --- df.dummy: data frame of simple terms
args <- setNames(vector("list", length(xtnm)), xtnm)
for (i in xtnm)
args[[i]] <- if (xtnl[[i]] == 1) rep.int(1, nl) else
factor(rep.int(xtlv[[i]][1L], nl), levels = xtlv[[i]])
df.dummy <- as.data.frame(args) # do.call("data.frame", args)
names(df.dummy) <- xtnm
## rnn: names of rows
pos <- 0
rn <- rep.int(tl, termnl)
rnn <- rep.int("", nl)
## fill df.dummy
for (j in tl) {
i <- unlist(xtnm[facs[, j] > 0])
ifac <- i[xtnl[i] > 1]
if (length(ifac) == 0L) {
rnn[pos + 1] <- j
}
else if (length(ifac) == 1L) {
df.dummy[pos + 1L:termnl[j], ifac] <- xtlv[[ifac]]
rnn[pos + 1L:termnl[j]] <- as.character(xtlv[[ifac]])
}
else {
tmp <- expand.grid(xtlv[ifac])
df.dummy[pos + 1L:termnl[j], ifac] <- tmp
rnn[pos + 1L:termnl[j]] <-
apply(as.matrix(tmp), 1L, function(x) paste(x, collapse = ":"))
}
pos <- pos + termnl[j]
}
## attributes
attr(df.dummy,"terms") <- attr(mf,"terms")
lci <- sapply(df.dummy,is.factor)
lcontr <- lcontr[names(lci)[lci]] ## factors with 1 level have disappeared (?)
if (lIpolr <- inherits(object, "polr")) {
attr(Terms, "intercept") <- 1
mm <- model.matrix(Terms, df.dummy, contrasts.arg=lcontr, xlev=xtlv)
asgn <- attr(mm, "assign")[-1]
mm <- mm[,-1]
} else {
mm <- model.matrix(Terms, df.dummy, contrasts.arg=lcontr, xlev=xtlv)
asgn <- attr(mm, "assign")
}
if (anyNA(mm)) {
warning("some terms will have NAs due to the limits of the method")
mm[is.na(mm)] <- NA
}
## calculate dummy coefs
coef <- object$coefficients ##!!! cf <-
##- if (!use.na)
##- coef[is.na(coef)] <- 0
lnna <- is.finite(coef)
names(asgn) <- colnames(mm)
if (any(!lnna)){
coef <- coef[lnna]
mm <- mm[,lnna]
asgn <- asgn[lnna]
}
if (se) {
cov <- vcov(object)
if (is.null(cov)) {
warning(":termeffects: no covariance matrix of coefficients found.",
" Returning coefficients only")
se <- FALSE
} else
if (inherits(object, "polr"))
cov <- cov[1:length(coef),1:length(coef)]
}
licf <- pmatch(colnames(mm), names(coef))
## asgn <- asgn[names(coef)] ## !!!
res <- setNames(vector("list", length(tl)), tl)
ljfail <- NULL
for (j in seq_along(tl)) {
mmr <- rn == tl[j] ## rows corresponding to the term
mmc <- asgn==j ## & !is.na(coef)
lcf <- coef[licf[mmc]]
## mmc <- names(asgn)[asgn == j & !is.na(coef)] ## columns (logical fails for polr, vcov() too large) !!! was which
mmpart <- mm[mmr, mmc, drop=FALSE]
rrj <- setNames(drop(mmpart %*% lcf), rnn[mmr]) ## coef[mmc]
if (se) {
if (any(is.na(rrj))) {
warning(":termeffects: missing coef for term '", tl[j],
"'. no standard errors etc")
ljfail <- c(ljfail, tl[j])
} else {
sej <- sqrt(diag(mmpart %*% cov[mmc,mmc] %*% t(mmpart)))
rrj <- ciSignif(rrj, sej, df)
}
}
res[[j]] <- rrj
}
if (length(ljfail))
warning(":termeffects: error calculating se for terms ",
paste(ljfail, collapse=", "))
if (int > 0) {
res <- c(list(`(Intercept)` = coef[int]), res)
}
if (lIpolr)
res <- c(res,list("(Intercepts)"=ciSignif(object$intercepts[,1],
object$intercepts[,2], df) ))
## class(res) <- "termeffects" ## don't do that:
## want to be able to print the whole table
res
}
## --------------------------------------------------------------------
print.termeffects <- function(x, columns=NULL, ## userOptions("termeffcolumns"),
transpose=FALSE, ...) {
if (is.null(columns)) columns <- "all"
columns[columns=="coef"] <- "estimate"
csymb <- "coefsymb"%in%columns
if ("all"%in%columns) columns <-
if(csymb)
c("coefsymb", "se", "ciLow", "ciHigh", "testst",
"signif", "p.value") else
c("estimate", "se", "ciLow", "ciHigh", "testst",
"signif", "p.value")
for (li in seq_along(x)) {
xi <- x[[li]]
if (is.null(dim(xi))) next
if (csymb)
xi$coefsymb <-
if ("p.symb"%in%names(xi)) {
lps <- as.character(xi[,"p.symb"])
## lps[is.na(lps)] <- na.print ## would be misleading!
paste(format(xi[,1],...), lps)
} else xi[,1]
xif <- format(xi[,intersect(columns,names(xi)), drop=FALSE],...)
xif <- if (ncol(xif)==1 || (nrow(xif)>1 & transpose)) t(xif) else xif
if (nrow(xif)==1) row.names(xif) <- " " ## drop row name
if (ncol(xif)==1) colnames(xif) <- " " ## drop col name
if (prod(dim(xif))==1) xif <- as.character(xif[1,1])
x[li] <- list(xif)
}
print(unclass(x), quote=FALSE, ...)
}
## -------------------------------------------------------------------------
vcov.regr <- function(object, ...) {
cov <- object$covariance
if (is.null(cov)) {
class(object) <- setdiff(class(object),"regr")
vcov(object)
}
cov
}
## --------------------------------------------------------------------
df.residual.regr <- function(object, ...) {
df <- object$df.residual
if (is.null(df)) df <- object$df
if (length(df)>=2) df <- df[2]
if (is.null(df)) {
sry <- summary(object)
df <- sry$df
if (is.null(df))
df <- NROW(object$residual)-NROW(object$coefficients)
}
if (is.null(df)) df <- Inf
df
}
## ====================================================================
confint.regr <- function(fitted, ...)
{
if (!inherits(fitted, c("glm","nls"))) {
class(fitted) <- class(fitted)[-1]
return(confint(fitted, ...))
}
if (inherits(fitted, "glm")) {
## confint needs $coefficients from object (a vector) as well as
## from its sumary (a matrix containing 'Std. Error"
summary <- function(fitted)
list(coefficients = cbind(fitted$coefficients,
"Std. Error"=sqrt(diag(fitted$covariance))) )
class(fitted) <- class(fitted)[-1]
} else { ## workaround: call nls again, since profile.nls is difficult to adapt...
call <- fitted$call
call$start <- fitted$coefficients
call$nonlinear <- NULL
call[[1]] <- as.name("nls")
fitted <- eval(call, parent.frame())
}
confint(fitted, ...)
}
## ==========================================================================
drop1.regr <-
function (object, scope=NULL, scale = 0, test = NULL, k = 2,
sorted = FALSE, add=FALSE, ...)
{
## Purpose: drop1/add1 for regr objects
## ----------------------------------------------------------------------
lfam <- object$distrname
lres <- object$residuals
if (is.null(test)) test <- if (is.null(lfam)) "none" else {
if ((lfam=="gaussian"&&as.character(object$fitfun)%in%c("lm","roblm"))|
((lfam=="binomial"|lfam=="poisson")&&object$dispersion>1)) {
if (inherits(object,"mlm")) "Wilks" else "F" }
else "Chisq"
}
if (length(scope)==0) {
scope <- if (add) terms2order(object) else drop.scope(object)
} else {
##- if (!is.character(scope))
##- scope <- attr(terms(update.formula(object, scope)),
##- "term.labels")
if (is.character(scope))
as.formula(paste("~",paste(scope, collapse="+"))) else scope
}
if (length(scope)==0) { ## || !is.formula(scope) ## drop.scope is character
warning(":drop1/add1.regr: no valid scope")
ldr1 <- data.frame(Df = NA, "Sum of Sq" = NA, RSS =NA, AIC = NA,
row.names = "<none>")
return(ldr1)
}
## lform <- update(formula(object), scope !!!)
## !!! model.frame for finding the valid rows
class(object) <- setdiff(class(object), "regr")
fcall <- object$funcall
if (!is.null(fcall)) object$call <- fcall
##- dfm <- object$df
##- object$df <- dfm[setdiff(names(dfm),"residual")] ## survreg !
##- if (inherits(object, c("survreg","coxph")))
##- object$df <- object$df["original"]
##
dr1 <- if (add) { ## ------------ add
if (class(object)[1]=="lmrob")
stop("!add1.regr! 'add1' not (yet) available for 'lmrob' objects")
ldata <- eval(object$call$data, envir=environment(formula(object)) )
li <- row.names(ldata)%in%RNAMES(object$residuals)
if (length(ldata[li,])==0) stop("!drop1.regr! no data found ")
lvars <-unique(c(all.vars(formula(object)),
if (is.formula(scope)) all.vars(scope) else scope))
lvars <- lvars[lvars%in%names(ldata)]
linotna <- li & !apply(is.na(ldata[,lvars]),1,any)
lnobs <- sum(linotna)
lnrd <- sum(li)
lfc <- object$funcall ## the call to the effective R function
if (is.null(lfc)) lfc <- object$call
if (lnobs!= lnrd) {
warning(":add1.regr: refitting object to ",lnobs," / ",lnrd,
" observations due to missing values")
if(!is.null(lsubs <-
eval(lfc$subset, envir=environment(formula(object))))) {
lnsubs <- rep(FALSE,length(linotna))
lnsubs[lsubs] <- TRUE
linotna <- linotna &!lnsubs
}
lfc$subset <- linotna
object <- eval(lfc, envir=environment(formula(object)))
##- object$call[[1]] <-
##- if (is.null(lfc)) as.name(class(object)[1]) else
##- lfc[[1]]
##- object <- update(object, subset=linotna)
# environment(object$call$formula) <- environment()
##- class(object) <- setdiff(class(object), "regr")
}
if (!all(linotna)) { ## needed if NA's have been generated by transformations
lfc$subset <- linotna
object <- eval(lfc, envir=environment(formula(object)))
}
add1(object, scope=scope, scale=scale, test=test, k=k, ...)
} else { ## --------------------- drop
if (class(object)[1]%in%c("lmrob")) ## to be expanded
drop1Wald(object, test="F", ...) else {
ldata <- object$allvars # eval(object$call$data)
if (is.null(ldata)) stop("!drop1.regr! no data found ")
## all predictors must get the same missing observations
lina <- apply(is.na(ldata),1,any)
if (any(lina)) ldata[lina,] <- NA
object$call$data <- ldata
drop1(object, scope=scope, scale=scale, test=test, k=k, ...)
}
}
##- rnm <- row.names(dr1)
##- row.names(dr1) <- paste(ifelse(substring(rnm,1,1)=="<","",
##- if (add) "+ " else "- "),rnm,sep="")
attr(dr1,"drop") <- !add
##- if(add) attr(dr1,"ndropped") <- lndiff
if (sorted) {
lsrt <- notna(match(c("AIC","p.value"),colnames(dr1)))
if (length(lsrt)) dr1 <- dr1[order(dr1[, lsrt[1]]), ]
}
dr1
}
## ==========================================================================
add1.regr <-
function (object, scope=NULL, scale = 0, test = NULL, k = 2,
sorted = FALSE, ...)
{
## Purpose: add1 for regr objects
## ----------------------------------------------------------------------
if (!is.null(scope)) {
if (is.character(scope)) scope <- paste(scope,collapse="+")
if (is.formula(scope)) scope <- last(as.character(scope))
scope <- as.formula(paste("~ ",formula(object)[3],"+",scope))
}
drop1.regr(object, scope=scope, scale=scale, test=test, k=k,
sorted=sorted, add=TRUE, ...)
}
## ==========================================================================
drop1Wald <-
function (object, scope=NULL, scale = 0, test = c("none", "Chisq", "F"),
k = 2, ...)
{
x <- model.matrix(object)
offset <- model.offset(model.frame(object))
n <- nrow(x)
asgn <- attr(x, "assign")
lterms <- terms(object)
tl <- attr(lterms, "term.labels")
attr(lterms, "order") <- rep(1,length(tl))
if (is.null(scope))
scope <- tl # drop.scope(lterms)
else {
if (!is.character(scope))
scope <- attr(terms(update.formula(object, scope)),
"term.labels")
if (!all(match(scope, tl, 0L) > 0L))
stop("scope is not a subset of term labels")
}
ndrop <- match(scope, tl)
ns <- length(scope)
rdf <- object$df.residual
lsig <- c(object$sigma, object$scale)[1]
chisq <- lsig^2 * rdf
## sum(weighted.residuals(object)^2, na.rm = TRUE)
## deviance.lm(object)
dfs <- numeric(ns)
RSS <- numeric(ns)
cov <- object$cov.unscaled
if (is.null(cov)) cov <- object$covariance/lsig^2
if (length(cov)==0) stop("!drop1Wald! no covariance matrix found")
cf <- object$coefficients
##- jj <- match(names(cf),colnames(cov), nomatch=0)
##- if (!(any(jj==0)&&all(is.na(cf[jj==0]))))
jj <- match(colnames(cov),names(cf), nomatch=0)
if (any(jj==0))
warning(":drop1Wald: coefficient(s) and cov. matrix may not correspond")
coef <- cf[jj]
asgn <- asgn[jj]
if (any(names(coef[!is.na(coef)])%nin%names(coef)))
stop("!drop1Wald! coefficient(s) not appearing in covariance matrix")
##- y <- object$residuals + predict(object)
for (i in 1:ns) {
ii <- which(asgn==ndrop[i]) ## seq_along(asgn)[asgn == ndrop[i]]
RSS[i] <- if (length(ii)==1) coef[ii]^2/cov[ii,ii] else
coef[ii]%*%solve(cov[ii,ii])%*%coef[ii] ## !!! REPLACE THIS
dfs[i] <- length(ii)
##- if (all.cols)
##- jj <- setdiff(seq(ncol(x)), ii)
##- else jj <- setdiff(na.coef, ii)
##- z <- if (iswt)
##- lm.wfit(x[, jj, drop = FALSE], y, wt, offset = offset)
##- else lm.fit(x[, jj, drop = FALSE], y, offset = offset)
##- dfs[i] <- z$rank
##- oldClass(z) <- "lm"
##- RSS[i] <- deviance(z)
}
scope <- c("<none>", scope)
dfs <- c(c(object$rank,object$df)[1], dfs)
RSS <- chisq + c(0, RSS)
if (scale > 0)
AIC <- RSS/scale - n + k * dfs
else AIC <- n * log(RSS/n) + k * dfs
##- dfs <- dfs[1] - dfs
##- dfs[1] <- NA
aod <- data.frame(Df = dfs, "Sum of Sq" = c(NA, RSS[-1] -
RSS[1]), RSS = RSS, AIC = AIC, row.names = scope, check.names = FALSE)
if (scale > 0)
names(aod) <- c("Df", "Sum of Sq", "RSS", "Cp")
test <- match.arg(test)
if (test == "Chisq") {
dev <- aod$"Sum of Sq"
if (scale == 0) {
dev <- n * log(RSS/n)
dev <- dev - dev[1]
dev[1] <- NA
}
else dev <- dev/scale
df <- aod$Df
nas <- !is.na(df)
dev[nas] <- pchisq(dev[nas], df[nas], lower.tail = FALSE)
aod[, "Pr(Chi)"] <- dev
}
else if (test == "F") {
dev <- aod$"Sum of Sq"
dfs <- aod$Df
rdf <- object$df.residual
rms <- aod$RSS[1]/rdf
Fs <- (dev/dfs)/rms
Fs[dfs < 1e-04] <- NA
P <- Fs
nas <- !is.na(Fs)
P[nas] <- pf(Fs[nas], dfs[nas], rdf, lower.tail = FALSE)
aod[, c("F value", "Pr(F)")] <- list(Fs, P)
}
head <- c("Single term deletions (Wald test)", "\nModel:",
deparse(as.vector(formula(object))),
if (scale > 0) paste("\nscale: ", format(scale), "\n"))
class(aod) <- c("anova", "data.frame")
attr(aod, "heading") <- head
aod
}
## ==========================================================================
step <- function(object, ...)
UseMethod("step")
step.default <- stats::step
## step.default <- get("step", pos="package:stats")
#### !!! sollte das anders heissen? step.default <- stats::step ???
step.regr <- function (object, scope=NULL, expand=FALSE, scale = 0,
direction = c("both", "backward", "forward"), trace = FALSE, keep = NULL,
steps = 1000, k = 2, ...)
{
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 15 May 2012, 07:58
mydeviance <- function(x, ...) {
dev <- deviance(x)
if (!is.null(dev))
dev
else extractAIC(x, k = 0)[2L]
}
cut.string <- function(string) {
if (length(string) > 1L)
string[-1L] <- paste0("\n", string[-1L])
string
}
re.arrange <- function(keep) {
namr <- names(k1 <- keep[[1L]])
namc <- names(keep)
nc <- length(keep)
nr <- length(k1)
array(unlist(keep, recursive = FALSE), c(nr, nc), list(namr,
namc))
}
step.results <- function(models, fit, object, usingCp = FALSE) {
change <- sapply(models, "[[", "change")
rd <- sapply(models, "[[", "deviance")
if(is.matrix(rd)) rd <- rd[2,]
dd <- c(NA, abs(diff(rd)))
rdf <- sapply(models, "[[", "df.resid")
ddf <- c(NA, diff(rdf))
AIC <- sapply(models, "[[", "AIC")
heading <- c("Stepwise Model Path \nAnalysis of Deviance Table",
"\nInitial Model:", deparse(formula(object)), "\nFinal Model:",
deparse(formula(fit)), "\n")
aod <- data.frame(Step = I(change), Df = ddf, Deviance = dd,
`Resid. Df` = rdf, `Resid. Dev` = rd, AIC = AIC,
check.names = FALSE)
if (usingCp) {
cn <- colnames(aod)
cn[cn == "AIC"] <- "Cp"
colnames(aod) <- cn
}
attr(aod, "heading") <- heading
fit$anova <- aod
fit
}
## end step.results
Terms <- terms(object)
object$call$formula <- object$formula <- Terms
md <- missing(direction)
direction <- match.arg(direction)
backward <- direction == "both" | direction == "backward"
forward <- direction == "both" | direction == "forward"
if (is.null(scope)&expand) ## !! was missing(scope)
scope <- list(lower=formula(object), upper=terms2order(object)) ## !!
if (is.list(scope)) {
fdrop <- if (!is.null(fdrop <- scope$lower))
attr(terms(update.formula(object, fdrop)), "factors")
else numeric()
fadd <- if (!is.null(fadd <- scope$upper))
attr(terms(update.formula(object, fadd)), "factors")
}
else {
if (is.character(scope))
scope <- as.formula(paste("~.+",paste(scope, collapse="+")))
fadd <- if (!is.null(fadd <- scope))
attr(terms(update.formula(object, scope)), "factors")
fdrop <- numeric()
}
models <- vector("list", steps)
if (!is.null(keep))
keep.list <- vector("list", steps)
n <- nobs(object, use.fallback = TRUE)
fit <- object
bAIC <- extractAIC(fit, scale, k = k, ...)
edf <- bAIC[1L]
bAIC <- bAIC[2L]
if (is.na(bAIC))
stop("AIC is not defined for this model, so `step` cannot proceed")
nm <- 1
if (trace) {
cat("Start: AIC=", format(round(bAIC, 2)), "\n",
cut.string(deparse(formula(fit))), "\n\n", sep = "")
utils::flush.console()
}
models[[nm]] <-
list(deviance = mydeviance(fit), df.resid = n - edf, change = "",
AIC = bAIC)
if (!is.null(keep))
keep.list[[nm]] <- keep(fit, bAIC)
usingCp <- FALSE
## ------------------------
while (steps > 0) {
steps <- steps - 1
AIC <- bAIC
ffac <- attr(Terms, "factors")
scope <- factor.scope(ffac, list(add = fadd, drop = fdrop))
aod <- NULL
change <- NULL
## backward
if (backward && length(scope$drop)) {
aod <- drop1(fit, scope$drop, scale = scale, trace = trace,
k = k, sorted=FALSE, ...)
rn <- row.names(aod)
row.names(aod) <- c(rn[1L], paste("-", rn[-1L], sep = " "))
if (any(aod$Df == 0, na.rm = TRUE)) {
zdf <- aod$Df == 0 & !is.na(aod$Df)
change <- rev(rownames(aod)[zdf])[1L]
}
}
## forward
if (is.null(change)) {
if (forward && length(scope$add)) {
aodf <- add1(fit, scope$add, scale = scale, trace = trace,
k = k, ...)
rn <- row.names(aodf)
row.names(aodf) <- c(rn[1L], paste("+", rn[-1L], sep = " "))
aod <- if (is.null(aod)) aodf else {
names(aodf) <- names(aod)
rbind(aod, aodf[-1, , drop = FALSE])
}
}
}
## backward or forward
attr(aod, "heading") <- NULL
nzdf <- if (!is.null(aod$Df))
aod$Df != 0 | is.na(aod$Df)
aod <- aod[nzdf, ]
if (is.null(aod) || ncol(aod) == 0)
break
nc <- match(c("Cp", "AIC"), names(aod))
nc <- nc[!is.na(nc)][1L]
o <- order(aod[, nc])
if (trace)
print(aod[o, ])
if (o[1L] == 1) break
change <- rownames(aod)[o[1L]]
## update
usingCp <- match("Cp", names(aod), 0L) > 0L
## if (is.null(change)) break else {
fit <- update(fit, paste("~ .", change), evaluate = FALSE)
fit <- eval.parent(fit)
nnew <- nobs(fit, use.fallback = TRUE)
if (all(is.finite(c(n, nnew))) && nnew != n) {
warning(":step.regr: number of rows in use has changed: \n ",
nnew," observations instead of ", n)
n <- nnew
}
Terms <- terms(fit)
bAIC <- extractAIC(fit, scale, k = k, ...)
edf <- bAIC[1L]
bAIC <- bAIC[2L]
## output
if (trace) {
cat("\nStep: AIC=", format(round(bAIC, 2)), "\n",
cut.string(deparse(formula(fit))), "\n\n", sep = "")
utils::flush.console()
}
## if (bAIC >= AIC + 1e-07)
## else break
nm <- nm + 1
models[[nm]] <- list(deviance = mydeviance(fit), df.resid = n - edf,
change = change, AIC = bAIC)
if (!is.null(keep))
keep.list[[nm]] <- keep(fit, bAIC)
}
if (!is.null(keep))
fit$keep <- re.arrange(keep.list[seq(nm)])
step.results(models = models[seq(nm)], fit, object, usingCp)
}
## ==========================================================================
terms2order <- function(object, squared = TRUE, interactions = TRUE)
{
## Purpose:
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 15 Oct 2009, 16:01
ltadd <- NULL
if (squared) {
# lvrs <- all.vars(formula(object)[-2])
if (!is.list(object)||length(lterms <- object$terms)==0)
stop("!terms2order! first argument has no 'terms' component")
ltl <- attr(lterms, "term.labels")
lts <- c(grep("\\^",ltl),grep(":",ltl))
if (length(lts)) ltl <- ltl[-lts]
ltn <- ltl[attr(object$terms,"dataClasses")[ltl]=="numeric"]
if (length(ltn)) ltadd <- paste(paste("I(", ltn,"^2)",sep=""), collapse="+")
}
if (interactions) {
ltint <- "(.)^2"
if (is.null(ltadd)) ltadd <- ltint else ltadd <- paste(ltadd, ltint, sep=" + ")
}
if (is.null(ltadd)) {
warning(":terms2order: nothing to add")
return(formula(object))
}
ltadd <- paste("~.+", ltadd)
##- attr(terms(update.formula(object, ltadd)), "term.labels")
update.formula(object, ltadd)
}
## ==========================================================================
fitted.regr <-
function (object, type=NULL, ...)
{
if (is.null(type)&&pmatch("fitted",names(object),nomatch=0))
return( naresid(object$na.action, object$fitted) )
lres <- object$residuals
if (inherits(lres, "condquant"))
structure(lres[,"fit"], names=row.names(lres)) else {
class(object) <- setdiff(class(object), "regr")
predict(object, type=type, ...)
}
}
## ==========================================================================
predict.regr <-
function (object, newdata = NULL, scale = object$sigma,
df=object$df.residual, type = NULL, ...)
## bug: if used with NULL newdata, predictions will be produced
## for obs in model.matrix, which excludes those with missing values
{
##- lglm <- inherits(object,"glm")
##- lmeth <- object$call$method
##- lnls <- length(lmeth)>0 && lmeth=="nls"
if (length(type)==0)
type <- if (inherits(object,"glm")) "link" else
if (inherits(object, "polr")) "link" else "response"
## !!!
if (object$fitfun=="rlm")
if (!is.matrix(object[["x"]]))
object$x <- model.matrix(formula(object), data=object$allvars) ## !!! was $model
##- if (length(scale)==0) scale <- c(object$sigma,1)[1]
class(object) <- class(object)[-1]
##- if (missing(newdata) || length(newdata)==0) {
##- lpred <- if (lglm)
##- predict.glm(object, type=type, se.fit=se.fit,
##- dispersion=lscale^2, terms=terms, na.action = na.action )
##- else {
##- if (lnls) predict.nls(object, type=type, se.fit=se.fit,
##- na.action = na.action ) else
##- predict.lm(object, type=type, se.fit=se.fit,
##- terms=terms, na.action = na.action )
##- }
##- lpred <- predict(object, type=type, se.fit=se.fit,
##- dispersion=lscale^2, terms=terms, na.action = na.action )
##- } else {
ldt <- newdata
if (is.null(ldt)) return(
predict(object, type=type, scale=object$sigma,
dispersion=object$dispersion^2, ... ) )
## analyze variables
## if (!is.null(ldt)) {
## terms with logst -> need original thresholds
ltl <- attr(terms(object),"term.labels")
ltll <- grep("logst\\(",ltl)
lvlogst <- NULL
if (length(ltll)) {
lvlogst <- unique( gsub(".*logst\\((.*)\\).*","\\1", ltl[ltll]) )
lmodel <- model.frame(object)
lform <- as.character(as.expression(formula(object)))
}
for (lvn in names(ldt)) {
lv <- ldt[[lvn]]
## factors
if (is.factor(lv)) ldt[[lvn]] <-
if (match(lvn,names(object$binlevels),nomatch=0)>0) ## binary
match(lv,object$binlevels[[lvn]])-1
else factor(lv)
## logst
if (lvn %in% lvlogst) {
lt <- ltl[grep(lvn, ltl)[1]]
lvv <- lmodel[[lt]]
lth <- attr(lvv, "threshold")
if (is.null(lth))
stop("!predict.regr! variable in term ",lt,
" not found in model.frame or threshold not available. \n",
" Prediction with 'logst' would fail.",
" Store transformed variable in data.frame")
lth <- round(lth,5) ## get it from there,
## since it may have been set by the call to regr
lform <- gsub(paste("logst *\\(",lvn,"\\)",sep=" *"),
paste("logst(",lvn,", threshold=",lth,")",sep=""), lform)
}
}
if (length(lvlogst))
object$terms <- terms(as.formula(lform), data=object$data)
predict(object, newdata=ldt, type=type, scale=object$sigma,
df=df, dispersion=object$dispersion^2, ... )
}
## ==========================================================================
##- extractAIC.regr <- function (fit, scale = 0, k = 2, ...)
##- { ##- # AIC, divided by n to allow for comparing models with different n
##- ##- lres <- fit$residuals
##- ##- if (is.null(lres)) {
##- ##- lfit <- fit$fitted
##- ##- fit$fitted.values <- notna(lfit)
##- ##- } else fit$residuals <- notna(lres)
##- class(fit) <- setdiff(class(fit),"regr")
##- extractAIC(fit, scale = scale, k = k, ...)
##- }
## ==========================================================================
vif.regr <- function(mod, cov, mmat)
{
## Purpose: vif.lm of library car
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: modified by Werner Stahel, Date: 11 Mar 2005, 09:18
##- v <- mod$sigma^2* mod$cov.unscaled
##- cls <- dimnames(model.matrix(mod))[[2]]%in%dimnames(v)[[2]]
##- # needed for singular cases
##- assign <- attributes(model.matrix(mod))$assign[cls]
cls <- dimnames(mmat)[[2]]%in%dimnames(cov)[[2]]
##- # needed for singular cases
assign <- attr(mmat,"assign")[cls]
terms <- labels(terms(mod))
n.terms <- length(terms)
if (n.terms < 2) {
##- stop("model contains fewer than 2 terms")
return(matrix(1,1,3))
}
if (length(cov)==0) { # ||n.terms!=nrow(cov)|nrow(cov)!=ncol(cov)
warning(":vif.regr: mod$cov.unscaled is inappropriate. no vifs")
return(matrix(NA,n.terms,3))
}
if (names(coefficients(mod)[1]) == "(Intercept)") {
cov <- cov[-1, -1]
assign <- assign[-1]
}
else if (mod$fitfun%nin%c("polr","coxph","survreg"))
warning("No intercept: vifs may not be sensible.")
sd <- 1/sqrt(diag(cov))
if (any(!is.finite(sd))) {
warning(":vif.regr: zero variances of estimates. no R2x")
return(NULL)
}
R <- cov/outer(sd,sd)
result <- matrix(0, n.terms, 3)
rownames(result) <- terms
colnames(result) <- c("GVIF", "Df", "GVIF^(1/2Df)")
for (term in 1:n.terms) {
subs <- which(assign == term)
result[term, 1] <- det(as.matrix(R[subs, subs])) *
det(as.matrix(R[-subs,-subs]))/det(R)
result[term, 2] <- length(subs)
}
result[, 3] <- result[, 1]^(1/(2 * result[, 2]))
result
}
## =================================================================
drop1.multinom <-
function (object, scope, test = c("Chisq","none"), ...)
{
if (!inherits(object, "multinom"))
stop("Not a multinom fit")
if (missing(scope))
scope <- drop.scope(object)
else {
if (!is.character(scope))
scope <- attr(terms(update.formula(object, scope)),
"term.labels")
if (!all(match(scope, attr(object$terms, "term.labels"),
nomatch = FALSE)))
stop("scope is not a subset of term labels")
}
ns <- length(scope)
ans <- matrix(nrow = ns + 1, ncol = 2, dimnames = list(c("<none>",
scope), c("Df", "AIC")))
ans[1, ] <- c(object$edf, object$AIC)
if (test[1]=="Chisq") ans <- cbind(ans,Chisq=NA, p.value=NA)
env <- environment(formula(object))
for(i in seq(ns)) {
tt <- scope[i]
## cat("trying -", tt, "\n")
nfit <- update(object, as.formula(paste("~ . -", tt)),
evaluate = FALSE)
nfit <- eval(nfit, envir=env) # was eval.parent(nfit)
##- nobject <- update(object, paste("~ . -", tt))
if (nfit$edf == object$edf)
nfit$AIC <- NA
ans[i+1, ] <- c(nfit$edf, nfit$AIC,
if (test[1]=="Chisq") unlist(anova(object,nfit)[2,6:7]))
}
as.data.frame(ans)
}
## =================================================================
drop1.mlm <- function (object, scope = NULL,
test = c("Wilks", "Pillai", "Hotelling-Lawley", "Roy"),
total=TRUE, add=FALSE, ...)
{
## add=TRUE do add instead of drop1
Pillai <- function(eig, q, df.res) {
test <- sum(eig/(1 + eig))
p <- length(eig)
s <- min(p, q)
n <- 0.5 * (df.res - p - 1)
m <- 0.5 * (abs(p - q) - 1)
tmp1 <- 2 * m + s + 1
tmp2 <- 2 * n + s + 1
c(test, (tmp2/tmp1 * test)/(s - test), s * tmp1, s *
tmp2)
}
Wilks <- function(eig, q, df.res) {
test <- prod(1/(1 + eig))
p <- length(eig)
tmp1 <- df.res - 0.5 * (p - q + 1)
tmp2 <- (p * q - 2)/4
tmp3 <- p^2 + q^2 - 5
tmp3 <- if (tmp3 > 0)
sqrt(((p * q)^2 - 4)/tmp3)
else 1
c(test, ((test^(-1/tmp3) - 1) * (tmp1 * tmp3 - 2 * tmp2))/p/q,
p * q, tmp1 * tmp3 - 2 * tmp2)
}
HL <- function(eig, q, df.res) {
test <- sum(eig)
p <- length(eig)
m <- 0.5 * (abs(p - q) - 1)
n <- 0.5 * (df.res - p - 1)
s <- min(p, q)
tmp1 <- 2 * m + s + 1
tmp2 <- 2 * (s * n + 1)
c(test, (tmp2 * test)/s/s/tmp1, s * tmp1, tmp2)
}
Roy <- function(eig, q, df.res) {
p <- length(eig)
test <- max(eig)
tmp1 <- max(p, q)
tmp2 <- df.res - tmp1 + q
c(test, (tmp2 * test)/tmp1, tmp1, tmp2)
}
##- if (!is.null(object$drop1)) return (object$drop1)
if (!(inherits(object, "maov") || inherits(object, "mlm")))
stop("object must be of class \"maov\" or \"mlm\"")
test <- match.arg(test)
asgn <- object$assign[object$qr$pivot[1:object$rank]]
tl <- attr(object$terms, "term.labels")
## scope
if (is.null(scope))
scope <- if (add) attr(terms(update.formula(object, ~(.)^2)),
"term.labels") else
drop.scope(object)
else {
if (!is.character(scope))
scope <- attr(terms(update.formula(object, scope)),
"term.labels")
##- if (!all(match(scope, tl, FALSE)))
if (!(add||all(match(scope, tl, FALSE))))
stop("!drop1.mlm! scope is not a subset of term labels")
}
ns <- length(scope)
rdf <- object$df.residual
res <- resid(object)
## ::: needed for finding the data later
ldata <- eval(object$call$data,
envir=environment(formula(object)))
ladd <- 1
if (add) {
ladd <- -1
lna <- i.add1na(object, scope)
if (!is.null(lna)) res[lna,1] <- NA
}
res <- nainf.exclude(res)
lna <- attr(res,"na.action")
if (!is.null(lna)) ldata <- ldata[-lna,]
## full model
rss <- crossprod(as.matrix(res))
rss.qr <- qr(rss)
if (rss.qr$rank < NCOL(res))
stop(paste("!drop1.mlm! residuals have rank", rss.qr$rank, "<",
ncol(res)))
stats <- matrix(NA,length(scope),4)
dimnames(stats) <- list(scope,c(test,"F.stat","dfnum","dfden"))
tstfn <- switch(test, Pillai = Pillai, Wilks = Wilks,
"Hotelling-Lawley" = HL, HL = HL, Roy = Roy)
object$call[[1]] <- as.name("lm")
## loop through scope
for (lsc in scope) {
lfo <- as.formula(paste(if (add) "~.+" else "~.-",lsc))
lrg <- update(object, lfo, data=ldata, model=FALSE) # ,data=data
dfj <- ladd * (lrg$df.residual - rdf)
bss <- ladd * (crossprod(resid(lrg))-rss)
eigs <- Re(eigen(qr.coef(rss.qr, bss),symmetric = FALSE)$values)
stats[lsc,] <- tstfn(eigs, dfj, rdf)
}
ldf <- stats[1,3:4]
names(ldf) <- c("numerator","denominator")
if (total) {
lpr <- predict(object)
if (length(lna)) lpr <- lpr[-lna,] # drop rows with NA
yy <- scale(res + lpr, scale=FALSE)
bss <- crossprod(yy)-rss
eigs <- Re(eigen(qr.coef(rss.qr, bss), symmetric = FALSE)$values)
stats <- rbind(stats, "<total>"= tstfn(eigs, object$df[1], rdf))
}
data.frame(stats,
p.value = pf(stats[,2],stats[,3],stats[,4], lower.tail = FALSE))
## attr(stats,"df") <- ldf
## stats
} ## {drop1.mlm}
## ==========================================================================
add1.mlm <-
function (object, scope=NULL, test = c("Wilks", "Pillai", "Hotelling-Lawley", "Roy"), ...)
{
## Purpose: add1 for regr objects
## ----------------------------------------------------------------------
drop1.mlm(object, scope=scope, test=test, total=FALSE, add=TRUE, ...)
}
## ===========================================================================
i.add1na <- function (object, scope)
{
## determine rows with NA`s in model.frame for expanded model
Terms <-
terms(update.formula(object, paste("~.+",paste(scope, collapse = "+"))))
fc <- object$call
fc$formula <- Terms
fob <- list(call = fc, terms = Terms)
class(fob) <- oldClass(object)
m <- model.frame(fob, xlev = object$xlevels)
r <- cbind(resid(object))
if (nrow(r)!=nrow(m)) {
warning(gettextf("!add1! using the %d/%d rows from a combined fit",
nrow(m), nrow(r)), domain = NA)
lna <- !row.names(r)%in%row.names(m)
}
else lan <- NULL
}
## ==========================================================================
## currently only called from print.regr():
print.mregr <- function(x, na.print=getUserOption("na.print"), ...)
{
## Purpose: collect results for mregr object
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: Feb 2008
f.prv <- function(x) paste(paste(names(x),x,sep=" = "),collapse=", ")
if (is.null(na.print)) na.print <- "."
cat("\nCall:\n")
cat(paste(deparse(x$call), sep = "\n", collapse = "\n"),"\n", sep = "")
cat("\nCoefficients:\n")
print(x$coefficients, na.print=na.print)
cat("\nP-values:\n")
print(round(x$pvalues,4), na.print=na.print)
cat("\nStatistics for individual response models:\n")
print(x$stats)
cat("\nResidual degrees of freedom: ",x$df,"\n")
ldr <- x$drop1
if (!is.null(ldr)) {
cat("\nMultivariate tests for all responses\n Degrees of freedom: ",
f.prv(attr(ldr,"df")),"\n")
print(ldr[,], na.print=na.print)
}
invisible(x)
}
## ===================================================
fitted.polr <- function(object, type="link", na.action=object, ...) {
if (pmatch(type,"link",nomatch=0)) {
lfit <- object$linear.predictor
if (length(lfit)==0) { # if called by original polr
Terms <- delete.response(object$terms)
environment(Terms) <- environment() ## ! WSt
## from predict.polr
m <- object$model
if (length(m)==0)
m <- model.frame(Terms, object$data, na.action = function(x) x,
xlev = object$xlevels)
if (!is.null(cl <- attr(Terms, "dataClasses")))
.checkMFClasses(cl, m)
X <- model.matrix(Terms, m, contrasts = object$contrasts)
xint <- match("(Intercept)", colnames(X), nomatch = 0)
if (xint > 0) X <- X[, -xint, drop = FALSE]
lfit <- drop(X %*% object$coefficients)
}
} else
lfit <- object$fitted
if (type=="class")
lfit <- factor(max.col(lfit), levels = seq_along(object$lev),
labels = object$lev)
naresid(object$na.action,lfit)
}
## ===================================================
predict.polr <-
function (object, newdata=NULL,
type = c("class", "probs", "link"), ...)
## type link added by WSt, newdata=NULL
{
if (!inherits(object, "polr"))
stop("not a \"polr\" object")
type <- match.arg(type)
if (length(newdata)==0) {
if (type=="link")
eta <- fitted(object, type="link", na.action=NULL)
Y <- object$fitted
na.action <- object$na.action
}
else {
na.action <- NULL
newdata <- as.data.frame(newdata)
Terms <- delete.response(object$terms)
attr(Terms, "intercept") <- 1
environment(Terms) <- environment() ## ! WSt
m <- model.frame(Terms, newdata, na.action = function(x) x,
xlev = object$xlevels)
if (!is.null(cl <- attr(Terms, "dataClasses")))
.checkMFClasses(cl, m)
X <- model.matrix(Terms, m, contrasts = object$contrasts)
## changed by WSt
coef <- coefficients(object)
eta <- drop(X[,-1] %*% coef) ## without the intercept
n <- nrow(X)
q <- length(object$zeta)
## pgumbel <- function(q) exp(pweibull(log(q))) # ???
pfun <- switch(object$method, logistic = plogis, probit = pnorm,
cloglog = prevgumbel, cauchit = pcauchy)
cumpr <- matrix(pfun(matrix(object$zeta, n, q, byrow = TRUE) - eta), , q)
Y <- t(apply(cumpr, 1, function(x) diff(c(0, x, 1))))
dimnames(Y) <- list(rownames(X), object$lev)
}
##- if (newdata) && !is.null(object$na.action))
##- Y <- napredict(object$na.action, Y)
switch(type, class = {
Y <- factor(max.col(Y), levels = seq_along(object$lev),
labels = object$lev)
}, probs = {
}, link = { Y <- eta })
Y <- napredict(na.action,Y)
drop(Y)
}
## ===================================================================
condquant <- function(x, dist="normal", sig=1, randomrange=0.9)
{
## Purpose: conditional quantiles and random numbers
## works only for centered scale families
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 24 Oct 2008, 09:30
##- fp <- switch(dist, normal="pnorm(x,0,sig)", gaussian="pnorm(x,0,sig)",
##- logis="plogis(x,0,sig)", revgumbel="prevgumbel(x,0,sig)",
##- # lognormal="pnorm(log(x),0,sig)"
##- ## x is log regsidual ?
##- )
if (length(x)==0) stop("!condquant! bug: no data")
fp <- switch(dist, normal=pnorm, gaussian=pnorm, unif=function (x) x,
logis=plogis, logistic=plogis, revgumbel=prevgumbel)
if (is.null(fp)) stop(paste("!condquant! distribution ", dist, " not known"))
fq <- switch(dist, normal=qnorm, gaussian=qnorm, unif=function (x) x,
logis=qlogis, logistic=qlogis, revgumbel=qrevgumbel)
## if (NCOL(x)>=2) stop("!condquant! x must have 2 columns")
x <- na.exclude(rbind(x))
lx <- t(apply(x, 1,sort))
lp <- fp(lx/sig)
lpp <- rbind(rbind(lp)%*%rbind(c(0.5,0.75,0.25),c(0.5,0.25,0.75)))
lprand <- lp[,1]+(lp[,2]-lp[,1])*
runif(nrow(lp),(1-randomrange)/2,(1+randomrange)/2)
## <<<<<<< .mine
rr <- cbind(cbind(median=fq(lpp[,1]),lowq=fq(lpp[,2]),
uppq=fq(lpp[,3]), random=fq(lprand))*sig,
prob=lp[,2]-lp[,1])
if (any(lp0 <- lp[,2]<=0)) rr[lp0,1:4] <- matrix(lx[lp0,2],sum(lp0),4)
if (any(lp1 <- lp[,1]>=1)) rr[lp1,1:4] <- matrix(lx[lp1,1],sum(lp1),4)
rr <- naresid(attr(x,"na.action"), rr)
## dimnames(rr)[[1]] <- row.names(x)
class(rr) <- c("condquant", "matrix")
rr
##- ======= !!!???!!!
##- structure(cbind(sig*cbind(median= fq(lpp[,1]), lowq = fq(lpp[,2]),
##- uppq = fq(lpp[,3]), random= fq(lprand)),
##- prob=abs(lp[,2]-lp[,1])),
##- class = c("condquant", "matrix")) # "matrix", e.g., for head()
## >>>>>>> .r32
}
## ===================================================================
residuals.regr <- function(object, type=NULL, ...)
{
##- if (!is.na(pmatch(type,"condquant"))) {
##- ## this seems to apply only if residuals.regr is called explicitly
lcall <- match.call()
lcall$type <- if (is.null(type)||is.na(type)) NULL else type
lff <- object$fitfun
if (lff=="glm" && !is.null(type) && substr(type,1,4)=="cond") lff <- "polr"
lcall[[1]] <-
switch(as.character(lff),
"polr" = quote(regr0:::residuals.polr),
"survreg" = quote(regr0:::residuals.regrsurv),
"coxph" = quote(regr0:::residuals.regrcoxph),
quote(residuals))
##- lres <-
##- if (object$fitfun%in%c("polr","glm"))
##- residuals.polr(object, type=type, na.action=na.action, ...) else {
##- if (object$fitfun=="coxph")
##- residuals.regrcoxph(object, type=type, na.action=na.action, ...) else
##- residuals.regrsurv(object, type=type, na.action=na.action, ...)
##- }
##- ##- return(lres)
##- ##- }
##- if (is.null(type)) return( naresid(na.action$na.action, object$residuals) )
class(object) <- setdiff(class(object), "regr")
lcall$object <- object
structure( eval(lcall, envir=parent.frame()), type=type)
}
## ===================================================================
residuals.regrcoxph <- function(object, type=NULL, na.action=object, ...)
{
## Purpose: conditional quantiles and random numbers for censored obs
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: Aug 2010
if(u.nuna(type)) type <- "CoxSnellMod"
if (type!="CoxSnellMod") {
object$residuals <- object$resid.orig
return(structure( survival:::residuals.coxph(object, type=type, ...),
type=type) )
}
lres <- object$residuals
if (inherits(lres, "condquant")) return(lres)
ly <- object$y
lst <- ly[,2] # status
li <- lst!=1
## martingale --> coxsnell
lres <- lst - lres
lrs <- qnorm(exp(-lres)) # --> normalscores
## fill matrix with values adequate for non-censored obs
lrr <- matrix(lrs,length(lrs),5)
dimnames(lrr) <- list(row.names(ly),c("median","lowq","uppq","random","prob"))
lrr[,"prob"] <- 0
## censoring
if (any(li)) {
llim <- cbind(lrs[li],Inf)
lr <- condquant(llim, "normal")
lrr[li,] <- lr
}
lrr <- cbind(lrr, fit=object$linear.predictor)
## class(lrr) <- "condquant"
structure( naresid(na.action$na.action, lrr),
class=c("condquant", "matrix"), type=type)
}
## ===================================================================
residuals.regrsurv <- function(object, type=NULL, na.action=object, ...)
{
## Purpose: conditional quantiles and random numbers for censored obs
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 24 Oct 2008, 10:16
if(u.nuna(type)) type <- "condquant"
if (type!="condquant")
return(structure( survival:::residuals.survreg(object, type, ...),
type=type) )
ly <- object$y ## log for weibull
lsig <- object$sigma
if (length(lsig)==0) lsig <- summary(object)$scale
if (length(lsig)==0) {
warning("!residuals.regrsurv! no sigma found. Setting =1")
lsig <- 1
}
lfit <- object$linear.predictors
## lres <- ly[,1]-lfit
lres <- ly[,1]-lfit
ldist <- if (length(object$dist)>0) object$dist else "normal"
li <- match(ldist, c("weibull","lognormal","loglogistic"))
if (!is.na(li)) ldist <- c("revgumbel","normal","logistic")[li]
## for user-defined survreg.distributions with transformation,
## this is not enough.
## fill matrix with values adequate for non-censored obs
lrr <- matrix(lres,length(lres),5)
dimnames(lrr) <- list(row.names(ly),c("median","lowq","uppq","random","prob"))
lrr[,"prob"] <- 0
## censoring
lst <- ly[,2] # status
## ltt <- attr(object$response[[1]], "type")
ltt <- attr(object$response, "type")
##- if (length(ltt)>0&<t=="left") lst[lst==0] <- 2
ltl <- length(ltt)>0&<t=="left"
li <- lst!=1
if (any(li)) {
##- llim <- cbind(lres[li],c(Inf,NA,-Inf)[lst[li]+1]) #
llim <- if(ltl) cbind(-Inf,lres[li]) else cbind(lres[li],Inf)
lr <- condquant(llim, ldist, lsig)
lrr[li,] <- lr
}
lrr <- cbind(lrr, fit=lfit)
## class(lrr) <- "condquant"
structure( naresid(na.action$na.action, lrr),
class=c("condquant", "matrix"), type=type)
}
## ==============================================================
nobs.survreg <- function(object, use.fallback = TRUE) {
lnobs <- length(object$linear.predictors)
if (lnobs==0) lnobs <- NROW(residuals(object))
lnobs
}
nobs.coxph <- function(object, use.fallback = TRUE) {
object$n
}
## ===================================================================
residuals.polr <- function(object, ...) ## na.action=object,
{
## Purpose: residuals for cumulative logit regression
## ----------------------------------------------------------------------
## Arguments:
## object result of polr
## Value: list with components
## median "conditional median" residual = median of conditional distr.
## of latent response variable, given observed response
## will probably be replaced by mean in the near future
## in order to allow for adequate smoothing
## lowq, uppq lower ans upper quartiles of this cond. distribution
## Remark: experimental function !!!
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 2 Oct 2007, 11:31
if (!((lpolr <- inherits(object, "polr"))|
(lbin <- inherits(object, "glm")&&object$family=="binomial")))
stop ("!residuals.polr! unsuitable first argument")
lyr <- attr(terms(object), "response")
ldi <- max(match("model", names(object), nomatch=0))
if (ldi) ldt <- object[[ldi]] else {
ldt <- if (u.debug()) model.frame(object) else
try(model.frame(object),silent=TRUE)
if (class(ldt)=="try-error")
stop ("!residuals.polr! no data found")
}
ly <- ldt[,lyr]
if (length(ly)==0) stop ("!residuals.polr! bug: no response values found")
if (length(dim(ly))) {
warning(":residuals.polr: returning simple deviance residuals for non-binary (grouped) data")
return(residuals(object, type="deviance"))
}
#if (lpolr)
ly <- as.numeric(ordered(ly))
ly <- naresid(object$na.action, ly)
lfit <- fitted(object, type="link")
lthres <- c(-100, if (lpolr) object$zeta else 0, 100)
llim <- structure(cbind(lthres[ly],lthres[ly+1])-lfit,
dimnames = list(names(lfit), c("low","high")) )
lr <- cbind(condquant(llim,"logis"),fit=lfit,y=ly)
##- structure( naresid(object$na.action, lr), class=c("condquant", "matrix"))
structure(lr, class=c("condquant", "matrix"))
}
## ===========================================================================
linear.predictors <- function(object) {
llp <- object$linear.predictors
if (is.null(llp)) llp <- object$fitted.values
if (is.null(llp))
stop("linear.predictors! no component linear predictor")
naresid(object$na.action, llp)
}
linpred <- linear.predictors
## ===========================================================================
fitcomp <- function(object, data=NULL, vars=NULL, se=FALSE,
xm=NULL, xfromdata=FALSE, noexpand=NULL, nxcomp=51)
{
## Purpose: components of a fit
## ----------------------------------------------------------------------
## !!! make nxcomp >= maximal number of factor levels !!!
## !!! why vars??? can possibly be much simpler!!!
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 6 Jan 2004, 22:24
##- lf.predict <-
##- function(object, newdata, type="linear.predictor", se.fit = FALSE) {
##- if (type=="linear.predictor") {
##- lmod <- model.matrix(formula(object)[-2], newdata)
##- if (inherits(object,"polr")) lmod <- lmod[,colnames(lmod)!="(Intercept)"]
##- rr <- lmod%*%object$coefficients[colnames(lmod)]
##- } else rr <- predict(object, newdata, type=type, se.fit=se.fit)
##- rr
##- }
if (length(data)==0) {
data <- object$allvars
if (length(data)==0)
data <- eval(object$call$data)
}
lnls <- c(eval(object$call$nonlinear),FALSE)[1]
lform <- formula(object)[-2]
lvars <- all.vars(lform)
if (lnls)
lvars <- lvars[match(lvars,names(coefficients(object)),nomatch=0)==0]
lvmiss <- setdiff(lvars,names(data))
if (length(lvmiss))
stop(paste("!fitcomp! variable(s)", paste(lvmiss,collapse=", "),
"not in data"))
if (length(lvars)==0)
stop("!fitcomp! no variables selected")
ldata <- data[,lvars,drop=FALSE]
for (lj in 1:length(ldata))
if (is.character(ldata[[lj]])|is.factor(ldata[[lj]]))
ldata[,lj] <- factor(ldata[,lj])
lformfac <- NULL
if (!lnls) {
if (length(c(grep("factor *\\(", format(lform)),
grep("ordered *\\(", format(lform))))) {
warning(
":fitcomp: Using 'factor(...)' or 'ordered(...)' in the formula ",
"is hazardous for fitcomp.\n",
" : I try to continue.")
lformfac <- i.findformfac(lform)
## return(structure(list(comp=NULL), class="try-error") )
}
}
## generate means xm if needed
if (length(xm)>0) {
if ((!is.data.frame(xm))||any(names(xm)!=names(ldata))) {
warning(":fitcomp: arg. xm not suitable -> not used")
xm <- NULL } else xm <- xm[1,]
}
## median point and prediction for it
if (length(xm)==0) {
xm <- ldata[1,,drop=FALSE]
for (lj in 1:length(ldata)) {
lv <- ldata[,lj]
if (is.character(lv)) lv <- factor(lv)
lnhalf <- ceiling(sum(!is.na(lv))/2)
xm[1,lj] <-
if (is.factor(lv)) {
levels(lv)[
if (is.ordered(lv)) sort(as.numeric(lv))[lnhalf] else
which.max(table(as.numeric(lv)))
]
} else ## median(as.numeric(lv),na.rm=TRUE)
sort(lv)[lnhalf]
## median should be attained in >=1 cases
}
}
if (is.null(attr(terms(object), "predvars"))) { # from model.frame
lterms <- attr(lm(formula(object), data=data, method="model.frame"),"terms")
attr(object$terms,"predvars") <- attr(lterms,"predvars")
}
ltype <- if (inherits(object,"coxph")) "lp" else NULL
lprm <- c(predict(object, newdata=xm, type=ltype)) # lf.
lny <- length(lprm)
## expand to matrix
if (xfromdata) {
lx <- ldata
} else {
lnxj <- sapply(ldata,
function(x) if (is.factor(x)) length(levels(x)) else 0)
if(!is.null(noexpand) && is.numeric(noexpand))
noexpand <- names(noexpand)[noexpand>0]
noexpand <- c(noexpand, lformfac) ##
lvconv <- names(ldata) %in% noexpand
names(lvconv) <- names(ldata)
if (any(lvconv)) lnxj[lvconv] <-
sapply(ldata[lvconv], function(x) length(unique(x)) )
lnxc <- max(nxcomp, lnxj)
lx <- ldata[1,,drop=FALSE][1:lnxc,,drop=FALSE]
row.names(lx) <- 1:lnxc
}
## lxm: data.frame of suitable dimension filled with "median"
lxm <- lx
for (lv in names(lxm)) lxm[,lv] <- xm[,lv]
##
lvcomp <- names(ldata)
if (!is.null(vars)) lvcomp <- intersect(lvcomp, vars)
if (is.null(lvcomp)) {
warning(":fitcomp: no variables found. Result is NULL")
return(NULL)
}
## components
lcomp <- array(dim=c(nrow(lx), length(lvcomp), lny))
dimnames(lcomp) <- list(dimnames(lx)[[1]], lvcomp, names(lprm))
lcse <- if (se) lcomp else NULL
for (lv in lvcomp) {
if (xfromdata) {
ld <- lxm
ld[,lv] <- ldata[,lv]
lfc <- sapply(ld,is.factor) # eliminate extra levels of factors
if (any(lfc)) ld[lfc] <- lapply(ld[lfc], factor)
} else { # +++
ldv <- ldata[,lv]
if (lnxj[lv]) { # factor levels
ldx <- if (lvconv[lv]) sort(unique(ldv)) else factor(levels(ldv))
lnl <- length(ldx)
ld <- lxm[1:lnl,,drop=FALSE]
ld[,lv] <- ldx
##- lx[,lv] <- factor(c(1:lnl,rep(NA,lnxc-lnl)),labels=levels(ldv))
lx[,lv] <- c(ldx,rep(NA,lnxc-lnl))
##
lpr <- try( predict(object, newdata=ld, se.fit = se),
silent=TRUE)
if (class(lpr)=="try-error") {
warning(":fitcomp: no fitcomp for variable ", lv)
## predict finds new levels of formfac variables
next
}
if (se) {
lc <- lpr$fit
lcse[1:lnl,lv,] <- lpr$se.fit
} else lc <- lpr
lcomp[1:lnl,lv,] <- lc
next # end for loop
} else { # continuous var
ld <- lxm
lx[,lv] <- ld[,lv] <-
seq(min(ldv,na.rm=TRUE),max(ldv,na.rm=TRUE),length=lnxc)
} # ---
} # +++
## continuous variable or xfromdata
lpr <- predict(object, newdata=ld, se = se) # lf.
if (se) {
lcomp[,lv,] <- lpr$fit
lcse[,lv,] <- lpr$se.fit
} else lcomp[,lv,] <- lpr
}
if (lny==1) {
dim(lcomp) <- dim(lcomp)[1:2]
dimnames(lcomp) <- dimnames(lx[,lvcomp,drop=FALSE])
lcomp <- lcomp-lprm
if (se) {
dim(lcse) <- dim(lcse)[1:2]
dimnames(lcse) <- dimnames(lx)
}
} else lcomp <- sweep(lcomp,3,lprm)
list(comp=lcomp, x=lx[,lvars,drop=FALSE], xm=xm[,lvars,drop=FALSE], se=lcse)
}
## ==========================================================================
i.findformfac <- function(formula) {
## find variable involved in explicit factor terms in formula
lfo <- format(formula)
lmf <- c(gregexpr("(factor *\\([^)]*\\))", lfo),
gregexpr("(ordered *\\([^)]*\\))", lfo) )
lf <- function(x)
if(x[1]!=-1) substring(lfo, x, x+attr(x,"match.length"))
all.vars(as.formula(
paste("~",paste(unlist(lapply(lmf, lf)), collapse="+"))))
}
## ==========================================================================
predict.mlm <-
function (object, newdata=NULL, se.fit = FALSE, scale = NULL, df = Inf,
interval = c("none", "confidence", "prediction"), level = 0.95,
type = c("response", "terms"), terms = NULL, na.action = na.pass,
##- pred.var = res.var/wgts, wgts = 1, ...)
pred.var = NULL, weights = 1, ...) ## ... to absorb unused args
## predict.lm, extended for mlm
{
tt <- terms(object)
if (missing(newdata) || is.null(newdata)) {
mm <- X <- model.matrix(object)
mmDone <- TRUE
offset <- object$offset
}
else {
Terms <- delete.response(tt)
m <- model.frame(Terms, newdata, na.action = na.action,
xlev = object$xlevels)
if (!is.null(cl <- attr(Terms, "dataClasses")))
.checkMFClasses(cl, m)
X <- model.matrix(Terms, m, contrasts.arg = object$contrasts)
offset <- if (!is.null(off.num <- attr(tt, "offset")))
eval(attr(tt, "variables")[[off.num + 1]], newdata)
else if (!is.null(object$offset))
eval(object$call$offset, newdata)
mmDone <- FALSE
}
r <- cbind(object$residuals)
n <- nrow(r)
m <- ncol(r)
ynm <- colnames(r)
p <- object$rank
p1 <- seq_len(p)
piv <- object$qr$pivot[p1]
if (p < ncol(X) && !(missing(newdata) || is.null(newdata)))
warning("prediction from a rank-deficient fit may be misleading")
##- beta <- object$coefficients
beta <- cbind(object$coefficients)
##- predictor <- drop(X[, piv, drop = FALSE] %*% beta[piv])
predictor <- drop(X[, piv, drop = FALSE] %*% beta[piv,,drop=FALSE])
if (!is.null(offset))
predictor <- predictor + offset
interval <- match.arg(interval)
if (interval == "prediction") {
if (missing(newdata))
warning("Predictions on current data refer to _future_ responses\n")
if (missing(newdata) && missing(weights)) {
w <- weights(object) ## .default
if (!is.null(w)) {
weights <- w
warning("Assuming prediction variance inversely proportional to weights used for fitting\n")
}
}
if (!missing(newdata) && missing(weights) && !is.null(object$weights) &&
##- missing(pred.var)
is.null(pred.var)
)
warning("Assuming constant prediction variance even though model fit is weighted\n")
if (inherits(weights, "formula")) {
if (length(weights) != 2L)
stop("`weights` as formula should be one-sided")
d <- if (missing(newdata) || is.null(newdata))
model.frame(object)
else newdata
weights <- eval(weights[[2L]], d, environment(weights))
}
}
type <- match.arg(type)
if (se.fit || interval != "none") {
res.var <- if (is.null(scale)) {
## r <- object$residuals
w <- object$weights
##- rss <- sum(if (is.null(w)) r^2 else r^2 * w)
rss <- apply( if (is.null(w)) r^2 else r^2 * w ,2,sum)
df <- n - p
rss/df
} else {
##- scale^2
if (length(scale)==m) scale^2 else
stop("!predict.lm! argument scale has wrong length")
}
res.var.mx <- matrix(res.var, p, m, byrow=TRUE)
if (type != "terms") {
if (p > 0) {
XRinv <- if (missing(newdata) && is.null(w))
qr.Q(object$qr)[, p1, drop = FALSE]
else X[, piv] %*% qr.solve(qr.R(object$qr)[p1, p1])
##- ip <- drop(XRinv^2 %*% rep(res.var, p))
ip <- drop(XRinv^2 %*% res.var.mx)
}
else ip <- rep(0, n)
}
}
if (type == "terms") {
if (!mmDone) {
mm <- model.matrix(object)
mmDone <- TRUE
}
aa <- attr(mm, "assign")
ll <- attr(tt, "term.labels")
hasintercept <- attr(tt, "intercept") > 0L
if (hasintercept)
ll <- c("(Intercept)", ll)
aaa <- factor(aa, labels = ll)
asgn <- split(order(aa), aaa)
if (hasintercept) {
asgn$"(Intercept)" <- NULL
if (!mmDone) {
mm <- model.matrix(object)
mmDone <- TRUE
}
avx <- colMeans(mm)
termsconst <- sum(avx[piv] * beta[piv])
}
nterms <- length(asgn)
if (nterms > 0) {
##- predictor <- matrix(ncol = nterms, nrow = NROW(X))
predictor <- array(dim=c(NROW(X),nterms,m))
##- dimnames(predictor) <- list(rownames(X), names(asgn))
dimnames(predictor) <- list(rownames(X), names(asgn), ynm)
if (se.fit || interval != "none") {
##- ip <- matrix(ncol = nterms, nrow = NROW(X))
##- dimnames(ip) <- list(rownames(X), names(asgn))
ip <- predictor
Rinv <- qr.solve(qr.R(object$qr)[p1, p1])
}
if (hasintercept)
X <- sweep(X, 2L, avx, check.margin = FALSE)
unpiv <- rep.int(0L, NCOL(X))
unpiv[piv] <- p1
for (i in seq.int(1L, nterms, length.out = nterms)) {
iipiv <- asgn[[i]]
ii <- unpiv[iipiv]
iipiv[ii == 0L] <- 0L
##- predictor[, i] <- if (any(iipiv > 0L))
##- X[, iipiv, drop = FALSE] %*% beta[iipiv]
predictor[, i,] <- if (any(iipiv > 0L))
X[, iipiv, drop = FALSE] %*% beta[iipiv,]
else 0
if (se.fit || interval != "none")
##- ip[, i] <- if (any(iipiv > 0L))
##- as.matrix(X[, iipiv, drop = FALSE] %*% Rinv[ii,, drop = FALSE])^2 %*%
##- rep.int(res.var,p)
ip[, i,] <- if (any(iipiv > 0L))
as.matrix(X[, iipiv, drop = FALSE] %*% Rinv[ii,, drop = FALSE])^2 %*%
res.var.mx
else 0
}
if (!is.null(terms)) {
predictor <- predictor[, terms, drop = FALSE]
if (se.fit)
ip <- ip[, terms, drop = FALSE]
}
}
else {
predictor <- ip <- matrix(0, n, 0)
}
attr(predictor, "constant") <- if (hasintercept)
termsconst
else 0
}
if (interval != "none") {
tfrac <- qt((1 - level)/2, df)
if (is.null(pred.var)) pred.var <- res.var.mx/weights ## !!!
hwid <- tfrac * switch(interval, confidence = sqrt(ip),
prediction = sqrt(ip + pred.var))
if (type != "terms") {
if (m==1) { ## changed
predictor <- cbind(predictor, predictor + hwid %o% c(1, -1))
colnames(predictor) <- c("fit", "lwr", "upr")
} else { ## changed
predictor <- array(c(predictor, predictor - hwid, predictor + hwid),
dim=c(n,m,3))
dimnames(predictor)[[3]] <- c("fit", "lwr", "upr")
}
}
else {
lwr <- predictor + hwid
upr <- predictor - hwid
}
}
if (se.fit || interval != "none")
se <- sqrt(ip)
##- if (missing(newdata) && !is.null(na.act <- object$na.action)) { ## !!! not yet extended
##- predictor <- napredict(na.act, predictor)
##- if (se.fit)
##- se <- napredict(na.act, se)
##- }
if (m==1) { ## !!!
if (length(dim(predictor))==3) predictor <- predictor[,,1]
if (se.fit) if (length(dim(se))==3) se <- se[,,1]
}
if (type == "terms" && interval != "none") {
##- if (missing(newdata) && !is.null(na.act)) { # !!! not yet extended
##- lwr <- napredict(na.act, lwr)
##- upr <- napredict(na.act, upr)
##- }
list(fit = predictor, se.fit = se, lwr = lwr, upr = upr,
df = df, residual.scale = sqrt(res.var))
}
else if (se.fit)
list(fit = predictor, se.fit = se, df = df, residual.scale = sqrt(res.var))
else predictor
}
## ==========================================================================
compareTerms <-
function(..., list=NULL, seq=NULL)
{
## Purpose: compare terms of several models
## -------------------------------------------------------------------------
## Arguments:
## models character vector of names of model objects
## -------------------------------------------------------------------------
## Author: Werner Stahel, Date: 2010
lcl <- match.call()[-1]
if (!is.null(names(lcl))) lcl <- lcl[names(lcl)!="seq"]
## !!! bug: list does not work !!!
##- if (length(list)) lcl <- c(as.list(lcl[names(lcl)!="list"]),
##- c(lcl["list"]))
lnmod <- length(lcl)
lmnm <- names(lcl)
if (is.null(lmnm)) lmnm <- as.character(lcl) else
lmnm[lmnm==""] <- as.character(lcl)[lmnm==""]
lterms <- list()
for (lmd in 1:lnmod) {
lmd <- eval(lcl[[lmd]])
if (is.character(lmd)) lmd <- get(lmd)
ltr <- if(is.list(lmd)) attr(terms(lmd),"term.labels") else NULL
if (is.null(ltr)) stop("!compareTerms! inadequate argument", lmnm[lmd])
lterms <- c(lterms, list(ltr))
}
ltrm <- unique(unlist(lterms))
rr <- sapply(lterms, function(x) ltrm%in%x )
dimnames(rr) <- list(ltrm,lmnm)
if (!is.null(seq)) rr <- rr[order(match(ltrm,seq,nomatch=length(seq)+1)),]
rr
}
## ==========================================================================
modelTable <-function(models, seq=NULL)
{
## Purpose: collect several models into a table
## -------------------------------------------------------------------------
## Arguments:
## models character vector of names of model objects
## -------------------------------------------------------------------------
## Author: Werner Stahel, Date: 9 Aug 2000, 11:50
if (inherits(models, regrModelClasses))
stop("!modelTable! First argument is a single model.",
" I need a list of at least two models")
t.islist <- is.list(models)
t.nm <- length(models)
t.mnm <- names(models) ## if (t.islist) names(models) else models
if (length(t.mnm)==0)
if (t.islist) names(models) <- t.mnm <- paste("model",1:t.nm,sep=".")
else t.mnm <- models
if (t.islist) {
if (any(t.wr <- sapply(models,
function (x) !inherits(x, regrModelClasses))))
stop("!modelTable! element ",paste(t.mnm[t.wr],collapse=", "),
" has inadequate class")
} else if(!is.character(models))
stop("!modelTable! Argument 'models' must be a list or a character vector")
t.ls <- t.trm <- t.cf <- setNames(vector("list",t.nm), t.mnm)
t.nobs <- t.df <- t.sig <- t.fitfun <- setNames(rep(NA,t.nm), t.mnm)
## -----------------------------------------------------------
for (li in 1:t.nm) {
lr <- if (t.islist) models[[li]] else get(models[li],envir=parent.frame())
##- lfitfun <- NULL
##- for (lc in c("lm","lmrob","glm","multinom","polr","survreg"))
##- if (inherits(lr,lc)) lfitfun <- lc
##- if (is.null(lfitfun))
##- stop(paste("!modelTable! Model ",li," is not an adequate model"))
##- t.fitfun[li] <- lfitfun
if (!inherits(lr, "regr"))
stop("!modelTable! ... only programmed for 'regr' objects")
t.fitfun[li] <- t.ff <- lr$fitfun
t.nobs[li] <- lnr <-
NROW(if(t.ff=="survreg") lr$linear.predictors else lr$fitted.values)
t.df[li] <- ldf <- lnr-df.residual(lr)
lt <- terms(lr)
ltnm <- c( if(attr(lt,"intercept")) "(Intercept)", attr(lt, "term.labels"))
t.cf[[li]] <-
lr$termtable[match(ltnm,row.names(lr$termtable),nomatch=0),
c("coef","p.value")]
t.trm[[li]] <- ltnm
## t.trmc <- c(t.trmc, attr(terms(lr),"dataClasses"))
lsig <- if (t.ff=="survreg") lr$scale else summary(lr)$sigma
t.sig[li] <- c(lsig,NA)[1]
}
if (length(unique(t.nobs))>1)
warning(":modelTable: models have different numbers of observations")
## --- collect
t.tr <- unique(unlist(t.trm))
## --- coefs and p values
t.nt <- length(t.tr)
t.pr <- t.coef <- matrix(NA,t.nt,t.nm, dimnames=list(t.tr,t.mnm))
## ---
for (li in t.mnm) {
t.t <- t.trm[[li]]
if (length(t.t)) {
lcf <- t.cf[[li]]
t.coef[t.t,li] <- lcf[,1]
t.pr[t.t,li] <- lcf[,2]
}
}
## reorder
if (length(seq)>0) {
li <- match(seq, t.tr)
t.t <- c(t.tr[notna(li)],t.tr[!t.tr%in%seq])
t.coef <- t.coef[t.t,]
t.pr <- t.pr[t.t,]
t.sd <- t.sd[t.t]
}
# attr(t.coef,"standardized") <- t.trn
if (all(is.na(t.sig))) t.sig <- NULL
t.r <- list(coef=t.coef, p=t.pr, sigma=t.sig, nobs=t.nobs,
df=t.df, fitfun=t.fitfun) # , sd.terms=t.sd
class(t.r) <- "modelTable"
t.r
}
## ==========================================================================
"[.modelTable" <- function(object,rows=NULL,cols=NULL, reduce=TRUE) {
if (is.null(rows)) rows <- 1:nrow(object$coef)
if (is.null(cols)) cols <- 1:ncol(object$coef)
lp <- object$p[rows,cols,drop=FALSE]
li <- if(reduce) !apply(is.na(lp),1,all) else 1:nrow(lp)
if (length(li)==0) stop("![.modelTable! no terms left")
lsd <- if (length(object$sd.terms)) object$sd.terms[rows][li]
lsig <- if (length(object$sigma)) object$sigma[li]
rr <- list(coef=object$coef[rows,cols,drop=FALSE][li,,drop=FALSE],
p=lp[li,,drop=FALSE], sigma=lsig,
nobs=object$nobs[cols], df=object$df[cols],
fitfun <- object$fitfun[cols] ) # ,sd.terms=lsd
# attr(rr$coef,"standardized") <- attr(object$coef,"standardized")
class(rr) <- class(object)
rr
}
## ==========================================================================
format.modelTable <-
function(x, digits=getUserOption("digits"),
stars = c("***","** ","* ",": ",". "), sep="", ...)
{
## Purpose:
## ----------------------------------------------------------------------
## Arguments:
## x a modelTable object
## tex if TRUE, the output will be suitable for pasting into
## (la)tex source
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 23 Dec 2008, 10:09
t.sd <- x$sd.terms
lsd <- length(t.sd)>0
t.ntrm <- nrow(x$coef)
t.stars <- c(stars, rep(" ",5))[1:5]
t.st <- stars[cut(x$p,c(-1,0.001,0.01,0.05,0.1,1))]
t.st[is.na(t.st)] <- " "
dim(t.st) <- dim(x$p)
##- t.stdd <- attr(x$coef,"standardized")
##- if (is.null(t.stdd)||!t.stdd)
##- warning(":: Coefficients are not standardized",call.=FALSE)
t.cf <- x$coef
if (length(x$sigma)) {
t.cf <- rbind(t.cf, .sigma.=x$sigma)
t.st <- rbind(t.st, " ")
if(lsd) t.sd <- c(t.sd, .sigma.=NA)
}
t.cf <- rbind(t.cf, .df.=x$df)
t.st <- rbind(t.st, .df.=" ")
if(lsd) t.sd <- c(t.sd, .df.=NA)
if (lnobs <- length(unique(x$nobs))>1) {
t.cf <- rbind(t.cf, .nobs.=x$nobs)
t.st <- rbind(t.st, " ")
if(lsd) t.sd <- c(t.sd, .nobs.=NA)
}
t.cfo <- format(t.cf, digits=digits, ...)
t.nna <- is.na(x$coef)&!is.na(x$p)
t.cfo[1:t.ntrm,][t.nna] <- paste(c(rep(" ",1+digits/2),"+++"),collapse="")
t.cfo[".df.",] <- paste(" ",format(t.cf[".df.",]))
if (lnobs) t.cfo[".nobs.",] <- paste("",format(t.cf[".nobs.",]))
lff <- x$fitfun
if (length(lff))
if (length(unique(lff))>1) {
t.cfo <- rbind(t.cfo, fitfun=substr(lff,1,digits+3) )
t.st <- rbind(t.st, .fitfun.="" )
}
t.cfo[grep("NA",t.cfo)] <- paste(c(rep(" ",2+digits/2),"-"),collapse="")
t.ii <- (!is.na(t.cf))&t.cf==0
t.cfo[t.ii] <- t.cf0 <- gsub("0"," ",t.cfo[t.ii])
t.o <- paste(sep, format(t.cfo), sep, t.st)
t.sdo <- if (lsd)
paste(sep, sub("NA"," ", format(t.sd, digits=digits))) else NULL
t.out <- cbind(t.sdo, matrix(t.o, nrow=nrow(t.cfo)))
t.nm <- row.names(t.cfo)
if (lsd) t.nm <- paste(t.nm,ifelse(!is.na(t.sd),"@",""))
dimnames(t.out) <- list(t.nm,c(if (lsd) "sd",dimnames(x$p)[[2]]))
structure( t.out, class=c("modelTable", "modelTF"), nterms=t.ntrm)
}
## ==========================================================================
print.modelTable <- function(x, tex = FALSE, transpose=FALSE, ...)
{
## Purpose:
## ----------------------------------------------------------------------
## Arguments:
## x a modelTable object
## tex if TRUE, the output will be suitable for pasting into
## (la)tex source
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 23 Dec 2008, 10:09
t.out <- unclass( if (t.fo <- inherits(x, "modelTF")) x else
format(x, ...) )
if (tex) {
sep <- "&"
if (t.fo) {
t.a <- t.out[1,1]
t.jsep <- substr(t.a,nchar(t.a),nchar(t.a))==sep
if (!t.jsep) {
warning(":print.modelTable: The header of the tex table will "
,"most probably be wrong")
for (lj in 1:(ncol(t.out)-1)) t.out[,lj] <- paste(t.out[,lj], sep)
}
} else t.out <- unclass( format(x, sep=sep, ...) )
## if (transpose) t.out <- t(t.out) ## would not work yet, need to select rows
mc1 <- "\\mc{2}{|c}{"
mc2 <- "}"
end <- "\\\\"
headend <- "\\\\ \\hline"
tabstart <- " {\\def\\mc{\\multicolumn}\n\\begin{tabular}" # \\providecommand{
tabend <- " \\end{tabular}\n}"
lnt <- attr(t.out,"nterms")
t.end <- c(rep(end,nrow(t.out)-1),"")
t.end[lnt] <- headend
cat(tabstart, "{l", rep("|rl",ncol(t.out)),"}\n ",
paste(sep,mc1,colnames(t.out),mc2,sep=""),headend,"\n")
t.rn <- format(row.names(t.out))
if (t.fo) t.rn <- paste(t.rn,sep)
for (li in 1:nrow(t.out)) cat(t.rn[li],t.out[li,],t.end[li],"\n")
cat(tabend,"\n")
} else {
if (transpose) t.out <- t(t.out)
colnames(t.out) <- paste("",colnames(t.out))
print(structure(t.out, nterms=NULL), quote=FALSE)
} ## !!!
invisible(x)
}
## ===================================================================
leverage <- function(fit)
{
## Purpose: extract leverages
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 10 Nov 2010, 09:31
lh <- fit$leverage
names(lh) <- names(fit$resid)
lnaa <- fit$na.action
if (length(lnaa)) naresid(lnaa, lh) else lh
}
## ==========================================================================
## pseudoreplicate variability
xdistResdiff <- function(object, perc=c(3,10,80), trim=0.1, nmax=100,
nsim=100, out="aggregate")
{
## Purpose: distance in x space and absolute residual difference
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 13 Oct 2011, 09:40
if (!inherits(object,"lm")) stop("only suitable for lm like objects")
lres <- object$stres # resid
lqr <- object$qr
ln <- nrow(lqr$qr)
lq <- qr.qy(lqr, diag(1, nrow = ln, ncol = lqr$rank)) # like in hat
li <- which(!is.na(lres))
if (ln>nmax) {
li <- sample(1:ln, nmax) # [replace=FALSE]
lq <- lq[li,]
lres <- lres[li]
ln <- nmax
}
ldist <- dist(cbind(lq))
lrd <- abs(outer(lres,lres,"-"))
if (nsim) {
lrsim <- matrix(NA,length(ldist),nsim)
for (ls in 1:nsim) {
li <- sample(ln)
lrsim[,ls] <- as.dist(lrd[li,li])
}
}
lnm <- names(lres)
lm <- diag(ln)
lid <- cbind(id1=lnm[rep(1:(ln-1),(ln-1):1)],
id2=lnm[row(lm)[row(lm)>col(lm)]])
lrd <- as.dist(lrd)
lio <- order(ldist)
rr <- data.frame(lid[lio,], xdist=ldist[lio], resdiff=lrd[lio])
if (nsim) rr <- cbind(rr, rdsim=lrsim[lio,])
class(rr) <- c("xdistResdiff","data.frame")
if (out=="aggregate") xdistResscale(rr, perc=perc) else rr
}
## ====================================================================
xdistResscale <- function(x, perc=c(3,10,90), trim=1/6)
{
## Purpose: aggregate xdistResdiff data
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 18 Oct 2011, 08:40
if (!inherits(x,"xdistResdiff"))
stop ("only programmed for xdistResdiff objects")
llim <- c(-0.001*max(x$xdist), x$xdist[perc*nrow(x)/100], max(x$xdist))
lgrp <- list(cut(x$xdist, llim))
lrd <- aggregate(sqrt(x$resdiff), lgrp, mean, trim=trim)[,2]
lmn <- mean(sqrt(x$resdiff), trim=trim)
ljsim <- FALSE
if (ncol(x)>5) {
ljsim <- TRUE
lrdsim <- aggregate(sqrt(as.matrix(x[,-(1:4)])), lgrp, mean, trim=trim)[,-1]
lnsim <- ncol(lrdsim)
lrdmn <- apply(lrdsim,1,mean)
lrdse <- apply(lrdsim,1,sd)
lrss <- apply(sweep(lrdsim,1,lrdse,"/")^2,2,sum)
lrss0 <- sum((lrd/lrdse)^2)
##- ltestall <- sum(((lrd-lmn)/lrdse)^2)
##- lpv <- pchisq(ltestall, df=length(lrd)-1, lower=FALSE)
##- lpv1 <- pnorm((lrd[1]-lmn)/lrdse[1]) # one-sided is good
lpv <- apply(lrdsim<lrd,1,sum)/lnsim
names(lpv) <- paste("pv",1:length(lpv),sep="")
lpv <- c(lpv, pv.rssq=sum(lrss0<lrss)/lnsim)
}
rr <- cbind(xdist=aggregate(x$xdist, lgrp, mean)[,2], resd.mean=lrd)
if (ljsim) rr <- cbind(rr, resd.simmean=lrdmn, resd.se=lrdse)
attr(rr,"limits") <- llim
attr(rr,"resdMean") <- lmn
attr(rr,"trim") <- trim
attr(rr,"perc") <- perc
if (ljsim) attr(rr,"pvalues") <- lpv #c(shortdist=lpv1, overall=lpv)
class(rr) <- c("xdistResscale", "matrix")
rr
}
## =======================================================================
plot.xdistResscale <- function(x, lwd=2, cex=2, xlab="distance in x space",
ylab="average abs. residual difference", col.aux="grey30", ...)
{
## Purpose: plot average residual difference^2 vs. x distance
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 14 Oct 2011, 08:46
lxdist <- sqrt(x[,"xdist"])
lrd <- x[,"resd.mean"]
ly2 <- lrd+2*x[,"resd.se"]
ljse <- length(ly2)>0
llim <- attr(x,"limits")
lymax <- if (ljse) max(ly2) else max(lrd)
plot(lxdist, lrd, xlab=xlab, ylab=ylab,type="b", cex=cex,
xlim=c(0,sqrt(max(llim))), yaxs="i", ylim=c(0,1.02*lymax),
axes=FALSE, ...)
box()
axis(2)
lxat <- pretty(c(0,0.8*last(llim)))
axis(1, at=sqrt(lxat), labels=format(lxat))
if (ljse) {
segments(lxdist,lrd-2*x[,"resd.se"],lxdist,ly2, lwd=lwd,
col=col.aux)
lysim <- x[,"resd.simmean"]
if (length(lysim)) {
lxd <- sqrt(max(llim))/(nrow(x)*10)
segments(lxdist-lxd, lysim, lxdist+lxd, lysim, col=col.aux)
}
}
axis(3,at=c(0,sqrt(llim[-1])),labels=rep("",length(llim)), col=col.aux)
abline(h=attr(x,"resdMean"), lty=3, col=col.aux)
abline(v=0)
"plot.xdistResscale done"
}
## ==========================================================================
## plotting functions
## ==========================================================================
plotregr.control <-
function(x, formula=NULL, data = NULL, rawvars = TRUE,
## generate variables used for plotting
smooth.group = NULL, weights = NULL, plweights = NULL,
plab = NULL, pch = NULL, pcol = NULL, cex.plab = -1, cex.pch = -1,
markprop = NULL,
## specify some specific aspects / contents of plots
glm.restype = "working", smresid = TRUE,
jitter = TRUE, jitterbinary = TRUE, mbox = TRUE,
partial.resid = TRUE, cookdistlines = 1:2, leveragelim = c(0.99, 0.5),
## smooth and reflines
smooth = 2, smooth.par=NA, smooth.iter=NULL, smooth.sim=NULL,
reflines = TRUE, reflines.se = FALSE,
## general options for plotting
ylim = TRUE, ylimfac = c(4,3), ylimext = 0.1,
plext = getUserOption("plext"), yaxp = NULL,
## graphical parameters
lty = c(1,2,5,3,4,6,1,1), lwd = c(1,1,2,1,1.5,1,1,1),
colors.ra = getUserOption("colors.ra"),
colors.smgrp = getUserOption("colors"), smooth.pale = 0.2,
## title, labels
main = NULL, cex.title = NULL, xlabs = NULL, ylabs = NULL,
## multiple frames
mf = TRUE, mfcol = FALSE,
multnrow = 0, multncol = 0,
oma = NULL, # ask = NULL,
## components not needed for plresx
fit = TRUE, hat = TRUE, stresiduals = TRUE
)
## get data for plotting, collect and check arguments
## do preparations that are common to plot.regr_ and plresx
## --------------------------------------------------------------
{
## --- formula
lform <- formula(x)
lfo <- length(formula)>0 ## && !(is.logical(formula)&&!formula)
if (lfo) {
if (is.character(formula))
formula <- as.formula(paste("~",paste(formula, collapse="+")))
else {
if (is.logical(formula)&&formula[1]) {
formula <-
if (rawvars)
as.formula(paste("~",paste(all.vars(lform)[-1], collapse="+")))
else formula(x)
}
}
if (!is.formula(formula))
stop("!plot.regr/plresx! argument 'formula' not suitable")
lform <- update(lform, formula)
}
## --- variables to be evaluated in data
lcl <- match.call()
lcl$data <- if (length(data)>0) data else {
ldt <- x$allvars
if (is.null(ldt)) ldt <- eval(x$call$data)
ldt }
lcl[[1]] <- as.name("getvars")
lcl$formula <- lform
lcl[c(FALSE,names(lcl[-1])%nin%
c("x","formula","data","rawvars","jitterbinary",
"plab","pch","col","smooth.group","plweights",
"weights", "sequence"))] <- NULL
pldata <- eval(lcl, parent.frame())
if (class(pldata)[1] == "regr-error")
stop(paste("!plrdata!",attr(pldata,"message")))
if (class(pldata)[1] == "regr-warn") {
warning(paste("!plrdata!",attr(pldata,"message")))
class(pldata) <- class(pldata)[-1]
}
xvars <- attr(pldata, "xvars")
## -------------------------------------------
## --- prepare objects needed for plotting
## --- family
lfam <- c(x$distrname, x$family$family)[1]
if (is.null(lfam) || lfam=="" || is.na(lfam)) lfam <- "gaussian"
lfamgauss <- lfam%in%c("gaussian","Gaussian")
lfamcount <- u.true(lfam%in%c("binomial","poisson","multinom")) |
inherits(x,"polr")
## --- na.action: always get full data
lnaaction <- x$na.action
if (length(lnaaction)) class(x$na.action) <- class(lnaaction) <- "exclude"
## --- residuals
rtype <- NULL
if (inherits(x, "glm")) rtype <- glm.restype
lres <- as.matrix(residuals(x, type=rtype)) ## cbind distroys attributes
lnr <- nrow(lres)
if (nrow(pldata)!=lnr)
stop("!i.getPlrData! length of residuals inadequate. BUG")
lIcq <- inherits(lres,"condquant")
lmres <- if (lIcq) 1 else ncol(lres)
if (lmres>1) { ## multivariate
if (is.null(lcn <- colnames(lres))) lcn <- 1:ncol(lres)
colnames(lres) <- paste("res", lcn, sep=".")
}
lres0 <- all( apply(lres[,1:lmres, drop=FALSE],2,
function(x) all(x==notna(x)[1], na.rm=TRUE ) ) )
if (lres0)
stop("!plot.regr/plresx! all residuals are equal -> no residual plots")
## --- response residual names
lyexpr <- deparse(lform[[2]])
lynm <- if (nchar(lyexpr)>10) "Y" else lyexpr ##
lresname <- if (lmres>1) colnames(lres) else paste("res(", lynm, ")")
## --- weights, used for smooth calculation: get from x if needed
lwgt <- pldata[["(weights)"]] ## possibly only logical
lIweights <- is.logical(lwgt)&&all(lwgt) ## weights explicitly required
lInoweights <- is.logical(lwgt)&&!any(lwgt) ## weights explicitly denied
if (lIweights | (length(lwgt)&&all(is.na(lwgt))))
lwgt <- naresid(lnaaction, x$weights)
lIwgt <- length(lwgt)>1 && any(lwgt!=notna(lwgt)[1],na.rm=TRUE)
if (lIweights&!lIwgt)
warning(":plot.regr/plresx: no weights found for smooth calculation.")
pldata[["(weights)"]] <- lweights <-
if (lIwgt) lwgt / mean(lwgt, na.rm=TRUE) else NULL
## --- plweights, used as sizes of plotting characters: same as weights
lwgt <- pldata[["(plweights)"]] ## possibly only logical
lIplweights <- is.logical(lwgt)&&all(lwgt) ## plweights explicitly required
lInoplweights <- is.logical(lwgt)&&!any(lwgt) ## plweights explicitly denied
if (lIplweights | (length(lwgt)&&all(is.na(lwgt))))
lwgt <- naresid(lnaaction, x$weights)
lIwgt <- length(lwgt)>1 && any(lwgt!=notna(lwgt)[1],na.rm=TRUE)
if (lIplweights&!lIwgt)
warning(":plot.regr/plresx: no weights found for plotting.")
pldata[["(plweights)"]] <-
if (lIwgt) lwgt / mean(lwgt, na.rm=TRUE) else NULL
## --- hat and standardized residuals
##- hat <- i.def(hat, TRUE, TRUE, FALSE)
##- stresiduals <- i.def(stresiduals, TRUE, TRUE, FALSE)
##- if (hat|stresiduals) {
##- lhat <- naresid(lnaaction, x$leverage)
##- if(length(lhat)==0 && length(x$qr)>0)
##- lhat <- naresid(lnaaction, hat(x$qr))
##- } else lhat <- NULL
## --- ldfres
ldfres <- df.residual(x)
if (is.null(ldfres)) {
warning(":plot.regr: bug: no df of residuals. setting n-1")
ldfres <- lnr-1
}
## --- sigma
lsigma <- x$sigma
if (length(lsigma)==0) lsigma <- c(x$scale, summary(x)$sigma)[1]
if (length(lsigma)==0)
lsigma <- if (lfamcount) 0 else sqrt(apply(lres^2,2,sum)/ldfres)
## --- standardized residuals !!! -> regr braucht das auch, vgl.
##- lstresname <- paste("st.",lresname,sep="")
##- lstres <- lstrratio <- NULL
##- if (stresiduals) {
##- lstres <- x$stresiduals
##- if (length(lstres)==0)
##- {
##- lstres <- if (lIcq) {
##- if (u.true(lsigma)) cbind(lres[,1:4]/lsigma, lres[,5:6]) else lres
##- } else {
##- lstres <- if (lIwgt) lres*sqrt(lweights) else lres ## !!! check
##- if (length(lhat)!=0)
##- lstres / outer(sqrt(1-pmin(leveragelim[1],lhat)),
##- ifelse (lsigma==0, 1, lsigma) )
##- }
##- }
##- lstres <- cbind(lstres)
##- lstrratio <- if(lIcq) 1 / lsigma ## lstres[,1]/lres[,1]
##- else 1 / outer(sqrt(1-pmin(last(leveragelim),lhat)),lsigma)
##- lstrratio[!is.finite(lstrratio)] <- 1
## --- absolute residuals
##- labsres <- if (lIcq) abs(lstres[,"random"]) else abs(lstres)
##- labsresname <- paste("|",lstresname,"|", sep = "")
## --- Mahalanobis norm of multivariate residuals
lresmahal <- x$resmahal
if (lmres>1)
if (is.null(lresmahal)) lresmahal <- mahalanobis(lres,0,var(lres))
## ---
lsmgrplab <- levels(pldata$"(smooth.group)")
## if (length(lsmgrpnames)) ## shorten! !!!
## ln <- nrow(pldata)
lnobs <- sum(!apply(is.na(lres),1,any))
## -------------------------
## labels
## priorities: plab , pch , row.names
lpch <- pldata$"(pch)"
lplab <- pldata$"(plab)"
##- if (length(lpch)) { ## labels given in pch
##- if (length(lplab)==0) lplab <- lpch
##- lpch <- NULL }
## default plotting character
if (is.factor(lpch)) lpch <- as.character(lpch)
if (is.character(lpch)&&any(nchar(lpch)>1) |
is.numeric(lpch)&&any(lpch<0|lpch>30)) {
warning(":plotregrContol: unsuitable plotting character ('pch')")
lpch <- NULL
}
if (length(lpch)==0)
lpch <- if (lIcq) ifelse(lres[,"prob"]==0,15,3) else
if (lnobs>100) "." else 3
lcp <- max(min(1,log(50)/log(lnobs))^2,0.3)
lcpp <- if (all(lpch==".")) 4 else 0.7
cex.plab <- i.def(cex.plab, -lcp, valuefalse = 1 )
if (length(cex.plab)==1) cex.plab <- c(1, lcpp)*cex.plab
## --- row.names
lrown <- row.names(lres)
if (length(lrown)==0) lrown <- as.character(1:lnr)
lplabels <- lrown
if (length(lplab)>0) {
lplabels <- if (is.character(lplab)) lplab else as.numeric(lplab) # factors
}
## now, lplabels always useful
markprop <- i.def( markprop, ## default for markprop
if (length(lplab)>1|lIcq) 0 else ceiling(sqrt(lnobs)/2)/lnobs )
if (markprop==0) {
lplab <- rep(lpch, length=lnr)
} else {
lplab <- lplabels
if (markprop<1) {
li <- if (lmres>1) order(lresmahal)[1:(lnobs*(1-markprop))] else
order(abs(lres[,1]))[1:(lnobs*(1-markprop))] # [,1] for condq
lplab[li] <- NA
}
}
## lplabna will contain NA where weights should determine symbol
lplabna <- if (lIwgt&markprop==0) rep(NA, length=lnr) else lplab
## ----------------------------------------------------
## --- smooth
## if (!is.function(smooth)) smooth <- smoothRegr
smooth.iter <- i.def(smooth.iter, 50 * lfam%nin%c("binomial","multinomial"))
lsmgrp <- pldata$"(smooth.group)"
## smooth.par
lnsm <- lnobs
lnsmgrp <- length(unique(lsmgrp))
if (length(lsmgrp)) lnsm <- lnobs/lnsmgrp
smooth <- i.def(smooth, 2, 2, 0)
lsmpar <- i.def( smooth.par, 5*lnsm^log10(1/2)*(1+inherits(x,"glm")) )
## --- simulated residuals
## when using smooth.group , default is 0
lnsims <- i.def(smooth.sim, 19*(length(lsmgrp)==0), 19, 0)
if (inherits(x, c("nls", "nlm", "survreg", "polr", "coxph"))) lnsims <- 0
if (lmres>1) lnsims <- 0 # not yet programmed for mlm
if (lnsims>0 & !inherits(x, c("lm","glm"))) {
warning(":plot.regr/simresiduals: ",
"I can simulate only for 'lm' and 'glm' objects")
lnsims <- 0
}
lsimstres <- lsimres <- NULL # lsimrabs <-
if (lnsims>0) {
lsimr <- if(u.debug())
simresiduals(x, lnsims, glm.restype=glm.restype) else
try(simresiduals(x, lnsims, glm.restype=glm.restype), silent=TRUE)
if (class(lsimr)=="try-error")
warning(":plot.regr/simresiduals: simresiduals did not work. ",
"No simulated smooths")
else {
##- if ((!is.null(lsimr)) && (!is.null(llnna <- attr(lsimr, "nNA"))) &&
##- any(llnna>0) ) {
##- warning(":plot.regr/simresiduals: simresiduals produced NAs. ",
##- "No simulated smooths")
##- lsimr <- lsimr[,llnna==0]
##- } else {
lsimres <- lsimr ## naresid(lnaaction, lsimr)
lsimstres <- attr(lsimr,"stres") ## naresid(lnaaction, )
## lsimrabs <- if (is.null(lsimstres)) NULL else abs(lsimstres)
##- }
}
if (length(lsimres)==0) lnsims <- 0
}
## --- some more arguments
mbox <- i.def(mbox, TRUE)
jitter <- i.def(jitter, TRUE)
jitterbinary <- i.def(jitterbinary, TRUE)
## ---------------------------------------- was plrargs
## --- graphical elements
lty <- i.def(lty, c(1,2,3,3,6,4,1,1))
lty <- ifelse((1:6)<=length(lty), rep(lty, length=6), c(1,2,3,3,6,4,1,1))
lwd <- i.def(lwd, c(1,1,2,1,1.5,1,1,1))
lwd <- ifelse((1:6)<=length(lwd), rep(lwd, length=6), c(1,1,2,1,1.5,1,1,1))
lcol.ra <-
rep(i.def(colors.ra, c.colors.ra, valuefalse = c.colors.ra), length=9)
lcol.smgrp <-
if (lnsmgrp>1)
rep(i.def(colors.smgrp, c.colors, valuefalse = c.colors),
length=lnsmgrp)
else lcol.ra[3]
if (NCOL(lcol.smgrp)==1)
lcol.smgrp <- cbind(lcol.smgrp, colorpale(lcol.smgrp, pale=smooth.pale))
## --- main
lftext <- paste(as.character(formula(x))[c(2,1,3)], collapse=" ")
main <- i.def(main, lftext, lftext, "")
main <- if (is.character(main) && substring(main,1,1)==":")
paste(lftext,substring(main,2,30)) else as.character(main)
## if (length(main)) tit(main) <- tit(x)
if (is.null(cex.title)) cex.title <- max(0.5, min(1.2,
par("mfg")[4]*par("pin")[1]/(par("cin")[1]*nchar(main))))
## --- axis labels
xlabs <- i.def(xlabs, NULL, NULL, NULL)
ylabs <- i.def(ylabs, NULL, NULL, NULL)
## --- plot ranges
ylim <- i.def(ylim, TRUE)
ylimfac <-
rep(i.def(ylimfac, c(residuals=4,y=3), valuefalse = c(residuals=4,y=3)),
length=2)
ylimext <- i.def(ylimext, 0.1, 0.1, 0)
plext <- i.def(plext, 0.05, 0.05, 0)
if (length(plext)<4) plext <- rep(plext, length=4)
##
yaxp <- i.def(yaxp, NULL)
## --- multiple frames
mf <- i.def(mf, NULL, TRUE, NULL)
oma <- i.def(oma, NULL, valuefalse=0)
if (length(oma)==2) oma <- c(0,0,oma)
## outer.margin <- oma[3]>0 ## && length(lmain)==1
## --- ask
##- ask <- i.def(ask, getOption("ask"), TRUE )
##- if (length(ask)==0)
##- ask <- interactive() && last(c("",names(dev.list())))%in%c("X11cairo")
## --- more arguments
smresid <- i.def(smresid, TRUE)
if (lmres>1) smresid <- FALSE ## !!! muss noch gemacht werden
reflines <- i.def(reflines, TRUE)
reflines.se <- i.def(reflines.se, FALSE, TRUE, FALSE)
partial.resid <- i.def(partial.resid, TRUE)
cookdistlines <- i.def(1:2, 1:2, NULL)
## ------------------------------------------------------------
## result
list(
pldata = pldata, formula = lform, xvars = xvars,
residuals = lres, ## stres = lstres, strratio = lstrratio, absres = labsres,
## resmahal = lresmahal, fitted = lfit, hat = lhat,
yexpr = lyexpr, resname = lresname, ## stresname = lstresname,
## absresname = labsresname, fitname = lfitname,
smooth.grouplab = lsmgrplab,
na.action = lnaaction,
sigma=lsigma, df.residuals = ldfres, nobs = lnobs,
markprop = markprop, jitter = jitter, jitterbinary = jitterbinary,
smooth = smooth, smooth.par = lsmpar,
smooth.iter = smooth.iter, smooth.sim = lnsims,
simres = lsimres, simstres = lsimstres, # simabsres = lsimrabs,
family = lfam, famgauss = lfamgauss, famcount = lfamcount,
pch = lpch, plabels = lplabels, plab = lplab, plabna = lplabna,
cex.plab = cex.plab,
rescol = lmres, smooth.ngroups = lnsmgrp,
mbox = mbox,
lty=lty, lwd=lwd,
colors.ra=lcol.ra, colors.smgrp=lcol.smgrp,
ylim=ylim, ylimfac=ylimfac, ylimext=ylimext,
plext=plext, yaxp=yaxp, mf=mf, oma=oma,
## multnrow=lmr, multncol=lmc,
## ask=ask,
main=main, cex.title=cex.title, xlabs=xlabs, ylabs=ylabs,
smresid=smresid, reflines=reflines, reflines.se = reflines.se,
partial.resid=partial.resid, cookdistlines=cookdistlines
)
}
## ====================================================================
plot.regr <-
function(x, data=NULL, plotselect = NULL, xvars = TRUE,
rawvars = TRUE, sequence=FALSE, weights=NULL,
addcomp = FALSE, smooth.legend = FALSE, ...)
{
## -------------------------------------------------------------------------
## Author: Werner Stahel, Date: 7 May 93 / 2002
argPlregr <- c("x", "data", "plotselect", "xvars", "formula",
"rawvars", "sequence", "weights", "addcomp", "smooth.legend")
## ----------------
lac <- as.list(match.call())[-1]
lextra <- setdiff(names(lac), c(argPlregr, i.argPlrControl))
if (length(lextra))
warning(":plresx: argument(s) ",paste(lextra, collapse=", "),
" not used")
lcl <- c(list(quote(plotregr.control)),
lac[match(i.argPlrControl,names(lac), nomatch=0)])
## list(fit=TRUE, hat=TRUE, stresiduals=TRUE) )
mode(lcl) <- "call"
largs <-eval(lcl)
## -------------------------------------------------------------------
## lfit <- largs$fitted
lres <- largs$residuals
## lstres <- largs$stres
lsimres <- largs$simres
lmres <- largs$rescol
lweights <- largs$pldata$"(weights)"
lsmgrp <- largs$pldata$"(smooth.group)"
lsmooth <- largs$smooth
lsmpar <- largs$smooth.par
lplab <- largs$plab
lclr <- largs$colors.ra
lIwgt <- length(lweights)>0
lsigma <- largs$sigma
## lfitname <- largs$fitname
lresname <- largs$resname
lnaaction <- x$na.action
lnr <- nrow(lres)
lnna <- !apply(is.na(lres),1,any)
lnobs <- sum(lnna)
if (length(names(smooth.legend))==0) {
lsmlegend <- i.def(smooth.legend, NULL, TRUE, NULL)
if (length(lsmlegend)==1 && is.null(names(lsmlegend)))
lsmlegend <-
setNames(rep(lsmlegend,5),
c("yfit","resfit","absresfit","absresweight","(xvars)") )
} else lsmlegend <- smooth.legend
## family
if (inherits(x,"mulltinom"))
stop("!plot.regr! I do not know how to plot residuals of a mulitnomial regression")
lfam <- x$distrname
if (length(lfam)==0) lfam <- x$family$family
if (is.null(lfam) || lfam=="" || is.na(lfam)) lfam <- "gaussian"
lfamgauss <- lfam%in%c("gaussian","Gaussian")
lglm <- inherits(x, "glm")
## lpolr <- inherits(x,"polr")
lnnls <- !inherits(x, "nls")
lfamcount <- lfam%in% c("binomial", "poisson", "multinomial") |
inherits(x,"polr")
lIcq <- inherits(lres, "condquant")
lform <- formula(x)
## ----------------
## plot selection
lplsel <-
i.plotselect(plotselect, smooth=largs$smooth, wgt=lIwgt, mult=lmres>1,
famgauss=largs$famgauss, famglm=inherits(x,"glm"),
famcount=largs$famcount)
## --- more preparations
lfit <- x$linear.predictors
lfitname <- "linear predictor"
if(is.null(lfit)) {
lfit <- if (lIcq) lres[,"fit"] else fitted(x) ## full size
lfitname <- "fitted value"
} else lfit <- naresid(lnaaction, lfit)
lfit <- as.matrix(lfit)
lfitname <- rep(lfitname, length=lmres)
##
lstres <- x$stresiduals
lstrratio <- x$strratio
llev <- leverage(x)
if (is.null(lstres)) {
lstr <- i.stres(x, sigma=lsigma, leveragelim=largs$leveragelim)
lstres <- lstr$stresiduals
lstrratio <- lstr$strratio
llev <- naresid(lnaaction, lstr$leverage)
}
lstres <- if (length(lstres)) ## should always be the case
as.matrix(naresid(lnaaction, lstres))
else {
warning(":plot.regr: residuals are not standardized")
lstres <- lres ## a better solution os in i.stres
}
lstrratio <- as.matrix( if (length(lstrratio))
naresid(lnaaction, lstrratio) else rep(1,lnr) )
lstresname <- paste("st.", lresname, sep = "")
labsresname <- paste("|st.",lresname,"|", sep="")
##
lsimabsres <- lsimstres <- largs$simstres
if (!is.null(lsimabsres)) lsimabsres <- abs(lsimabsres)
## --- residuals from smooth
## lfsmsm <- lfsmooth <- as.list(rep(NA, lmres))
if (largs$smresid & any(c(lplsel[c("absresfit","qq","leverage")],0)>0)) {
## & length(lstres))
for (lj in seq_len(lmres)) {
lfsm <-
smoothM(lfit[,lj], lres[,lj], ## if(lIcq) lres[,1, drop=FALSE] else
## cbind(lres[,lj], lsimres),
weights=lweights, group=lsmgrp,
band=lsmooth>1, resid=TRUE, par=lsmpar,
na.action=nainf.exclude)
lfsmr <- residuals(lfsm)
lstres[,lj] <- lfsmr[,1] * lstrratio[,lj]
}
lstresname <- paste("st.sm.", lresname, sep = "")
labsresname <- paste("|st.sm.",lresname,"|", sep="")
if (length(lsimabsres))
lsimabsres <- lsimabsres *
median(abs(lstres), na.rm=TRUE)/median(lsimabsres, na.rm=TRUE)
}
## plot range for residuals
lreslim <- pllimits(largs$ylim, lres, limfac=largs$ylimfac[1])
## if (length(lstres)) {
llfac <- if (lIcq) median(abs(lstres[,1])/abs(lres[,1]), na.rm=TRUE) else
apply(abs(lstres)/abs(lres),2, median,na.rm=TRUE)
if (is.list(lreslim)) {
absreslim <- streslim <- lreslim
lj <- match(names(streslim), colnames(llfac), nomatch=0)
if (any(lj>0))
for (ljj in which(lj>0)) {
if (is.numeric(streslim[[ljj]])) {
streslim[[ljj]] <- lstrl <- streslim[[ljj]]*llfac[lj[ljj]]
absreslim[[ljj]] <- c(0, mean(c(-1,1)*lstrl))
}
}
} else {
streslim <- sweep(lreslim, 2, llfac, "*")
absreslim <- if (length(streslim))
rbind(0, (streslim[2,]-streslim[1,])/2 ) else NULL
}
## }
## -------------
ltxt <- is.character(lplab)
lpty <- ifelse(ltxt,"n","p")
## --- multiple frames
lmf <- largs$mf ## i.def(lmf, TRUE, TRUE, FALSE)
if (length(lmf)) {
if (is.logical(lmf)&&lmf)
lmf <- if (lmres>1) {
if (lmres<=4) c(NA, lmres) else lmres
} else c(NA, 2)
}
if (length(lmf)==2 && is.na(lmf[1])) {
lmf1 <- sum(names(lplsel)%in%
c("yfit","resfit","absresfit","qq","absresweight") )
if(lmf1>lmf[2]+1) lmf1 <- lmf[2]
lmf[1] <- lmf1
}
loma <- i.def(largs$oma, c(2,1)*(length(lmf)>0), valuefalse=NULL)
if (length(loma)<4) loma <- c(0,0,loma,0)[1:4]
loldpar <-
if (length(lmf)&(!is.logical(lmf))) {
if (length(lmf)==1) mframe(mft=lmf, oma=loma) else
mframe(lmf[1], lmf[2], oma=loma)
} else par(oma=loma) ## , ask=largs$ask
lnewplot <- TRUE ## !!!
## -----------------------------------
lplsel <- lplsel[is.na(lplsel)|lplsel>0]
## --------------------------------------------------------------------------
## start plots
if (length(lplsel))
for (liplot in 1:length(lplsel)) {
lpllevel <- lplsel[liplot]
lpls <- names(lpllevel)
## y on fit
if(lpls=="yfit") {
if (is.na(lpllevel)) lpllevel <- 3*(lplsel["resfit"]==0)
if (is.na(lpllevel)) lpllevel <- 0
if (lpllevel>0) { ## !!! condquant bereinigen
ly <- eval(parse(text=largs$yexpr), largs$pldata)
if (length(ly)==0 || any(dim(ly)!=dim(lfit)))
ly <- cbind(y=lfit + lres)
lylabs <- largs$ylabs
if (length(lylabs)!=NCOL(ly)) lylabs <- colnames(ly)
lyind <- if(NCOL(ly)>1) paste("[",seq_len(NCOL(ly)),"]",sep="") else NULL
lylabs <- ifelse(nchar(lylabs)<15, lylabs, paste("Y",lyind,sep="") )
lylim <- cbind(pllimits(largs$ylim, ly, limfac=last(largs$ylimfac[2])))
lsimy <- if(length(lsimres)) lfit + lsimres else NULL ## !!! mult!
lsml <- if (length(lsmlegend["yfit"]))
setNames(rep(lsmlegend["yfit"],length(largs$yname)),
largs$yname) else lsmlegend
for (lj in seq_len(NCOL(lfit))) {
i.plotlws(lfit[,lj],ly[,lj], lfitname[lj], lylabs[lj],
do.smooth=lpllevel,
ylim=lylim[,lj], reflinex = 0, refliney = 1,
smooth.sim=largs$smooth.sim, smooth.legend=lsml,
simy=lsimy, arguments=largs)
}
}
}
## ---
if(lpls=="resfit") {
## if (lpllevel>0) {
## refline centerpoint
lrx <- rbind(apply(lfit,2,mean,na.rm=TRUE))
lry <- rbind(rep(-1,largs$rescol))
ldosm <- (lpllevel>1) + (lpllevel>=1.5)
for (lj in seq_len(NCOL(lfit))) {
i.plotlws(lfit[,lj], lres[,lj], lfitname[lj], largs$resname[lj],
do.smooth=lpllevel,
ylim=lreslim[,lj], reflinex=lrx[lj],refliney=-1,
smooth.sim=largs$smooth.sim, smooth.legend=lsmlegend["resfit"],
simy=lsimres,
arguments=largs)
}
## }
}
## ---
if(lpls=="absresfit")
if(length(lstres)) {
for (lj in seq_len(NCOL(lfit))) {
i.plotlws(lfit[,lj], abs(lstres[,lj]), lfitname[lj], labsresname[lj],
do.smooth=lpllevel,
smooth.power = 0.5, ylim = absreslim[,lj],
smooth.sim = largs$smooth.sim,
smooth.legend=lsmlegend["absresfit"],
simy = lsimabsres,
arguments = largs )
}
} else
warning(":plot.regr: No standardized residuals found")
## --- plot abs. res vs. weights
if(lpls=="absresweights") {
## lplweights <- (is.logical(weights) && weights) | (length(weights)>1)
## if (lpllevel>0) { # plot on weights
lwgts <- largs$weights
if (length(lwgts)!=lnr)
warning(":plot.regr: no suitable weights found. cannot plot absres on weights")
else {
lwg <- lwgts
lwg[lwg<=0] <- NA
labsres <-
if (length(lstres)) {
lrlab <- labsresname
abs(lstres)
} else {
lrlab <- paste("|",lresname,"| * sqrt(w)", sep="")
abs(lres)*sqrt(lwg)
}
for (lj in seq_len(NCOL(labsres))) {
i.plotlws(lwg,labsres[,lj], "weight",
lrlab, ## paste("|",largs$stresname[lj],"|*sqrt(w)"),
do.smooth = lpllevel, smooth.power = 0.5,
ylim = absreslim[,lj],
smooth.sim=largs$smooth.sim,
smooth.legend=lsmlegend["weights"],
simy = lsimabsres,
arguments = largs)
} } }
## --- normal plot qq plot
if(lpls=="qq") {
## if (lpllevel>0)
lpchqq <- if(lIcq) ifelse(lstres[,"prob"]>0, 3, 15) else rep(15,lnr)
lnsims <- largs$smooth.sim
for (lj in 1:largs$rescol) {
llr <- if(lIcq) lstres[,"random"] else lstres[,lj]
if (largs$smresid) {
## lsra <- if (length(lsimabsres)) lsimabsres[lnna,] else NULL
## drop NAs, since resid are wrong otherwise
## no simulated residuals here, because they are not from normal distr.
lfsmsm <- smoothM(lfit, abs(lstres[,lj]), power=0.5,
weights=lweights, resid="ratio", par=lsmpar)
llr <- lstres[,lj]/lfsmsm$ysmorig
llr <- llr / mad(llr, center=0, na.rm=TRUE) ## needs standardization
}
## ---
lxy <- qqnorm(llr, ylab = largs$stresname[lj], main="", type="n")
abline(0,1, lty = largs$lty[2], col=lclr[2])
if (lnsims>0) {
lxx <- qnorm(ppoints(lnobs))
lsimstr <-
attr(simresiduals.default(x, nrep=lnsims, simy=rnorm), "stres")
## lsimr does not contain sim. res. according to normal distr.
##- if (nrow(lsimstr)!=lnobs)
##- lsimstr <- naresid(x$na.action, lsimstr)[lnna]
## be sure to have the correct observations
for (lr in 1:lnsims) {
lines(lxx,sort(lsimstr[,lr]), lty=largs$lty[4], lwd=largs$lwd[4],
col=lclr[4])
}
## lines(lxx, sort(lstres[,lj]), col=colors[1])
}
li <- order(lxy$x)
lxx <- lxy$x[li]
lyy <- lxy$y[li]
lines(lxx,lyy, col=lclr[1])
if (is.character(lplab)) text(lxx,lyy,lplab[lnna][li], col=lclr[1]) else
points(lxx,lyy, pch=rep(largs$pch, length=lnr)[lnna][li], col=lclr[1])
##- lquart <- quantile(lstresa,c(0.25,0.75))
##- abline(0, diff(lquart)/(2*qnorm(0.75)), lty = lty[2], col=colors[2])
i.main(largs$main)
if(lIcq & lj==largs$rescol)
legend("bottomright",pch=c(15,3,NA),
legend=c("uncensored","simulated","for censored"))
}
}
## --- leverage plot. If weight are present, use "almost unweighted h"
if(lpls=="leverage")
if ((!is.na(lpllevel))&&lpllevel>0 && lnnls) {
if (diff(range(llev,na.rm=TRUE))<0.001)
warning(":plot.regr: all leverage elements equal, no leverage plot")
else {
lplabh <- lplab[lnna]
if (largs$markprop>0 & largs$markprop<1) {
li <-
order(llev, decreasing=TRUE)[1:(lnobs*largs$markprop/2)]
lplabh[li] <- largs$plabels[li]
}
llevtit <- paste("leverages", if(lIwgt) "(unweighted)")
for (lj in 1:largs$rescol) {
i.plotlws(llev, lstres[lnna,lj], llevtit, lstresname[lj], do.smooth=0,
ylim = streslim[,lj], smooth.legend=FALSE,
arguments = largs)
## line with constant Cook distance
lcookl <- largs$cookdistlines
if (length(lcookl)>0) {
ldfres <- df.residual(x)
llx <- seq(min(c(lcookl,4),na.rm=TRUE)^2 *(1-ldfres/lnobs)/6,
max(llev,na.rm=TRUE), length=50)
llr <- outer(sqrt((1-llx)^2*(lnobs-ldfres)/(llx*ldfres)),
c(lcookl,-lcookl))
matlines(llx, llr, lty=largs$lty[2], lwd=largs$lwd[2],
col=lclr[2]) ## !!! check: corr for stres?
}
}
}
}
## -----------------------------------------------------------------
## multivariate:
## residual matrix for multivariate regr
if(lpls=="resmatrix") {
lpanel <- function(xx, yy, indx, indy, pch, col, plab, ...) {
text(xx,yy,lplab)
li <- ifelse(is.na(lplab),TRUE,lplab=="")
points(xx[li],yy[li],pch=pch)
}
if (largs$rescol>1)
plmatrix(lres, panel=lpanel, main=largs$main, plab=lplab, pch=largs$pch)
}
## mahalanobis residuals
if(lpls=="qqmult") ## qq plot of Mahalanobis lenghts for multivariate regr
if ((!is.na(lpllevel))&&lpllevel>0) {
lresmahal <- x$resmahal
if (is.null(lresmahal)) lresmahal <- mahalanobis(lres,0,var(lres))
lxx <- sqrt(qchisq(ppoints(lresmahal),ncol(lres)))
lor <- order(lresmahal)
lyy <- sqrt(lresmahal[lor])
lop <- par(mfrow=c(1,1))
plot(lxx,lyy, xlab="sqrt(Chisq.quantiles)",type="n",
ylab = "Mahal.oulyingness", main="", col=largs$colors.ra[1])
lines(lxx,lyy)
if (ltxt) text(lxx,lyy,lplab[lor]) # else points(lxx,lyy,pch=lplab[lor])
axis(1)
axis(2)
abline(0,1,lty = largs$lty[2], col=largs$colors.ra[2])
i.main(largs$main)
stamp(sure=FALSE)
par(lop)
}
} ## end lplsel
## ----------------------------------------------------------------
## plot residuals vs. explanatory variables by calling plresx
## --- sequence
lIseq <- i.def(sequence, FALSE, TRUE, FALSE)
## lIseq <- lIseq|length(sequence)>1
if (lIseq) {
## is the seqence represented by any other variable?
lseqvar <- if (length(largs$xvars)>0)
sapply(largs$pldata[,largs$xvars,drop=FALSE],function(x) {
if (is.factor(x)||is.character(x)) FALSE else {
ld <- diff(x)
sum(ld==0)<0.1*length(x) && (all(ld<=0) | all(ld>=0)) }
} ) else FALSE
if (any(lseqvar)) {
warning(paste(":plot.regr / lpresx: sequence represented by",
paste(largs$xvars[lseqvar],collapse=", ")))
lIseq <- FALSE
## otherwise, plresx will plot against
}
}
## -------------------------------------------
largs$mf <- FALSE ## avoid a new page
## largs$ylim <- lylim ## no need to calculate again
plresx(x, data=data, resid=lres, vars=largs$xvars,
rawvars = rawvars, sequence=lIseq,
weights= if ("weights"%in%names(lplsel)) FALSE else NULL,
addcomp = addcomp, smooth.legend=lsmlegend,
fromPlotRegr = largs)
## --- end
par(loldpar)
invisible(largs)
}
## ==========================================================================
plresx <-
function(x, data = NULL, xvars = NULL, formula = NULL,
rawvars = TRUE, sequence=FALSE, weights=NULL,
addcomp = FALSE, smooth.legend = FALSE, ...)
## ------------------------------------------------------------
{
argPlresx <- c("x", "data", "xvars", "formula", "rawvars",
"sequence", "weights", "addcomp", "smooth.legend")
## ----------------
largs <- ## if ("fromPlotRegr"%in%names(substitute(list(...))))
list(...)[["fromPlotRegr"]]
if (is.null(largs)) {
lac <- as.list(match.call())[-1]
if (length(lac$formula)==0) lac$formula <- lac$xvars
lextra <- setdiff(names(lac),c(argPlresx,i.argPlrControl))
if (length(lextra))
warning(":plresx: argument(s) ",paste(lextra, collapse=", "),
" not used")
lcl <- c(list(quote(plotregr.control)),
lac[match(i.argPlrControl,names(lac), nomatch=0)] )
mode(lcl) <- "call"
largs <-eval(lcl)
} else largs <- list(...)[["fromPlotRegr"]]
## -------------------------------------------------------------------
if (inherits(x,"mulltinom"))
stop("!plresx! I do not know how to plot residuals of a mulitnomial regression")
ldt <- largs$pldata
lres <- largs$residuals
lIcq <- inherits(lres, "condquant")
is.fac <- attr(ldt,"is.fac")
lform <- largs$formula
lvars <- largs$xvars
lty <- largs$lty
lwd <- largs$lwd
lnnls <- !inherits(x, "nls")
lnr <- nrow(ldt)
lclr <- largs$colors.ra
## factors
if (length(lisfac <- attr(largs$pldata,"is.fac"))==ncol(ldt))
for (lj in seq_len(ncol(ldt)))
if (lisfac[lj]) ldt[,lj] <- factor(ldt[,lj])
## --- sequence
lIseq <- i.def(sequence, FALSE, TRUE, FALSE)
if (lIseq) {
if (length(lvars)) {
## is the seqence represented by any other variable?
lseqvar <-
if (length(lvars)>0)
sapply(ldt[,lvars,drop=FALSE],function(x) {
if (is.factor(x)||is.character(x)) FALSE else {
ld <- diff(x)
sum(ld==0)<0.1*length(x) && (all(ld<=0) | all(ld>=0)) }
} ) else FALSE
lIseq <- !any(lseqvar)
if (!lIseq) warning(paste(":plresx: sequence represented by",
paste(lvars[lseqvar],collapse=", ")))
}
ldt$"(sequence)" <- 1:lnr
lvars <- c(lvars,"(sequence)")
is.fac <- c(is.fac, "(sequence)"=FALSE)
}
## --- weights as x variable
lIwgt <- length(largs$pldata$"(weights)")>0
lIweights <- i.def(weights, lIwgt, TRUE, FALSE)
if (lIweights)
if (!lIwgt)
warning(":plresx; No weights found.",
" Cannot plot residuals against weights") else {
lvars <- c(lvars, "(weights)")
is.fac <- c(is.fac, "(weights)"=FALSE)
}
## ------------------
lnvars <- length(lvars)
if (lnvars==0) {
warning(":plresx: I did not find any x variables")
return() }
## terminmodel
lrawv <- sapply(as.list(lvars),
function(x) all.vars(as.formula(paste("~",x))) )
lvmod <- all.vars(formula(x))
terminmodel <- lrawv%in%lvmod
## reference lines
reflines <- i.def(largs$reflines, !inherits(x,"coxph"))
## type
addcomp <- as.logical(i.def(addcomp, FALSE, TRUE, FALSE))
if (addcomp) lty[3] <- max(1,lty[3]-2) ## for terms
## x and y axes
xlabs <- largs$xlabs
if (length(xlabs)>0)
if (length(xlabs)!=length(lvars)) {
warning("argument xlabs has wrong length")
xlabs <- NULL }
if (length(xlabs)==0) xlabs <- lvars
xlabs[xlabs=="(sequence)"] <- "sequence"
xlabs[xlabs=="(weights)"] <- "weights"
names(xlabs) <- lvars
ylabs <- largs$ylabs
if (length(ylabs)==0)
ylabs <- if (addcomp) {
ifelse(terminmodel, paste("Partial for", lvars), "Residuals")
} else "Residuals"
ylabs <- rep(ylabs, length=lnvars)
names(ylabs) <- lvars
## ---
lse <- largs$reflines.se
## -----------------------------------
## data to be plotted
nnls <- !inherits(x, "nlls")
lv <- unique(lrawv[terminmodel])
if (any(terminmodel) && reflines &&lnnls) {
lcmp <- fitcomp(x, vars=lv, xfromdata=FALSE, se=lse,
noexpand=is.fac)
lcompx <- lcmp$x
lcompy <- if (addcomp) lcmp$comp else -lcmp$comp
lcompse <- lcmp$se
if (addcomp) {
lcompdt <-
fitcomp(x, ldt, vars=lv, xfromdata=TRUE)$comp
## !!! add to lres, careful for condq
}
}
lqnt <- if(length(largs$dfres)>0) qt(1-largs$testlevel/2, largs$dfres) else
qnorm(1-largs$testlevel/2)
## --- plot range preparation
## get plot range for residuals, also needed for vars not in model
lylim <- pllimits(largs$ylim, lres, limfac=largs$ylimfac[1])
if(is.matrix(lylim))
lylim <- setNames(rep(list(lylim), ncol(ldt)), colnames(ldt) )
##
lIsmooth <- largs$smooth
if (length(names(smooth.legend))==0) {
lsmlegend <- i.def(smooth.legend, NULL, TRUE, NULL)
if (length(lsmlegend)==1)
lsmlegend <- setNames(lsmlegend, lvars[1])
} else lsmlegend <-
if("(xvars)"%in%names(smooth.legend))
setNames(smooth.legend, lvars[1]) else smooth.legend
## --- multivariate
if (inherits(x,"mlm")) {
lpanel <- function(xx, yy, indx, indy, pch, col, ...) {
lcmpx <- lcmpy <- NULL
ltin <- terminmodel[indx]
lvx <- lvars[indx]
lcnt <- !is.fac[lvx]
if (ltin) {
lcmpy <- lcompy[,lvx,indy]
if (lcnt) lcmpx <- lcompx[,lvx]
}
lsm <- if(is.factor(xx)) FALSE else lIsmooth
i.plotlws(xx,yy, do.smooth = lsm, ## ylim = lreslim[,indy],
smooth.legend=smooth.legend, new=FALSE, main = "",
arguments=largs, ...)
}
plmatrix(ldt[,lvars,drop=FALSE],lres, panel=lpanel,
##pch=largs$plab, plcol=largs$pldata$plcol,
nrow = largs$multnrow, ncol = largs$multncol,
range.=lylim, reference=FALSE,
main=largs$main) # clrsmooth=colors[3]
return()
}
## ------------------------------------------------------------------
lmf <- largs$mf
if (length(lmf)) {
if (is.logical(lmf)&&lmf)
lmf <- if (lnvars<=6) lnvars else
min(lnvars,ceiling(lnvars/((lnvars-1)%/%6+1)))
}
loma <- i.def(largs$oma, c(2,1)*(length(lmf)>0), valuefalse=NULL)
if (length(loma)<4) loma <- c(0,0,loma,0)[1:4]
loldpar <-
if (length(lmf)&(!is.logical(lmf))) {
if (length(lmf)==1) mframe(mft=lmf, oma=loma) else
mframe(lmf[1], lmf[2], oma=loma)
} ## else par(oma=loma) ## , ask=largs$ask
if (length(loldpar)) on.exit(par(loldpar), add=TRUE)
## --- loop --- plresx
for (lj in 1:lnvars) {
lv <- lvars[lj]
lvr <- lrawv[lj]
lcmpj <- terminmodel[lj] && reflines&&lnnls
lci <- if (lcmpj) lcompy[, lvr] else 0 ## !!!
rr <- lres
lylj <- lylim[[lv]]
if (largs$partial.resid)
if (addcomp && lcmpj) {
rr <- rr+lcompdt[, lvr]
if (largs$ylim)
lylj <- pllimits(lylj, rr, limfac=args$ylimfac[2])
}
## ---
if (is.fac[lv]) { # ---
## factors
ff <- factor(ldt[, lv])
ll <- levels(ff)
lnl <- length(ll)
if (largs$mbox) {
if (lIcq) rr <- lres[,"random"]
plmboxes(rr~ff, data=ldt, xlab = xlabs[lv], ylab = ylabs[lv],
refline=0, mar=NULL, ilim=lylj)
} else {
##- xx <- as.numeric(ff)+runif(lnr,-jitter,jitter)
##- xlims <- c(0.5,lnl+0.5)
i.plotlws(ff, rr, xlab = xlabs[lv], ylab = ylabs[lv],
do.smooth = 0, ylim = lylj, reflinex=NULL,
smooth.sim=0, arguments=largs) ## axes=2,
}
## -
if (lcmpj) {
lx <- seq(along = ll)
##- ww <- if (addcomp) match(ll,as.character(ff)) else 1:lnl
lcil <- lci[1:lnl]
if ((!is.null(lylj))&&diff(lylj)) {
lcilp <- plcoord(lcil, lylj, largs$ylimfac, largs$ylimext)
if (any(attr(lcilp,"nmod"))) {
liout <- lcilp!=lcil
segments(lx[liout]-0.4, lcilp[liout], lx[liout]+0.4, lcilp[liout],
lwd = lwd[5]/2, lty=lty[5], col=lclr[5])
lcil[liout] <- NA
}
}
segments(lx-0.4, lcil, lx+0.4, lcil,
lwd = lwd[5], lty=lty[5], col=lclr[5])
if (lse) {
wid <- lqnt * lcompse[1:lnl, lv]
lines(c(rbind(lx-0.1,lx+0.1,lx+0.1,lx-0.1,lx-0.1,NA)),
c(rbind(lcil-wid,lcil-wid,lcil+wid,lcil+wid,lcil-wid,NA)),
lty = lty[6], lwd=lwd[6], col = largs$colors.ra[6])
}
}
} else { # ---
## --- continuous explanatory variable
lrefx <- NULL
lrefyw <- NULL
if (lcmpj) {
lrefx <- lcompx[,lvr]
if (lse) lrefyw <- lqnt*lcompse[,lvr]
## !!! comp + lsimres
}
i.plotlws(as.numeric(ldt[, lv]), rr, xlab = xlabs[lv], ylab = ylabs[lv],
ylim = lylj, do.smooth = lIsmooth,
reflinex=lrefx, refliney=lci, reflineyw=lrefyw,
smooth.sim=largs$smooth.sim, smooth.legend=smooth.legend[lv],
main = largs$main,
arguments=largs )
}
##- i.main(if (lonetitle) lmain else lmain[lj], cex = largs$cex.title,
##- outer.margin = largs$outer.margin)
##- stamp(sure=FALSE)
}
invisible(largs)
}
## ====================================================================
i.plotlws <-
function(x,y, xlab="",ylab="", do.smooth = 2, smooth.power=1,
ylim=NULL, reflinex=NULL, refliney=NULL, reflineyw=NULL,
smooth.sim=0, smooth.legend = FALSE, simy = NULL,
new=TRUE, axes=1:2, main = NULL,
arguments = NULL)
{
## Purpose: panel for residual plots (labels, weights, smooth)
## shows vertical bars if y is a matrix of class condquant
## ----------------------------------------------------------------------
## Arguments:
## !!! simres only works with ncol(y)==1
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 5 May 2004, 06:46
largs <- arguments
if (is.null(largs)) stop("!i.plotlws! argument list not available")
lsmooth <- largs$smooth
lclr <- largs$colors.ra
lIcq <- inherits(y,"condquant")
lnr <- NROW(x)
lplw <- largs$pldata$"(plweights)"
lIplw <- length(lplw)>0
lwgt <- largs$pldata$"(weights)"
lyplext <- largs$plext[3:4]
## lIylim <- !(is.logical(ylim)&&all(!ylim))
## ylim is not NULL nor FALSE
## if (lIylim)
ylim <- pllimits(ylim, y, limfac=largs$ylimfac[1])
lIylim <- NROW(ylim)==2
lrf <- !is.null(reflinex)
lrfyw <- length(reflineyw)>0
smooth.legend <- i.def(unlist(smooth.legend), NULL, TRUE, NULL)
## ---
lplab <- largs$plab ## pldata$"(plab)"
lpclr <- largs$pldata$"(pcol)"
lpclr <- rep(if (length(lpclr)) lpclr else lclr[1], length=lnr)
if (lIcq) {
lcqint <- y[,"prob"]!=0
lpclr <- ifelse(lcqint, lclr[7], lpclr) ## color for random points
}
lpch <- largs$pldata$"(pch)"
if (length(lpch)==0) lpch <- rep(largs$pch, length=lnr)
ltxt <- is.character(lplab)
## --- smooth
if (do.smooth) {
lsmgrp <- largs$pldata$"(smooth.group)"
lIsmgrp <- !is.null(lsmgrp)
lsimy <- if (is.null(simy)) largs$simres else simy
lsmooth <- if(is.list(lsmooth)) lsmooth else
smoothM(x, cbind(y, lsimy), lwgt,
group=lsmgrp, power=smooth.power, band=do.smooth>2,
par=largs$smooth.par, iterations=largs$smooth.iter)
lsmclr <-
if (lIsmgrp) largs$colors.smgrp else rbind(lclr[3:4])
}
## -----------------------------------------------------------------------
## do plot(s) (i.plotlws)
if (lIfac <- is.factor(x)) {
ll <- levels(x)
lnl <- length(ll)
jitter <- i.def(largs$jitter, 0.3*(1-10^(-0.01*pmax(0, largs$nobs-10))) )
x <- as.numeric(x)+runif(lnr,-jitter,jitter)
lxrg <- c(0.5,lnl+0.5)
} else {
lxrg <- range(as.numeric(x), na.rm=TRUE)
lxrg <- lxrg + largs$plext[1:2]*diff(lxrg)*c(-1,1)
}
if (lIcq) { # condquant: only 1 y possible
## ly <- y[,1]
lypl <-
if (lIylim) {
cbind(plcoord(y[,1:4], range=ylim, limext=largs$ylimext,
plext=lyplext), y[,5:6]) ## column "prob" is needed
} else y
} else { # usual case
if (lIylim) {
## ly <- y
lypl <- plcoord(y, range=ylim, limext=largs$ylimext, plext=lyplext)
} else lypl <- y
}
## plotting frame
if (new) {
lyrg <- attr(lypl, "plrange")
if (length(lyrg)==0) {
lyrg <- range(lypl,na.rm=TRUE)
lyrg <- lyrg + lyplext*diff(lyrg)*c(-1,1)
}
plot(lxrg, lyrg, xlab = xlab, ylab = ylab,
type="n", bty="n", axes=FALSE, xaxs="i", yaxs="i")
if (1%in%axes)
if (lIfac) axis(1,labels=ll,at=1:lnl) else axis(1)
## box
if (lIylim & length(attr(lypl,"nmod"))) {
lusr <- par("usr")
box(lty=3)
## inner box
lines(lusr[c(1,1,2,2,1)],
c(max(ylim[1],lusr[3]),min(ylim[2],lusr[4]))[c(1,2,2,1,1)],
xpd=TRUE)
if (2%in%axes) { ## axis labels only in inner range
if (is.null(largs$yaxp)) {
lat <- pretty(ylim, n=6, min.n=5)
lat <- lat[lat>=ylim[1]&lat<=ylim[2]]
axis(2,at=lat)
} else axis(2,yaxp=largs$yaxp)
}
} else {
box()
if (2%in%axes) axis(2)
}
}
abline(0, 0, lty = largs$lty[2], col=lclr[2])
lusr <- par("usr")
## conditional quantiles
if(lIcq) {
## vertical
li <- lcqint & lypl[,"prob"]>=largs$condprobrange[1] &
lypl[,"prob"]<=largs$condprobrange[2]
if (any(li,na.rm=TRUE))
segments(x[li],lypl[li,"lowq"],x[li],lypl[li,"uppq"],
col=largs$colors.ra[9])
## horizontal
ldx <- diff(lusr[1:2])*0.02
segments(x[lcqint]-ldx,lypl[lcqint,"median"],
x[lcqint]+ldx,lypl[lcqint,"median"], col=largs$colors.ra[8])
}
if (lrf) { ## prep reference line
lrfx <- reflinex
lrfy <- refliney
if(length(lrfx)==1) { # given as abline: intercept, slope
lrfy <- c(lrfx)+c(lrfy)*lusr[1:2]
lrfx <- lusr[1:2]
}
if (lrfyw) {
lrfyl <- lrfy-reflineyw*lrfx
lrfyu <- lrfy+reflineyw*lrfx
}
if (lIylim) { ## adjust ref lines to limited y range
if (length(lrfx)==2) {
lrfx <- seq(lusr[1],lusr[2],length=50)
lrfy <- reflinex+refliney*lrfx
if (lrfyw) {
lrfyl <- lrfy-reflineyw*lrfx
lrfyu <- lrfy+reflineyw*lrfx
}
}
lrfyp <- plcoord(lrfy, range=ylim, limext=largs$ylimext,
plext=lyplext)
lrfyl <- lrfy<lrfyp
lrfyh <- lrfy>lrfyp
lrfyr <- lrfy==lrfyp
## draw reference lines
if (any(lrfyr,na.rm=TRUE))
lines(lrfx[lrfyr],lrfy[lrfyr],
lty=largs$lty[5],col=lclr[5],lwd=largs$lwd[5])
if (any(lrfyl,na.rm=TRUE))
lines(lrfx[lrfyl],lrfyp[lrfyl],
lty=largs$lty[5],col=lclr[5],lwd=largs$lwd[5]/2)
if (any(lrfyh,na.rm=TRUE))
lines(lrfx[lrfyh],lrfyp[lrfyh],
lty=largs$lty[5],col=lclr[5],lwd=largs$lwd[5]/2)
if (lrfyw) {
lrfyl[lrfyl<ylim[1]|lrfyl>ylim[2]] <- NA
lrfyu[lrfyu<ylim[1]|lrfyu>ylim[2]] <- NA
}
} else {
lines(lrfx,lrfy, lty=largs$lty[5],col=lclr[5],lwd=largs$lwd[5])
if (lrfyw) {
lines(lrfx,lrfyl,lty=largs$lty[6],col=lclr[6],lwd=largs$lwd[6])
lines(lrfx,lrfyu,lty=largs$lty[6],col=lclr[6],lwd=largs$lwd[6])
}
}
}
## smooth
if(do.smooth) {
lsm <- lsmooth
## plot smooths
lsmx <- lsm$x
if (is.null(dim(lsmx))) dim(lsmx) <- c(length(lsmx),1)
lsmy <- lsm$y
## groups
lgrp <- as.numeric(lsm$group)
if (length(lgrp)==0) {
lgrp <- rep(1, NROW(lsmx))
lngrp <- 1
} else lngrp <- length(unique(lgrp))
if (lIylim) lsmy[lsmy<ylim[1]|lsmy>ylim[2]] <- NA
if (largs$smooth.sim>0) {
for (lgr in 1:lngrp) {
lig <- lgrp==lgr
matlines(lsmx[lig,1], lsmy[lig,-1], col=lsmclr[lgrp,2],
lty=largs$lty[4], lwd=largs$lwd[4])
}
}
for (lgr in 1:lngrp) {
lig <- lgrp==lgr
lines(lsmx[lig,1], lsmy[lig,1], col=lsmclr[lgr,1],
lty=largs$lty[3], lwd=largs$lwd[3])
if (do.smooth>2) { ## quantile smooths
ligi <- lig & lsm$ybandindex
lines(lsmx[ligi,1], lsm$yband[ligi], col=lsmclr[lgr,1],
lty=largs$lty[3], lwd=0.5*largs$lwd[3])
ligi <- lig & !lsm$ybandindex
lines(lsmx[ligi,1], lsm$yband[ligi], col=lsmclr[lgr,1],
lty=largs$lty[3], lwd=0.5*largs$lwd[3])
}
}
}
##- points
lcplab <- largs$cex.plab[1]
lcpch <- largs$cex.plab[2]
lcplab <- rep(lcplab* if (lcplab>0) 1 else -(if(lIplw) lplw else 1),
length=lnr)
lcpch <- rep(lcpch* if (lcpch>0) 1 else -(if(lIplw) lplw else 1),
length=lnr)
if (lIcq) lypl <- lypl[,"random"]
if (length(y)) { ## ???
if (ltxt) {
text(x,lypl, lplab, cex=lcplab, col=lpclr)
lipch <- is.na(lplab)|lplab==""
} else lipch <- rep(TRUE,length(x))
if (any(lipch)) {
if (lIplw)
symbols(x[lipch], lypl[lipch], circles=sqrt(lcpch[lipch]),
inches=par("cin"), fg=lpclr[lipch], add=TRUE)
else
points(x[lipch], lypl[lipch], pch=lpch[lipch], cex=lcpch[lipch],
col=lpclr[lipch])
}
}
##
##- ljx <- ljx+ldj
##- ljrflx <- ljrflx+ldjrflx
##- ljrfly <- ljrfly+ldjrfly
##- }
## --------------------------------
if (do.smooth) {
if (lngrp>1) { ## legend for smooths
if (length(smooth.legend)) {
if (is.character(smooth.legend)&&smooth.legend[1]%in%c("TRUE","FALSE"))
smooth.legend <- as.logical(smooth.legend)
if (is.logical(smooth.legend))
smooth.legend <- if (smooth.legend) "bottomright" else NULL
if (length(smooth.legend))
legend(smooth.legend[1], smooth.legend[2], legend=levels(lsmgrp),
lty=largs$lty[3], ## rep(largs$lty[3],lngrp),
col=largs$colors.smgrp, lwd=3, bg="white", xpd=TRUE)
}
}
}
i.main(if (is.null(main)) largs$main else main, cex = largs$cex.title)
if (new) stamp(sure=FALSE)
NULL ## return last yrange
}
## ==========================================================================
i.plotselect <-
function(plotselect, smooth=2, wgt = FALSE, mult = FALSE,
famgauss = TRUE, famglm = FALSE, famcount = FALSE)
{
## plot selection
lsmdef <- 1+smooth-famglm
lplsel <- c( yfit=0, resfit=lsmdef, absresfit = NA,
absresweights = NA, qq = NA,
leverage = 1, resmatrix = 1, qqmult = 1)
if (length(plotselect)>0) {
lpls <- TRUE
lplnm <- names(plotselect)
if (length(lplnm)==0) {
if (length(plotselect)==length(lplsel))
lplnm <- names(lplsel)
else {
warning(":plot.regr: Inadequate argument plotselect")
lpls <- FALSE}
}
if (lpls) {
if ("default"%in%lplnm) {
lplnm <- setdiff(lplnm, "default")
lplsel[] <- if (plotselect["default"]==0) 0 else
pmin(lplsel,plotselect["default"])
}
lina <- is.na(match(lplnm,c(names(lplsel),"default")))
if (any(lina)) {
warning(":plot.regr: Inadequate elements in plotselect: ",
paste(names(plotselect)[lina], collapse=", "))
lplnm <- lplnm[!lina] }
lplsel[lplnm] <- plotselect[lplnm]
}
}
if (!mult) lplsel[c("resmatrix","qqmult")] <- 0
if (is.na(lplsel["yfit"])) lplsel["yfit"] <- 0
if (is.na(lplsel["resfit"]))
lplsel["resfit"] <- lsmdef * (lplsel["yfit"]==0)
if (is.na(lplsel["absresfit"]))
lplsel["absresfit"] <- !famcount
if (is.na(lplsel["absresweights"]))
lplsel["absresweights"] <- famgauss&wgt
if (is.na(lplsel["qq"])) lplsel["qq"] <- famgauss # how about gamma? !!!
lplsel
}
## ------------------------------------------------------------------------
pllimits <-
function(pllim, data, limfac = 4.0, FUNC=robrange)
{ ## determine inner plot range
## if pllim is a list or a matrix, leave it alone
lIcq <- inherits(data, "condquant")
ldt <- cbind( if (lIcq) c(data[,1:3]) else data )
pllim <- i.def(pllim, TRUE, TRUE, FALSE)
if (length(pllim)>1)
if (any(dim(cbind(pllim))!=c(2,NCOL(ldt)))) {
warning(":plot.regr/pllimits: unsuitable argument pllim ")
pllim <- TRUE
}
if (length(pllim)==1 && is.logical(pllim))
pllim <-
if (pllim) apply(ldt, 2, FUNC, fac=limfac) else
matrix(FALSE, 2, NCOL(ldt))
if ((!is.list(pllim))&length(pllim)==2) pllim <- as.matrix(pllim)
pllim
}
## --------------------------------------------------------------------
i.main <- function(main, line=1-outer.margin, cex=NULL, mincex=0.7,
adj=NULL, outer.margin=NULL, col="black",
doc=getOption("doc"))
{
## Purpose: title
## ----------------------------------------------
scale <- 2.5
minadj <- 0.1
outer.margin <- i.def(outer.margin, par("oma")[3]>0,
valuefalse = FALSE)
if (outer.margin && 1!=prod(par("mfg")[1:2])) return()
lwd <- if(outer.margin) par("mfg")[3] else 1
lcex <- lwd * scale*par("pin")[1]/(par("cin")[1]*nchar(main))
cex <- i.def(cex, max(mincex, min(1.2, lcex)), valuefalse = 0 )
ladj <- i.def(adj, max(minadj,0.5*(cex>mincex)), 0.5, minadj)
ltxt <- main
lmaxchar <- lwd * scale*par("pin")[1]/(cex*par("cin")[1])
if (nchar(ltxt)>lmaxchar) ltxt <- paste(substr(ltxt, 1, lmaxchar-3),"...")
if (length(main)!=0 & cex>0)
mtext(ltxt, 3, line, cex = cex, adj=ladj, outer = outer.margin,
col=col)
if ((!is.null(doc))&&doc&&length(tit(main)))
mtext(tit(main), 3, line-1, outer = outer.margin, col=col)
}
## -----------------------------------------------------------------------
i.argPlrControl <-
c( "x", "formula", "data", "rawvars", "smooth.group", "plweights",
"plab", "pch", "pcol", "cex.plab", "cex.pch", "glm.restype",
## "fit", "hat", "stresiduals",
"markprop", "smooth", "smooth.par",
"smooth.iter", "smooth.sim", "leveragelim", "mbox",
"lty", "lwd",
"colors.ra", "colors.smgrp", "smooth.pale",
"ylim", "ylimfac", "ylimext",
"plext", "yaxp", "main", "cex.title", "xlabs", "ylabs",
"mf", "mfcol", "multnrow", "multncol", "oma", ## "ask",
"smresid", "reflines", "reflines.se", "partial.resid", "cookdistlines")
## ==========================================================================
smoothRegr <-
function(x, y, weights=NULL, par=5*length(x)^log10(1/2), iterations=50,
minobs=NULL, ...)
{
minobs <- i.def(minobs, getUserOption("smoothMinobs"), valuefalse=NULL)
if (is.null(minobs)) minobs <- 8
if (length(x)-sumna(x)<minobs ) return(NULL)
iterations <- max(iterations, 1)
## ----------------------------------------------------------------
lcl <- call("loess", formula=y~x, weights=weights, span=par,
iterations=iterations,
family=if (iterations>0) "symmetric" else "gaussian",
na.action=na.exclude)
if (is.null(weights)) lcl$weights <- NULL
lsm <- if (u.debug()) eval(lcl) else try(eval(lcl), silent=TRUE)
if (class(lsm)=="try-error") {
warning(":smoothRegr: span was too small. Using 0.99")
lcl$span <- 0.99
lsm <- eval(lcl)
}
fitted(lsm)
}
## ========================================================================
smoothM <-
function(x, y, weights=NULL, band=FALSE, group=NULL, power=1,
resid="difference", par=5*length(x)^log10(1/2),
parband = par*2^log10(2), iterations=50, ...)
{
## Purpose: smooth for multiple y : one column from data, the other sim
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 9 Feb 2016, 14:57
lsmfunc <- getUserOption("smoothFunction")
if (is.null(lsmfunc)) lsmfunc <- smoothRegr else
if (is.character(lsmfunc)) lsmfunc <- get(lsmfunc)
power <- i.def(power, 1,1,1)
## ---
lnx <- NROW(x)
ly <- cbind(y)
if (nrow(ly)!=lnx) stop("!smoothM! Incompatible dimensions of 'x' and 'y'")
## if (length(weights)<=1) weights <- rep(1, lnx)
lIwgt <- length(weights)>0
if (lIwgt&&length(weights!=lnx))
stop("!smoothM! Incompatible dimensions of 'x' and 'weights'")
if (ljnogrp <- length(group)<=1) group <- rep(1, lnx)
if (length(group)!=lnx)
stop("!smoothM! Incompatible dimensions of 'x' and 'group'")
lgrp <- factor(group)
if (is.character(resid))
resid <- pmatch(resid, c("difference","ratio"))
if (is.na(resid)) {
warning(":smoothM: argument 'resid' not suitable.",
" Difference residuals calculated")
resid <- 1
}
lnna <- apply(is.finite(cbind(x,y)), 1,all)
x[!lnna] <- NA
lio <- order(as.numeric(lgrp),x)
lio <- lio[!is.na(x[lio])]
lxo <- x[lio] # sorted without NA
lyo <- ly[lio,,drop=F]
lgrpo <- lgrp[lio]
lgrpn <- as.numeric(lgrpo)
lwgto <- if(lIwgt) weights[lio] else NULL
## production
oldopt <- options(warn=-1)
on.exit(options(oldopt))
lysm <- array(NA, dim=dim(lyo), dimnames=dimnames(lyo))
## presently only for matrices
if (band) lysmband <- lsmrpos <- lysm[,1]
for (lgr in seq_along(levels(lgrpo))) { ## smooth within groups (if >1)
lig <- which(lgrpn==lgr)
for (j in ncol(lyo):1) {
lsm <- lsmfunc(lxo[lig], lyo[lig,j]^power,
weights=if(lIwgt) lwgto[lig] else NULL,
par=par, iterations=iterations, ...)
if (length(lsm)) lysm[lig,j] <- lsm^(1/power)
}
if (band & length(lsm)) {
lysmb <- lsm
lsmr <- lyo[lig,1]-lsm^(1/power)
lii <- lsmr>=0
lsmrh <- lsmr[lii]
ligi <- lig[lii]
lsmh <- lsmfunc(lxo[ligi], sqrt(lsmrh),
if (lIwgt) weights=lwgto[ligi] else NULL,
par=parband, iterations=iterations)
if (length(lsmh)) lysmb[lii] <- lsmh^2
lii <- lsmr<=0
lsmrl <- - lsmr[lii]
ligi <- lig[lii]
lsml <- lsmfunc(lxo[ligi], sqrt(lsmrl),
if (lIwgt) weights=lwgto[ligi] else NULL,
par=parband, iterations=iterations)
if (length(lsml)) lysmb[lii] <- - lsml^2
lysmband[lig] <- lysmb + lsm
lsmrpos[lig] <- !lii
}
}
lysmin <- matrix(NA, lnx, ncol(lyo), dimnames=list(names(x),colnames(lyo)))
lysmin[lio,] <- lysm
lres <- if (resid==2) ly/lysmin else ly-lysmin
rr <- list(x = lxo, y = lysm, group = if(!ljnogrp) factor(lgrpo) else NULL,
index = lio, xorig = x, ysmorig = lysmin, residuals = lres)
if (band) rr <- c(rr, yband = list(lysmband), ybandindex = list(lsmrpos) )
rr
}
## ====================================================================
## smoothRegrrob <- function(x,y,weights,par=3*length(x)^log10(1/2),iter=50)
## ==========================================================================
simresiduals <- function(object, ...) UseMethod("simresiduals")
## Purpose: simulate residuals according to regression model
## by permuting residuals of actual model or by random numbers
## ----------------------------------------------------------------------
## Arguments: simfunction: how are residuals generated?
## ---------------------------------------------------------------------
simresiduals.glm <- function(object, nrep=19, simfunction=NULL,
glm.restype="working", ...)
{
lcall <- object$call
if ("weights"%in%names(lcall)) {
warning(":simresiduals: I cannot simulate for weighted regression (yet)")
## get_all_vars contains weights without parentheses -> danger!
return(NULL)
}
loverd <- attr(object$scale, "fixed")
if (length(loverd) && !loverd)
warning(":simresiduals: Cannot simulate from overdispersed model.",
" Using dispersion 1")
lcall[[1]] <- as.name("glm")
## -------
lnaaction <- object$na.action
ldata <- object$allvars
if (is.null(ldata)) {
ldata <- if (u.debug()) eval(lcall$data) else
try(eval(lcall$data))
if (class(ldata)=="try-error"||is.null(dim(ldata))) {
warning(":simresiduals: data not found -> No simulated residuals")
return(NULL)
}
}
if (length(lnaaction)) ldata <- ldata[-lnaaction,]
lfit <- object$fitted.values
## prepare call
lform <- update(formula(object), .Y. ~.)
lynm <- all.vars(lform[[2]])
environment(lform) <- environment()
lcl <- call("glm", formula=lform, data=as.name("ldata"),
family=object$family, start=object$coef, model=FALSE,
y=FALSE, na.action=lcall$na.action)
lfam <- object$distrname
if (length(lfam)==0) lfam <- object$family$family
if (is.null(lfam)) lfam <- ""
ly <- object$response
ln <- nrow(ldata)
lone <- rep(1,ln)
if (!is.function(simfunction)) {
if(lfam%in%c("binomial","quasibinomial")) {
if (NCOL(ly)==1 && length(unique(ly))!=2) {
warning(":simresiduals: binomial distribution with ",
"unsuitable response.\n No residuals simulated")
return(list(simres=numeric(0)))
}
simfunction <-
if(NCOL(ly)==1) function(n, fit, sig=NULL) rbinom(n, lone, fit)
else {
lnbin <- ly[,1]+ly[,2]
function(n, fit, sig=NULL) {
ly1 <- rbinom(n, lnbin, fit)
cbind(N1=ly1,N2=lnbin-ly1)
}
}
} else {
if (lfam%in%c("poisson","quasipoisson"))
simfunction <- function(n, fit, sig) rpois(ln, fit)
else {
warning(":simresiduals: not (yet) available for this ",
"type of model.\n No residuals simulated")
return(list(simres=numeric(0)))
}
}
}
## ---
lsimres <- matrix(NA, ln, nrep)
for (lr in 1:nrep) {
ldata$.Y. <- simfunction(ln, lfit)
lrs <- eval(lcl, environment())
lsimres[,lr] <-
if (substr(glm.restype,1,4)=="cond")
regr0:::residuals.polr(lrs)[,"random"] else
residuals(lrs, type=glm.restype)
}
naresid(lnaaction, lsimres)
}
## ==========================================================================
simresiduals.default <-
function(object, nrep=19, simfunction=NULL, stres=TRUE, ...)
## glm.restype="deviance")
{
## Purpose: simulate residuals according to regression model
## by permuting residuals of actual model or by random numbers
## ----------------------------------------------------------------------
## Arguments: simfunction: how are residuals generated?
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 10 Aug 2008, 07:55
##- if (!class(object)[1]%in%c("regr","lm","glm")) {
##- warning(":simresiduals: ",
##- "I can simulate only for `regr`, `lm`, or `glm` objects")
##- return(NULL)
##- }
if (!inherits(object, c("lm", "lmrob")))
stop("!simresiduals! I cannot simulate for model class ",
paste(class(object), collapse=" "))
lcall <- object$call
if ("weights"%in%names(lcall)) {
warning(":simresiduals: I cannot simulate for weighted regression (yet)")
## get_all_vars contains weights without parentheses -> danger!
return(NULL)
}
lnostres <- !u.true(stres)
lnaaction <- object$na.action
ldata <- object$allvars
if (is.null(ldata)) {
ldata <- if (u.debug()) eval(lcall$data) else
try(eval(lcall$data))
if (class(ldata)=="try-error"||is.null(dim(ldata))) {
warning(":simresiduals: data not found -> No simulated residuals")
return(NULL)
}
}
if (length(lnaaction)) ldata <- ldata[-lnaaction,]
lfit <- object$fitted.values
if (is.null(lfit)) lfit <- object$linear.predictors
if (is.null(lfit)) lfit <- 0
## -------
if (lrgen <- length(simfunction)>0) {
if (!is.function(simfunction)) simfunction <- rnorm
## ---
## weibull not yet implemented
lsig <- object$sigma
if (length(lsig)!=1) lsig <- 1 ## only standardized res useful!
lres <- object$residuals
if (length(lres)==0) lres <- rep(0, nrow(ldata)) ## needed to eliminate NAs
} else {
lres <- object$stres * object$sigma
if (lnostres <- (length(lres)==0)||all(!is.finite(lres)))
lres <- residuals(object)
if (inherits(lres, "condquant"))
lres <- structure(lres[,"random"], names=row.names(lres))
if (length(lres)==0||all(lres==lres[1])) {
warning(":simresiduals: no (distinct) residuals found",
"-> No simulated residuals")
return(NULL)
}
}
if (nrow(ldata)!=length(lres)) {
li <- match(names(lres),row.names(ldata))
if (anyNA(li)) {
warning(":simresiduals: data not suitable -> No simulated residuals")
return(NULL)
}
ldata <- ldata[li,]
}
##!!! weights
lina <- is.na(lres)
if (any(lina)) {
lres <- lres[!lina]
ldata <- ldata[!lina,]
lfit <- rep(lfit,length=length(lres))[!lina]
}
if (nrow(ldata)<=2) {
warning(":simresiduals: <=2 residuals found -> No simulated residuals")
return(NULL)
}
## ---
## prepare call
lcall$data <- as.name("ldata")
lform <- formula(object)
lynm <- all.vars(lform[[2]])
environment(lform) <- environment()
lcall$formula <- lform
lcall <- lcall[names(lcall)%nin%c("yy","fname","family","vif",
"calcdisp","suffmean","termtable")]
lcall$model <- NULL
lcall$termtable <- NULL
lnrow <- nrow(ldata)
lsimres <- matrix(NA,lnrow,nrep)
lfam <- object$distrname
if (length(lfam)==0) lfam <- object$family$family
## if (lfam%in%c("gaussian","Gaussian"))
lcall$formula <- update(lform, paste(lynm,"~.")) ## needed for transformed y
lsimstres <- if (lnostres) NULL else lsimres
for (lr in 1:nrep) {
ldata[,lynm] <-
if (lrgen) simfunction(lnrow,lfit,lsig) else lfit + sample(lres)
## this would not work with polr or other matrix residuals
lenv <- environment()
lrs <- eval(lcall, envir=lenv) ## update(x, formula=lfo, data=ldata)
lrsr <- residuals(lrs)
if (inherits(lrsr, "condquant")) lrsr <- lrsr[,"random"]
lsimres[,lr] <- lrsr
if (!lnostres) lsimstres[,lr] <- naresid(lrs$na.action,lrs$stres)
}
structure(naresid(lnaaction, lsimres), stres=naresid(lnaaction, lsimstres))
}
## =======================================================================
plmatrix <-
function(x, y=NULL, data=NULL, panel=panelDefault,
nrow=0, ncol=nrow, save=TRUE, robrange.=FALSE, range.=NULL,
pch=1, col=1, reference=0, ltyref=3,
log="", xaxs="r", yaxs="r",
xaxmar=NULL, yaxmar=NULL, xlabmar=NULL, ylabmar=NULL,
vnames=NULL, main="", xlab=NULL, ylab=NULL, cex=NA,
cex.points=NA, cex.lab=1, cex.text=1.3, cex.title=1,
bty="o", oma=NULL, mar=rep(0.2,4), keeppar=FALSE,
axes=TRUE, ...) ## ask = par("ask"),
{
## Purpose: pairs with different plotting characters, marks and/or colors
## showing submatrices of the full scatterplot matrix
## possibly on several pages
## -------------------------------------------------------------------------
## Author: Werner Stahel, Date: 23 Jul 93; minor bug-fix+comments:
## M.Maechler
lf.axis <- function(k, axm, labm, at=at, txt, ...) {
if (k %in% axm) axis(k)
if (k %in% labm)
mtext(txt, side=k, line=(0.5+1.2*(k %in% axm)), at=at)
}
##
oldpar <- par(c("mfrow","mar","cex","mgp")) ##, "ask"
lmfg <- par("mfg")
on.exit(if (!keeppar) par(oldpar))
##---------------------- preparations --------------------------
## data
if (is.formula(x)) {
ld <- model.frame(x, data, na.action=NULL)
if (length(x)>2) { ## response last
ld1 <- as.data.frame(ld[,1])
if (NCOL(ld1)==1) names(ld1) <- names(ld)[1] else
if (colnames(ld1)[1]=="V1") ## rectify names
names(ld1) <- extractNames(names(ld[1]), names(ld1))
ld <- cbind( ld[,-1,drop=FALSE], ld1 )
}
x <- ld
} ## else ld <- x
ldata <- x <- cbind(x)
nv1 <- ncol(x)
lv1 <- lv2 <- 0
if (is.null(y)) {
if (save) { nv1 <- nv1-1; lv2 <- 1 }
nv2 <- nv1
} else { # cbind y to data for easier preparations
save <- FALSE
if (is.formula(y)) {
ld <- model.frame(y,data, na.action=NULL)
if (length(y)>2) ## response last
y <- cbind( ld[,-1], as.data.frame(ld[,1]) ) else y <- ld
}
ldata <- cbind(x, y)
nv2 <- NCOL(y)
lv2 <- nv1
}
nvv <- ncol(ldata)
tnr <- nrow(ldata)
## variable labels
lvn <- colnames(ldata)
if (is.null(lvn)) lvn <- paste("V",1:nvv)
lvnm <- lvn
if (!is.null(vnames)) {
vnames <- rep(vnames,length=nvv)
lvnm[!is.na(vnames)] <- vnames[!is.na(vnames)]
}
lvsurv <- sapply(ldata, function(x) inherits(x, "Surv") )
if (any(lvsurv)) {
lf.surv <- function(dt) structure(dt[,1], pch=dt[,2]+1)
ldata[lvsurv] <- lapply(ldata[lvsurv], lf.surv)
}
vnames <- lvnm
lxlab <-
if (is.null(xlab)) vnames[lv1+1:nv1] else rep(xlab, length=nv1)
lylab <-
if (is.null(ylab)) vnames[lv2+1:nv2] else rep(ylab, length=nv2)
## plotting characters, color
lpch <- eval(as.expression(substitute(pch)), data, parent.frame())
if (length(lpch)>tnr) lpch <- lpch[1:tnr]
lcol <- eval(as.expression(substitute(col)), data, parent.frame())
if (length(lcol)>tnr) lcol <- lcol[1:tnr]
## range
rg <- matrix(nrow=2,ncol=nvv,dimnames=list(c("min","max"),lvn))
if(is.matrix(range.)) {
if (is.null(colnames(range.))) {
if (ncol(range.)==ncol(rg)) rg[,] <- range. else
warning(":plmatrix: argument 'range.' not suitable. ignored")
} else {
lj <- match(colnames(range.),lvn)
if (anyNA(lj)) {
warning(":plmatrix: variables ", paste(colnames(range.)[is.na(lj)],collapse=", "),
" not found")
if (any(!is.na(lj))) rg[,lj[!is.na(lj)]] <- range.[,!is.na(lj)]
}
}
}
else
if (length(range.)==2&&is.numeric(range.)) rg[,] <- matrix(range.,2,nvv)
lna <- which(apply(is.na(rg),2, any))
if (length(lna)){
ldt <- ldata[,lna,drop=FALSE]
lfac <- !sapply(ldt, is.numeric)
if (any(lfac)) { ## factors
rg[,lna[lfac]] <-
rbind(0.2, apply(ldt[,lfac, drop=FALSE], 2,
function(x) length(unique(x))+0.8 ) )
lna <- lna[!lfac]
ldt <- ldt[,!lfac,drop=FALSE]
}
if (any(lna))
rg[,lna] <-
apply(ldt, 2,
if(robrange.) robrange else range, na.rm=TRUE, finite=TRUE)
}
colnames(rg) <- lvn
## reference lines
tjref <- (length(reference)>0)&&!(is.logical(reference)&&!reference)
if (tjref) {
if(length(reference)==1) lref <- rep(reference,length=nvv) else {
lref <- rep(NA,nvv)
lref[match(names(reference),lvn)] <- reference
}
names(lref) <- lvn
}
## ask?
##- if (length(ask)==0) {
##- ask <- getOption("ask")
##- if(length(ask)==0)
##- ask <- interactive() && last(c("",names(dev.list())))%in%c("X11cairo")
##- }
##- par(ask=ask)
## plot
jmain <- !is.null(main)&&main!=""
lpin <- par("pin")
lnm <- if (nv1==6 && nv2==6) c(6,6) else {
if (lpin[1]>lpin[2]) c(6,8) else c(8,6) }
lnr <- nrow
if (u.nuna(lnr)||lnr<1)
lnr <- min(nv2,ceiling(nv2/((nv2-1)%/%lnm[2]+1)))
lnc <- ncol
if (u.nuna(lnc)||lnc<1)
lnc <- min(nv1,ceiling(nv1/((nv1-1)%/%lnm[1]+1)))
if (u.nuna(xaxmar)) xaxmar <- 1+(nv1*nv2>1)
## if (anyNA(xaxmar)) xaxmar <- 1+(nv1*nv2>1)
xaxmar <- ifelse(xaxmar>1,3,1)
if (u.nuna(yaxmar)) yaxmar <- 2+(nv1*nv2>1)
## if (anyNA(yaxmar)) yaxmar <- 2+(nv1*nv2>1)
yaxmar <- ifelse(yaxmar>2,4,2)
if (u.nuna(xlabmar)) xlabmar <- if (nv1*nv2==1) xaxmar else 4-xaxmar
if (u.nuna(ylabmar)) ylabmar <- if (nv1*nv2==1) yaxmar else 6-yaxmar
if (length(oma)!=4)
oma <- c(2+(xaxmar==1)+(xlabmar==1), 2+(yaxmar==2)+(ylabmar==2),
1.5+(xaxmar==3)+(xlabmar==3)+cex.title*2*jmain,
2+(yaxmar==4)+(ylabmar==4))
# oma <- 2 + c(0,0,!is.null(main)&&main!="",1)
if (!keeppar) par(mfrow=c(lnr,lnc))
par(oma=oma*cex.lab, mar=mar, mgp=cex.lab*c(1,0.5,0))
if (keeppar) par(mfg=lmfg, new=FALSE)
if (!is.na(cex)) cex.points <- cex
if (is.na(cex.points)) cex.points <- max(0.2,min(1,1.5-0.2*log(tnr)))
##
## log
if (length(grep("x",log))>0) ldata[ldata[,1:nv1]<=0,1:nv1] <- NA
if (length(grep("y",log))>0) ldata[ldata[,lv2+1:nv2]<=0,lv2+1:nv2] <- NA
npgr <- ceiling(nv2/lnr)
npgc <- ceiling(nv1/lnc)
##----------------- plots ----------------------------
for (ipgr in 1:npgr) {
lr <- (ipgr-1)*lnr
for (ipgc in 1:npgc) {
lc <- (ipgc-1)*lnc
if (save&&((lr+lnr)<=lc)) break
for (jr in 1:lnr) { #-- plot row [j]
jd2 <- lr+jr
j2 <- lv2 + jd2
if (jd2<=nv2) v2 <- ldata[,j2]
for (jc in 1:lnc) { #-- plot column [j2-lv2] = 1:nv2
jd1 <- lc+jc
j1 <- lv1 + jd1
if (jd2<=nv2 & jd1<=nv1) {
v1 <- ldata[,j1]
plot(NA,NA, type="n", xlab="", ylab="", axes=FALSE,
xlim <- rg[,j1], ylim <- rg[,j2],
xaxs=xaxs, yaxs=yaxs, log=log, cex=cex.points)
if (axes) {
usr <- par("usr")
at=c(mean(usr[1:2]),mean(usr[3:4]))
if (jr==lnr||jd2==nv2)
lf.axis(1, xaxmar, xlabmar, at[1], lxlab[j1-lv1])
if (jc==1) lf.axis(2, yaxmar, ylabmar, at[2], lylab[j2-lv2])
if (jr==1) lf.axis(3, xaxmar, xlabmar, at[1], lxlab[j1-lv1])
if (jc==lnc||jd1==nv1)
lf.axis(4, yaxmar, ylabmar, at[2], lylab[j2-lv2])
}
box(bty=bty)
if (is.character(all.equal(v1,v2))) { # not diagonal
panel(v1,v2, indx=jd1,indy=jd2, pch=lpch, col=lcol, ...)
if (tjref) abline(h=lref[j1],v=lref[j2],lty=ltyref)
}
else { uu <- par("usr") # diagonal: print variable name
text(mean(uu[1:2]),mean(uu[3:4]), vnames[j1], cex=cex.text) }
}
else frame()
}
}
if (jmain) mtext(main,3,oma[3]*0.9-2*cex.title,outer=TRUE,cex=cex.title)
##- stamp(sure=FALSE,line=par("mgp")[1]+0.5)
stamp(sure=FALSE,line=oma[4]-1.8) # ??? why does it need so much space?
}}
"plmatrix: done"
}
## ====================================================================
panelDefault <- function(xx, yy, indx, indy, pch=par("pch"), col=par("col"),
cex=par("cex"), size=NULL, ...) {
if (is.character(xx)) xx <- factor(xx)
if (is.character(yy)) yy <- factor(yy)
if (is.factor(xx)) {
if (is.factor(yy)) {
lsize <- if (is.null(size))
min(par("pin"))/(4*max(length(levels(xx)),length(levels(yy))))
else size
sunflowerplot(yy~xx, add=T, col=col, size=lsize)
}
else
plmboxes(yy~xx, data.frame(xx=xx,yy=yy), add=TRUE, ...)
}
else {
if (is.factor(yy)) yy <- as.numeric(yy)
if (is.character(pch)) text(xx,yy,pch,col=col,cex=cex)
else points(xx,yy,pch=pch,col=col,cex=cex,...)
}
}
## ====================================================================
panel.smooth <-
function(xx, yy, indx, indy, pch=par("pch"), col=par("col"),
cex=par("cex"), ...)
graphics::panel.smooth(xx, yy, pch=pch, col=col, cex=cex, ...)
## ====================================================================
plmbox <- function(x, at=0, probs=NULL, outliers=TRUE, na.pos=NULL,
width=1, wfac=NULL, minheight= NULL, adj=0.5, extquant=TRUE,
ilim=NULL, ilimext=0.05, widthfac=c(max=2, med=1.3, medmin=0.3, outl=NA),
colors=c(box="lightblue2",med="blue",na="gray90"), lwd=c(med=3, range=2),
warn=options("warn") )
{
## Purpose: multi-boxplot
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 14 Dec 2013, 10:09
f.box <- function(wid, quant, col, lwmax) {
if (wid>lwmax) {
polygon(at+lwmax*lpos, quant, col="black")
if (is.na(col)||col==0) col="white"
polygon(at+lpos*lwmax^2/wid, quant, col=col)
} else
if(wid>0) polygon(at+wid*lpos, quant, col=col)
}
lq <- lwid <- NULL
lfac <- 0
lx <- x[!is.na(x)]
if (length(lx)==0) {
if (warn>=0) warning(":plmbox: no non-missing data")
} else {
stopifnot(length(width)==1,length(wfac)<=1)
if (is.null(probs))
probs <- if (sum(!is.na(x))<20) c(0.1,0.5,1)/2 else
c(0.05,0.1,0.25,0.50,0.75,1)/2
lprobs <- if (all(probs<=0.5)) c(probs,1-probs) else c(probs)
lprobs <- sort(unique(lprobs))
colors <- as.list(colors)
box.col <- colors[["box"]]
if (length(box.col)==1)
box.col <- ifelse(0.25<=last(lprobs,-1) & lprobs[-1]<=0.75, box.col, NA)
## values for degenerate case
lxsd <- IQR(lx)
lfac <- if (is.null(wfac)) width*2*lxsd else wfac*length(lx)
# was mad/dnorm(0)
lmed <- median(lx)
lwmed <- width
lrg <- range(lx)
lrgd <- diff(lrg)
loutl <- lx
lwoutl <- widthfac["outl"]
if (lrgd > 0) { ## non-degenerate
if (is.null(minheight))
minheight <- if (lxsd==0) lrgd*0.02 else lxsd*0.01
lqy <- lq <- quinterpol(lx, probs=lprobs, extend=extquant)
if (length(ilim)) {
lrg <- plcoord(lrg, range=ilim, limext=ilimext)
lqy <- plcoord(lqy, range=ilim, limext=ilimext)
}
loutl <- lx[lx<min(lq)|lx>max(lq)]
## ---
lwid <- lfac*diff(lprobs)/pmax(diff(lq), minheight)
## lxsd <- IQR(x, na.rm=TRUE)
lwmax <- widthfac["max"]*lfac*0.5/ifelse(lxsd>0, lxsd, 1)
lwmed <- max(widthfac["med"]*min(lwmax,max(nainf.exclude(lwid))),
widthfac["medmin"],na.rm=TRUE)
lpos <- c(-adj,-adj,1-adj,1-adj)
if (is.na(lwoutl)) lwoutl <- 0.1*lwmax
## ---
for (li in 1:(length(lprobs)-1))
f.box(lwid[li], lqy[li+c(0,1,1,0)], box.col[li], lwmax)
} ## else warning(":plmbox: degenerate group. no mbox")
## median
lines(at+lwmed*c(-adj,1-adj), rep(lmed,2), col=colors[["med"]],
lwd=lwd["med"])
lines(c(at,at), # +linepos*0.01*diff(par("usr")[1:2])*(0.5-adj),
lrg, lwd=lwd["range"])
if (outliers&&length(loutl)) {
lat <- rep(at,length(loutl))
segments(lat-lwoutl*adj, loutl, lat+lwoutl*(1-adj), loutl)
}
}
if (!is.null(na.pos)) {
lmna <- mean(is.na(x))
if (lmna) {
ldna <- diff(na.pos)
if (length(ldna)==0 || is.na(ldna) || ldna==0)
stop("!plmbox! argument 'na.pos' not suitable")
lwidna <- lfac*lmna/abs(ldna)
f.box(lwidna, na.pos[c(1,2,2,1)], colors[["na"]], lwmax)
}
}
invisible(structure(lfac/length(x), q=lq, width=lwid))
}
## ====================================================================
plmboxes <- function(formula, data, width=1, at=NULL,
probs=NULL, outliers=TRUE, na=FALSE,
refline=NULL, add=FALSE, ilim=NULL, ilimfac=4, ilimext=0.05,
xlim=NULL, ylim=NULL, axes=TRUE, xlab=NULL, ylab=NULL,
labelsvert=FALSE, mar=NULL,
widthfac=NULL, minheight=NULL, colors=NULL, lwd=NULL, ...)
{
## Purpose: multibox plot
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 14 Dec 2013, 23:38
f.ylim <- function(ylm, ext)
c((1+ext)*ylm[1]-ext*ylm[2], (1+ext)*ylm[2]-ext*ylm[1])
formula <- as.formula(formula)
if (length(formula)<3) stop("!plmboxes! formula must have left hand side")
## widths
lwfac <- modarg(widthfac, c(max=2, med=1.3, medmin=0.3, outl=NA, sep=0.003))
## colors, widths
lcol <- modarg(colors,
c(box="lightblue",med="blue",na="gray90",refline="magenta") )
llwd <- modarg(lwd, c(med=3, range=2))
## data
if (length(dim(data))!=2||nrow(data)==0)
stop("!plmboxes! Argument 'data' has dimension ",
paste(dim(data),colapse=" "))
ldt <- model.frame(formula, data, na.action=na.pass)
ly <- ldt[,1]
##
l1asymbox <- length(formula[[3]])>1 && as.character(formula[[3]][[2]])=="1"
if (l1asymbox) ldt <- data.frame(ldt[,1],0,ldt[,2])
## preliminary
lx <- ldt[,2] <- factor(ldt[,2]) # unused levels are dropped
lxx <- ldt[,-1]
llr <- ncol(ldt)>2 ## asymmetrix mboxes required for binary factor
llist <- split(ly,lxx)
if (l1asymbox) {
llev <- ""
llev2 <- c(levels(ldt[[3]]),"","")[1:2]
} else llev <- levels(lx)
lng <- length(llev)
lnn <- sapply(llist,length)
lsd <- mean(sapply(llist,mad,na.rm=TRUE),na.rm=TRUE)
width <- rep(width, length=lng)
lfac <- width*lsd/(max(lnn)*(1+llr))
if (is.null(minheight)) {
lscales <- sapply(llist, IQR, na.rm=TRUE)
minheight <- median(lscales)*0.02
}
## labels
if (is.null(xlab)||is.na(xlab)) {
xlab <- as.character(formula[[3]])
if (length(xlab)>1) xlab <- xlab[2]
if (xlab=="1") xlab <- ""
}
if (is.null(ylab)) ylab <- as.character(formula[[2]])
## position
if (is.null(at)) at <- 1:lng else
if (length(at)!=lng) {
warning(":plmboxes: 'x' has wrong length")
at <- at[1:lng] ## may produce NAs
}
## probabilities
if (is.null(probs))
probs <- if (sum(!is.na(ly))/(lng*(1+llr))<20) c(0.1,0.5,1)/2 else
c(0.05,0.1,0.25,0.50,0.75,1)/2
## box for NA's?
if (is.null(na)||is.na(na)||(is.logical(na)&&!na)) na.pos <- NULL else
if (is.logical(na))
na.pos <- c(min(ly, na.rm=TRUE)*(1-0.3)-0.3*max(ly, na.rm=TRUE))
if (length(na.pos)==1)
na.pos <- na.pos+ 0.03*diff(range(ly,na.rm=TRUE))*c(-1,1)
lusr <- par("usr")
## plot range
lrg <- if (add) lusr[3:4] else range(ly, na.pos, na.rm=TRUE, finite=TRUE)
ilim <- i.def(ilim)
if (is.logical(ilim)) ilim <- pllimits(ilim, ly, limfac=ilimfac)
if (length(ilim)!=2 || (is.numeric(ilim)&&(ilim[1]>=ilim[2]))) {
warning(":plmboxes: unsuitable argument 'ilim'")
ilim <- NULL
}
if (is.null(ilim)||all(!ilim)) ilim <- lrg
ljlim <- ilim[1]<lrg[1] | ilim[2]<lrg[2] ## inner range is actif
if (add) {
xlim <- lusr[1:2]
ylim <- lusr[3:4]
} else {
if(u.nuna(xlim)) xlim <- ## better: NAs -> default value
range(at, na.rm=TRUE)+ max(width[c(1,length(width))])*c(-1,1)*0.5
if(u.nuna(ylim)) ylim <- f.ylim(ilim,ilimext)
## margins
if (is.null(mar)||is.na(mar)) {
mar <- c(ifelse(labelsvert, min(7,1+1.1*max(nchar(llev))), 4), 4,4,1)
oldpar <- par(mar=mar)
on.exit(par(oldpar))
} else par(mar=rep(mar,length=4))
##- if(is.null(xlim)) xlim <-
##- range(at, na.rm=TRUE)+ max(width[c(1,length(width))])*c(-1,1)*0.5
## ---------------------------------
plot(xlim, ylim, type="n", axes=FALSE, xlab="", ylab=ylab, mar=mar, ...)
if (axes) {
axis(1, at=at, labels=llev, las=1+2*labelsvert)
lat <- pretty(f.ylim(lrg, ilimext)) #, n=7,n.min=5
if(!is.null(na.pos)) {
lat <- lat[lat>max(na.pos)]
mtext("NA",2,1,at=mean(na.pos),las=1)
}
if (l1asymbox) {
mtext(llev2[1], 1,1, at=0.75)
mtext(llev2[2], 1,1, at=1.25)
}
axis(2, at=lat)
if (ljlim) { ## inner and outer box
box(lty=3)
lines(par("usr")[c(1,2,2,1,1)],ilim[c(1,1,2,2,1)])
} else box()
}
mtext(xlab, 1, par("mar")[1]-1)
} # if (!add)
## ---
if (!is.null(refline))
abline(h=refline, col=lcol[["refline"]], lty=3, lwd=1.5)
## ---
lusrd <- diff(par("usr")[1:2])
lsep <- lwfac["sep"]*llr*lusrd
lwoutl <- lwfac["outl"]
if (is.na(lwoutl)) {
lwoutl <- 0.05*lusrd
lwfac["outl"] <- lwoutl/lng
}
if (llr) lwfac[c("medmin","outl")] <- lwfac[c("medmin","outl")] /2
## ------------
for (li in 1:lng) {
if (is.na(at[li])) next
if (length(lli <- llist[[li]]))
plmbox(lli,at[li]-lsep, probs=probs, outliers=outliers, wfac=lfac[li],
## adj=0.5*(1+llr), na.pos=na.pos, minheight=minheight, extquant=TRUE,
adj=1-0.5*(1-llr), na.pos=na.pos, extquant=TRUE,
ilim=if(ljlim) ilim, ilimext=ilimext,
widthfac=lwfac, colors=lcol, lwd=llwd, warn=-1)
if (llr) ## second half of asymmetrix mbox
if (length(llir <- llist[[li+lng]]))
plmbox(llir,at[li]+lsep,probs=probs, outliers=outliers, wfac=lfac[li],
adj=0, na.pos=na.pos, extquant=TRUE,
ilim=ilim, ilimext=ilimext,
widthfac=lwfac, colors=lcol, warn=-1)
}
invisible(at)
}
## ===========================================================================
plres2x <-
function(formula=NULL, reg=NULL, data=reg, restricted=NULL, size = 0,
slwd = 1, scol = 2, xlab = NULL, ylab= NULL, xlim=NULL, ylim=NULL,
main = NULL, cex.title= NULL, ...)
{
## Purpose: plot residuals vs. two x`s
## Author: ARu , Date: 11/Jun/91
## Aenderungen: MMae, 30/Jan/92, Dez.94
## --------------------------------------------------------------------------
## Arguments:
## formula z~x+y, where
## x, y coordinates of points given by two vector arguments.
## z gives orientation (by sign)
## and size (by absolute value) of symbol.
## reg regression results
## data data
## you must specify either reg or data
## restricted absolute value which truncates the size.
## The corresponding symbols are marked by stars.
## size the symbols are scaled so that "size" is the size of
## the largest symbol in cm.
## main main title, defaults to the formula
## ... additional arguments for the S-function `plot`
## the function currently only plots z for the first two terms of the
## right hand side of formula
## --------------------------------------------------------------------------
lform <- as.formula(formula)
if (length(reg)==0) {
if (length(data)==0) stop("either reg or data must be specified")
ldata <- data
if (length(lform)<3)
stop ("left hand side of formula is missing. Did you mean to use reg results?")
} else
if (inherits(reg,"lm")) {
ldata <- eval(reg$call$data)
##- ldata <- eval(parse(text=as.character(reg$call[3])))
lftext <- deparse(formula(reg))
if (length(formula)==0) lform <- formula(reg)[c(1,3)]
if (length(lform)<3) {
lform <- update.formula(lform,residuals~.)
lrs <- resid(reg)
if (length(lrs)!=nrow(ldata)) {
ldata <- ldata[names(lrs),]
if (nrow(ldata)!=length(lrs)) stop("!plres2x! residuals and data incompatible")
}
ldata <- data.frame(ldata,residuals=resid(reg))
}
} else stop("!plres2x! unsuitable argument reg")
lftext <- deparse(lform)
if (length(main)==0) main <- lftext
if (is.logical(main)) main <- if (main) lftext else ""
main <- as.character(main)
if (length(cex.title)==0) cex.title <- max(0.5, min(1.2,
par("mfg")[4]*par("pin")[1]/(par("cin")[1]*nchar(main))))
if (!is.data.frame(ldata)) {
if(is.matrix(data)) ldata <- as.data.frame(data) else
stop("data is not a data.frame") }
ld <- model.frame(lform,ldata)
ld <- nainf.exclude(ld)
z <- ld[,1]
x <- as.numeric(ld[,2])
y <- as.numeric(ld[,3])
if (length(xlim)==0) xlim <- range(x)
if (length(ylim)==0) ylim <- range(y)
##--- restrict z values: ---
if(length(restricted)==0) restr <- FALSE else {
restr <- abs(z) > restricted
z <- pmin( pmax( z, -restricted), restricted) }
## size
if (is.null(size)||is.na(size)||size<=0) size <- 5/log10(length(x))
lpin <- par("pin")
fx <- (size * diff(xlim))/100
fy <- fx/diff(xlim)*diff(ylim)/lpin[2]*lpin[1]
##--
if (length(xlab)==0) xlab <- names(ld)[2]
if (length(ylab)==0) ylab <- names(ld)[3]
plot(x, y, xlim = xlim + c(-1,1)* fx, ylim = ylim + c(-1,1)* fy, pch = ".",
xlab=xlab, ylab=ylab, main="", ...)
##---------------
##--- draw symbols: ---
z <- z/max(abs(z), na.rm = TRUE)
usr <- par("usr")
sxz <- fx * abs(z)
syz <- fy * z
segments(x - sxz, y - syz, x + sxz, y + syz, lwd = slwd, col=scol)
##--- mark restricted observations: ---
if(any(restr)) {
points((x - sxz)[restr], (y - syz)[restr], pch= 8, mkh = 1/40)
points((x + sxz)[restr], (y + syz)[restr], pch= 8, mkh = 1/40)
}
if (length(main)>0) mtext(main, 3, 1, cex=cex.title*par("cex"))
stamp(sure=FALSE)
"plres2x done"
}
## ==========================================================================
plfitpairs <- function(object, ssize=0.02, main=NULL) #, pch=NULL
{
## Purpose: pairs plot of fitted values for multinomial regression
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 5 Aug 2004, 10:54
if (is.null(main)) main <- paste("fitted prob.",object$formula)
lpr <- object$fitted.values
lny <- ncol(lpr)
ly <- object$y
if(length(ly)==0) stop("!plfitpairs! no response values found")
ly <- as.numeric(factor(object$y))
##- if (is.factor(ly)) ly <- as.numeric(factor())
if (max(ly)!=lny)
stop("!plfitpairs! ncol of fitted values != number of levels in y")
## if (length(pch)<lny) pch <- 1:lny
lmx <- max(lpr)
l.panel <- function(x,y,indx,indy,ly,col, ssize) {
lix <- indx==ly
liy <- indy==ly
x[!(lix|liy)] <- NA
segments(x-ssize*lix,y-ssize*liy,x+ssize*lix,y+ssize*liy,col=col)
abline(1,-1,lty=3)
}
plmatrix(lpr, panel=l.panel, pch=ly, range.=c(0,lmx), main=main, ssize=ssize)
"plfitpairs done"
}
## ===================================================================
plTA.polr <- function(object, colbars=grey(0.7), colref=grey(0.7),
ploty=FALSE)
{
## Purpose: plot "conditional median" residuals against fit for
## cumulative logit model
## ----------------------------------------------------------------------
## Arguments:
## object result of polr
## colbars color to be used for plotting residuals
## colref color to be used for plotting the reference line
## ploty if TRUE, the latent response will be plotted instead of the
## residuals
## Remark: experimental function !!!
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 2 Oct 2007, 12:18
if (length(object$call$weights))
warning(":plTA.polr: Weigths are not visualized in the plot.")
lyy <- residuals.polr(object)
lfit <- lyy[,"fit"]
if (ploty) lyy <- lyy[,c("median","lowq","uppq")]+lfit
plot(range(lfit),range(lyy[,1:3]),type="n", # robrange(lyy[,1:3],fac=4),
xlab="fit",ylab=if (ploty) "latent variable" else "residual")
if (ploty) abline(0,1,col=colref) else
abline(h=c(0,-1,1)*qlogis(0.975),col=colref)
for (lk in 1:length(object$zeta))
if (ploty) abline(h=object$zeta[lk],col=colref,lty=5) else
abline(object$zeta[lk],-1,col=colref,lty=5)
segments(lfit,lyy[,"lowq"],lfit,lyy[,"uppq"],col=colbars)
points(lfit,lyy[,"median"],pch="-",cex=1.5)
ls <- loess(lyy[,"median"]~lfit,span=0.7)
lx <- seq(min(lfit),max(lfit),length=51)
lsy <- predict(ls,newdata=data.frame(lfit=lx),family="symmetric")
lines(lx,lsy,col="red")
}
## ===========================================================================
## additional useful functions
## ===========================================================================
dropdata <- function(data, rowid=NULL, incol="row.names", colid=NULL)
{
## Purpose: drop observations from a data frame
## ----------------------------------------------------------------------
## Author: Werner Stahel
li <- lj <- NULL
lattr <- attributes(data)
lattr <- lattr[is.na(match(names(lattr),
c("dim","dimnames","row.names","names")))]
ln <- NROW(data)
if (!is.null(rowid)) {
lrn <- RNAMES(data)
if (is.null(lrn)) lrn <- as.character(1:NROW(data))
if (incol=="row.names")
li <- match(as.character(rowid),lrn,nomatch=0)
else {
incol <- if (is.numeric(incol)) (1:ncol(data))[incol] else
match(incol, colnames(data))
if (is.na(incol)) stop("misspecified argument `incol`")
li <- match(rowid,data[,incol],nomatch=0)
}
if (any(li==0)) warning(":dropdata: observations",
paste(rowid[li==0],collapse=", "),"not found")
li <- li[li>0]
if (!is.null(li)) {
data <- cbind(data)[-li,]
names(li) <- lrn[li]
}
}
## drop variables
if (!is.null(colid)) {
lj <- match(as.character(colid),names(data),nomatch=0)
if (any(lj==0)) warning(":dropdata: variables ",
paste(colid[lj==0],collapse=", ")," not found")
lj <- lj[lj>0]
if (!is.null(lj)) data <- data[,-lj,drop=FALSE]
}
if (length(li)==0&length(lj)==0) {
warning(":dropdata: no data to be dropped")
return(data)
}
if (length(li)) {
if (length(li)==NROW(data)) warning(":dropobs: no observations left")
if (length(lattr$na.action)) {
lin <- which(naresid(lattr$na.action, 1:ln%in%li))
names(lin) <- lrn[li]
li <- c(lattr$na.action, lin)
}
class(li) <- "exclude"
lattr$na.action <- li
}
attributes(data) <- c(attributes(data),lattr)
data
}
## ======================================================================
subset <- function(x, ...) {
## function subset that preserves attributes 'doc' and 'tit'
lattr <- attributes(x)[c("doc","tit")]
lsubs <- base::subset(x, ...)
attributes(lsubs) <- c(attributes(lsubs),lattr)
lsubs
}
## ======================================================================
showd <- function(data, first=3, nrow.=4, ncol.=NULL)
{
## print some rows (and columns) of a matrix or data.frame
ldoc <- getUserOption("doc")
if (length(ldoc)>0 && ldoc && length(tit(data))>0) {
cat("tit: ",tit(data),"\n")
}
lldim <- length(dim(data))
if (lldim>2) stop("!showd not yet programmed for arrays")
if (lldim>0) cat("dim: ",dim(data),"\n") else
if (is.factor(data)) data <- as.character(data)
ldata <- cbind(data)
l.nr <- nrow(ldata)
l.nc <- ncol(ldata)
if (is.null(colnames(ldata))) colnames(ldata) <- paste("c",1:l.nc,sep=".")
## select columns
l.ic <- if (length(ncol.)==0) 1:l.nc else {
if (length(ncol.)==1) {
if (l.nc>ncol.)
c(seq(1,by=l.nc%/%ncol.,length=ncol.-1),l.nc) else 1:l.nc
} else {
lic <- ncol.[ncol.>0&ncol<=l.nc]
if (length(lic)>0) lic else 1:l.nc
}
}
## select rows
if (l.nr<=nrow.+first) l.dc <- format(ldata[,l.ic, drop=FALSE]) else {
l.ir <- c(1:first,round(seq(first,l.nr,length=nrow.+1))[-1])
l.ir <- unique(c(last(l.ir,-1),l.nr))
l.dc <- data.frame(u.merge(format(ldata[l.ir,l.ic]),"",after=first),
stringsAsFactors=FALSE)
names(l.dc) <- colnames(ldata)[l.ic]
lrn <- row.names(ldata)
if (is.null(lrn)) lrn <- paste("r",1:l.nr,sep=".")
row.names(l.dc) <- c(lrn[1:first],"...", lrn[l.ir[-(1:first)]])
}
## was vector or array with only 1 column
if (l.nc==1) {
if (lldim>0) cat(" transposed column\n")
row.names(l.dc) <-
format(rbind(row.names(l.dc),l.dc[,1]),justify="right")[1,]
l.dc <- t(l.dc)
}
print(l.dc,quote=FALSE)
if (length(ldoc)&&ldoc&&length(doc(data)))
cat("\ndoc: ",paste(doc(data),collapse="\n "),"\n")
invisible(l.dc)
}
## -------------------------------------------------------------------------
mframe <-
function(mfrow=NULL, mfcol=NULL, mft=NULL, row=TRUE, oma=c(0,0,2,1),
mar=getUserOption("mar"), mgp=getUserOption("mgp"), ...)
{
## Purpose: par(mfrow...)
## Author: Werner Stahel, 1994 / 2001
if (length(mfrow)==2) {
mfcol <- mfrow[2]
mfrow <- mfrow[1]
}
if (is.null(mft)) {
if (is.null(mfrow)) mfrow <- 1
if (is.null(mfcol)) mfcol <- 1
} else {
t.din <- par("din")
if (is.null(mfrow))
mfrow <- max(1,ceiling(sqrt(mft*t.din[2]/t.din[1])))
mfcol <- ceiling(mft/mfrow)
mfrow <- ceiling(mft/mfcol)
}
mfrow <- max(1,mfrow)
mfcol <- max(1,mfcol)
t.oma <- if (mfrow*mfcol>1) oma else rep(0,4)
if (length(mar)==0) mar <- c(3,3,1,1)+0.5 else
mar <- rep(mar, length=4)
if (length(mgp)!=3) mgp <- c(2,0.8,0) else
invisible(if(row)
par(mfrow=c(mfrow,mfcol), oma=oma, mar=mar, mgp=mgp, ...) else
par(mfcol=c(mfrow,mfcol), oma=oma, mar=mar, mgp=mgp, ...) )
}
## ==========================================================================
robrange <-
function(data, trim=0.2, fac=3, na.rm=TRUE)
{
lna <- any(!is.finite(data))
if (lna) {
if(!na.rm) stop("!robrange! 'data' contains NAs")
data <- data[is.finite(data)]
}
ln <- length(data)
if (is.character(data)|length(data)==0) stop("!robrange! invalid data")
trim <- c(trim, 0.2)[1]
if (!is.finite(trim)) trim <- 0.2
lmn <- mean(data,trim=trim)
lds <- sort(abs(data-lmn))
lnt <- ceiling((1-trim)*ln)
if (lnt<3 | lnt==ln) {
warning(":robrange: not enough valid data. returning ordinary range")
lsd <- Inf } else {
lsd <- fac*sum(lds[1:lnt]/(lnt-1))
if (lsd==0) {
warning(":robrange: robust range has width 0. returning ordinary range")
lsd <- Inf }
}
c(max(lmn-lsd,min(data)), min(lmn+lsd,max(data)))
}
## ==========================================================================
stamp <- function(sure=TRUE, outer.margin = NULL,
project=getUserOption("project"), step=getUserOption("step"),
stamp=getUserOption("stamp"), ...)
{
## Purpose: plot date and project information
## -------------------------------------------------------------------------
## Arguments:
## sure if F, the function only plots its thing if getOption("stamp")>0
## outer if T, the date is written in the outer margin
## project project title
## step title of step of data analysis
## ... arguments to mtext , e.g., line=3
## -------------------------------------------------------------------------
## Author: Werner Stahel, Date: 13 Aug 96, 09:00
if (length(stamp)==0) {
message("stamp() setting userOptions(stamp=1)")
userOptions(stamp=1)
stamp <- 1
}
if (length(outer.margin)==0) outer.margin <- par("oma")[4]>0
t.txt <- date()
t.txt <- paste(substring(t.txt,5,10),",",substring(t.txt,22,23),"/",
substring(t.txt,13,16),sep="")
if (length(project)>0) t.txt <- paste(t.txt,project,sep=" | ")
if (length(step)>0) t.txt <- paste(t.txt,step,sep=" | ")
if( sure | stamp==2 | ( stamp==1 & (
## last figure on page
{ t.tfg <- par("mfg") ; all(t.tfg[1:2]==t.tfg[3:4]) }
|| (is.logical(outer.margin)&&outer.margin) )) )
mtext(t.txt, 4, cex = 0.6, adj = 0, outer = outer.margin, ...)
invisible(t.txt)
}
## =======================================================================
quinterpol <- function(x, probs = c(0.25,0.5,0.75), extend=TRUE)
{
## Purpose:
## ----------------------------------------------------------------------
## Arguments:
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 15 Nov 2014, 16:04
lx <- x[!is.na(x)]
ltb <- table(lx)
ln <- length(lx)
lnn <- length(ltb)
ln1 <- lnn+1
lxx <- as.numeric(names(ltb))
lxm <- (lxx[-1]+lxx[-lnn])/2
lx0 <- if(extend) 2*lxx[1]-lxm[1] else lxx[1]
lx1 <- if(extend) 2*lxx[lnn]-lxm[lnn-1] else lxx[lnn]
lxe <- c(rbind(c(lx0,lxm),lxx),lx1)
lp <- c(0,cumsum(ltb)/ln)
lpp <- (lp[-1]+lp[-ln1])/2
lpe <- c(rbind(lp,c(lpp,1))) ## last element (1) is ineffective
ld <- outer(probs,lpe,"-")
li <- apply(ld>0,1,sum)
lii <- 1:length(probs)
ldd <- cbind(ld[cbind(lii,li)],ld[cbind(lii,li+1)])
lh <- ldd[,1]/(ldd[,1]-ldd[,2])
lxe[li]*(1-lh) + lxe[li+1]*lh
}
## =======================================================================
quantilew <- function(x, probs=c(0.25,0.5,0.75), weights=1, na.rm=FALSE)
{
## Purpose: quantile with weights, crude version
## -------------------------------------------------------------------------
## Arguments:
## -------------------------------------------------------------------------
## Author: KSK Projekt, Date: 14 Dec 1999, 12:02
probs <- probs[!is.na(probs)]
if (length(weights)==1) return(quantile(x, probs))
if (length(weights)!=length(x))
stop("!quantilew! lengths of 'x' and 'weights' must be equal")
if (any(t.ina <- is.na(x))) {
if (!na.rm) stop("!quantilew! NAs not allowed while 'na.rm' is FALSE")
x <- x[!t.ina]
weights <- weights[!t.ina]
}
t.i <- order(x)
t.d <- x[t.i]
t.wg <- cumsum(weights[t.i])/sum(weights)
t.i1 <- apply(outer(t.wg,probs,"<"),2,sum)+1
t.i2 <- pmin(apply(outer(t.wg,probs,"<="),2,sum)+1,length(t.d))
(t.d[t.i1]+t.d[t.i2])/2
}
## ===========================================================================
getUserOption <- function (x, default = NULL)
{
if (is.null(default))
return(userOptions(x))
if (x %in% names(userOptions())&&!is.null(userOptions(x)))
userOptions(x)
else default
}
userOptions <- function (x=NULL, default=NULL, list=NULL, ...)
{
##- lpos <- find("UserOptions")
##- luopt <- get("UserOptions", pos=lpos)
luopt <- if (exists("UserOptions", where=1)) get("UserOptions", pos=1) else
UserOptions
if ((!is.null(x)&&is.character(x))) ## asking for options
return(if(length(x)==1) luopt[[x]] else luopt[x])
if (!is.null(default)) {
default <- as.character(default)
if (default=="TRUE"|default=="all") return(userOptions(list=UserDefault))
if (default=="unset")
userOptions(list=UserDefault[names(UserDefault)%nin%names(luopt)])
if (!is.character(default))
stop("!userOptions! Unsuitable argument default .")
return(userOptions(list=UserDefault[
default[default%in%names(UserDefault)]]))
}
lop <- c(list,list(...))
## show all options
if (length(lop)==0) return(luopt[order(names(luopt))])
## set options
lold <- luopt[names(lop)]
for (li in names(lop))
luopt[li] <- list(lop[[li]])
assign("UserOptions", luopt, pos=1)
## assignInMyNamespace does not work
invisible(lold)
}
## -----------------------------------------------------
c.colors.ra <- c("gray3","gray2","blue","cyan","darkgreen","green",
"burlywood4","burlywood3","burlywood4")
c.colors <- c("black","firebrick3","deepskyblue3","springgreen3",
"darkgoldenrod3","olivedrab3","purple3","orange3","palegreen3")
UserDefault <- UserOptions <-
list(stamp=1, project="", step="", doc=TRUE, show.termeffects=TRUE,
colors = c.colors, colors.ra = c.colors.ra,
mar=c(3,3,3,1), mgp=c(2,0.8,0), plext=0.05, digits=4,
regr.contrasts=c(unordered="contr.wsum", ordered="contr.wpoly"),
termcolumns=c("coef", "df", "ciLow","ciHigh","R2.x",
"signif", "p.value", "p.symb"),
termeffcolumns="coefsymb",
na.print=".",
smoothFunction="smoothRegr", smoothMinobs = 8,
debug=0
)
##- if (!exists("UserOptions")) UserOptions <- UserDefault else
##- userOptions(default="unset")
## ===========================================================================
modarg <- function(arg=NULL, default) {
if (is.null(arg)) return(default)
if (is.null(names(arg))) {
if (length(arg)>length(default)) {
warning(":modarg: argument too long. I use default")
return(default)
}
names(arg) <- names(default)[1:length(arg)]
}
if (any(i <- names(arg)%nin%names(default))) {
warning(":modarg: argument has unsuitable names: ", names(arg)[i])
arg <- arg[!i]
}
if (length(arg)==0) return(default)
if (is.list(default)) arg <- as.list(arg)
default[names(arg)] <- arg
default
}
## ---------------------------------------------------------
last <-
function(data,n = NULL, ncol=NULL, drop=is.matrix(data))
{
ldim <- dim(data)
if (is.null(ldim)) {
if (is.null(n)) n <- 1
ldt <- length(data)
return(data[sign(n)*((ldt-abs(n)+1):ldt)])
}
if (length(ldim)!=2)
stop ("!last! not programmed for arrays of dimension >2")
if (is.null(n)&is.null(ncol)) n <- 1
if (is.null(n)) n <- ldim[1]
if (is.null(ncol)) ncol <- ldim[2]
data[sign(n)*((ldim[1]-abs(n)+1):ldim[1]),
sign(ncol)*((ldim[2]-abs(ncol)+1):ldim[2]), drop=drop]
}
## ==============================================================
createNAvars <-
function(data, vars=NULL, na.prop=0.1, na.label=".NA.",
na.values=NULL, name.suffix=c(".X",".NA"), append=TRUE, ...)
{
if (is.matrix(data)) data <- as.data.frame(data)
if (!(is.data.frame(data)))
stop("!createNAvars! unsuitable first argument")
ldt <- if (length(vars)) {
if (length(lwr <- setdiff(vars, colnames(data))))
stop("!createNAvars! Variables ",paste(lwr, collapse=", "),
" not in 'data'.")
data[,vars]} else data
if ((!is.numeric(na.prop))||na.prop>=1)
stop("!createNAvars! unsuitable argument 'na.prop'")
lvna <- sumna(data) > max(na.prop,0) * nrow(data)
ldclass <- sapply(data, is.numeric) + 2*sapply(data, is.factor)
if (any(lwr <- ldclass==0))
stop("!createNAvars! Funny variables ", paste(lwr, collapse=", "))
if (any(lnum <- lvna & ldclass==1))
rrx <- xNA(ldt[,lnum, drop=FALSE], na.values=na.values,
name.suffix=name.suffix)
if (any(lfac <- lvna & ldclass==2)) {
rrf <- factorNA(ldt[,lfac, drop=FALSE],na.label=na.label,
...)
}
if (!any(c(lnum,lfac))) {
warning(":createNAvars: no variables had enough NAs to be modified")
return( if(append) data else NULL )
}
if (append) { if (any(lfac)) data[names(rrf)] <- rrf
if (any(lnum)) data.frame(data, rrx) else data
}
else data.frame(rrf,rrx)
}
## --------------------------------------------------------------------
factorNA <- function(data, na.label=".NA.", na.prop=0, ...)
{
if (missing(data)||length(data)==0)
stop("!xNA! Argument 'data' missing, with no default, or NULL")
data <- data.frame(data)
## if (!is.data.frame(data)) stop("!xNA! Argument 'data' ...")
lnalabel <- as.character(na.label)
lvn <- names(data)[sapply(data,is.factor)]
ldt <- data[,lvn, drop=FALSE]
lnanum <- na.prop*nrow(data)
for (lv in seq_along(lvn)) {
lfac <- factor(data[,lvn[lv]], ...)
lna <- is.na(lfac)
if (sum(lna)>lnanum) {
levels(lfac) <- c(levels(lfac), lnalabel)
lfac[lna] <- lnalabel
ldt[,lv] <- lfac
}
}
structure(ldt, NA.label = na.label)
}
## -------------------------------------------------------------------
xNA <-
function(data, na.values=NULL, na.prop=0.1, name.suffix=c(".X",".NA"))
{
if (missing(data)||length(data)==0)
stop("!xNA! Argument 'data' missing, with no default, or NULL")
data <- data.frame(data)
## if (!is.data.frame(data)) stop("!xNA! Argument 'data' ...")
lvn <- names(data)
lnsuff <- if (length(name.suffix)==1) c(name.suffix,".NA") else
c(name.suffix,".X",".NA")[1:2]
lvnx <- paste(lvn, lnsuff[1], sep="")
lvnna <- paste(lvn, lnsuff[2], sep="")
na.values <-
if (is.null(na.values)) apply(data,2,median,na.rm=TRUE) else
rep(na.values, length=length(lvn))
names(na.values) <- lvn
ldt <- data[,1,drop=FALSE]
names(ldt) <- "."
for (lv in seq_along(lvn)) {
lna <- is.na(data[,lv])
if (any(lna)) {
ldt[,lvnx[lv]] <-
structure(ifelse(lna, na.values[lv], data[,lv]), na.value=na.values[lv])
ldt[,lvnna[lv]] <- lna
} else ldt[,lvn[lv]] <- data[,lv]
}
##structure(ldt[,-1, drop=FALSE], xNA.values = na.values)
ldt[, -1, drop=FALSE]
}
## ==============================================================
nainf.exclude <- function (object, ...)
## na.omit, modified to omit also Inf and NaN values
{
if (is.atomic(object)) {
i <- is.finite(object)
if (length(dim(i))) ## matrix
return( object[apply(i,1,all),,drop=FALSE] )
else return( object[i] )
}
## list
n <- length(object)
omit <- FALSE
vars <- seq_len(n)
for (j in vars) {
x <- object[[j]]
if (!is.atomic(x))
next
##- x <- is.na(x)
x <- if (is.numeric(x)) !is.finite(x) else is.na(x)
d <- dim(x)
if (is.null(d) || length(d) != 2)
omit <- omit | x
else for (ii in 1:d[2]) omit <- omit | x[, ii]
}
xx <- object[!omit, , drop = FALSE]
if (any(omit > 0L)) {
temp <- seq(omit)[omit]
names(temp) <- attr(object, "row.names")[omit]
attr(temp, "class") <- "exclude"
attr(xx, "na.action") <- temp
}
xx
}
## ===================================================
notna <- function(x,inf=TRUE) if (inf) x[is.finite(x)] else x[!is.na(x)]
## ===================================================
sumna <- function(object,inf=TRUE)
{
## Purpose: count NAs along columns
## ----------------------------------------------------------------------
## Arguments:
## object data.frame, matrix or vector
## inf treat Inf as NA
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 10 Oct 2007, 08:18
ff <- if(inf) {
function(x)
if(is.numeric(x)) sum(!is.finite(x)) else sum(is.na(x)) }
else function(x) sum(is.na(x))
if (is.matrix(object)) apply(object,2,ff) else {
if (is.list(object)) sapply(object,ff)
else if(is.atomic(object)) ff(object)
}
}
## ==========================================================================
logst <- function(data, calib=data, threshold=NULL, mult=1)
{
## Purpose: logs of data, zeros and small values treated well
## -------------------------------------------------------------------------
## Author: Werner Stahel, Date: 3 Nov 2001, 08:22
data <- cbind(data)
calib <- cbind(calib)
lncol <- ncol(calib)
ljthr <- length(threshold)>0
if (ljthr) {
if (is.logical(threshold)&&threshold)
threshold <- attr(data, "threshold")
if (!length(threshold)%in%c(1, lncol))
stop("!logst! argument `threshold` is inadequate")
lthr <- rep(threshold, length=lncol)
ljdt <- !is.na(lthr)
} else {
ljdt <- rep(TRUE, lncol)
lthr <- rep(NA, lncol)
for (lj in 1:lncol) {
lcal <- calib[,lj]
ldp <- lcal[lcal>0&!is.na(lcal)]
if(length(ldp)==0) ljdt[lj] <- FALSE else {
lq <- quantile(ldp,probs=c(0.25,0.75),na.rm=TRUE)
if(lq[1]==lq[2]) lq[1] <- lq[2]/2
lthr[lj] <- lc <- lq[1]^(1+mult)/lq[2]^mult
}
}
}
## transform data
for (lj in 1:lncol) {
if (ljdt[lj]) {
ldt <- data[,lj]
lc <- lthr[lj]
li <- which(ldt<lc)
if (length(li))
ldt[li] <- lc * 10^((ldt[li]-lc)/(lc*log(10)))
data[,lj] <- log10(ldt)
} }
if (length(colnames(data)))
lnmpd <- names(ljdt) <- names(lthr) <- colnames(data) else
lnmpd <- as.character(1:lncol)
if (ncol(data)==1) data <- data[,1]
attr(data,"threshold") <- unname(lthr)
if (any(!ljdt)) {
warning(":logst: no positive data",
if(lncol>1) paste(" for variables ",lnmpd[!ljdt],
". These are not transformed") else ". No transformation")
attr(data,"transformed") <- unname(ljdt)
}
data
}
## ===========================================================================
asinp <- function(x) asin(sqrt(x/100))/asin(1)
## asinperc <- asinp ## compatibility
## ===========================================================================
plcoord <-
function(x, range=NULL, limfac=3.0, limext=0.1, plext=0.05)
{
## Purpose: values for plot with limited "inner" plot range
lrg <- if (length(notna(range))==0)
robrange(x, fac=limfac) else range(range, na.rm=TRUE)
if (length(lrg)==0) lrg <- range(x, na.rm=TRUE)
if (diff(lrg)==0) lrg <- c(-1,1)*lrg
rr <- pmax(pmin(x,lrg[2]),lrg[1])
lxd <- x-rr
lnmod <- c(sum(lxd<0,na.rm=TRUE),sum(lxd>0,na.rm=TRUE))
lrgext <- diff(lrg)*limext
if (sum(lnmod)>0) rr <- rr+lxd/(1+abs(lxd)/lrgext)
attr(rr,"plrange") <- lrg + ifelse(lnmod>0, lrgext, plext*lrgext)*c(-1,1)
attr(rr,"range") <- lrg
attr(rr,"nmod") <- lnmod
class(rr) <- class(x)
rr
}
## ===========================================================================
legendr <- function(x=0.05,y=0.95,legend, ...) {
lusr <- par("usr")
lx <- lusr[1] + x*diff(lusr[1:2])
ly <- lusr[3] + y*diff(lusr[3:4])
legend(lx,ly,legend, ...)
}
## ====================================================================
i.def <- function(arg, value = TRUE, valuetrue = value, valuefalse = FALSE)
{
rr <- arg
if (length(arg)==0 || all(is.na(arg)))
{ rr <- value
} else {
if (length(arg)==1 && is.logical(arg))
rr <- if (arg) valuetrue else valuefalse
}
rr
}
## ===========================================================================
doc <- function(x) attr(x,"doc")
## ---
"doc<-" <- function(x, value)
{
##-- Create doc attribute or PREpend new doc to existing one.
value <- as.character(value)
attr(x, "doc") <- if (length(value)==0) NULL else
if(value[1]=="^") value[-1] else c(value, attr(x, "doc"))
x
}
## ---
tit <- function(x) attr(x,"tit")
## ---
"tit<-" <- function(x, value) ## ! argument must be `value`. demanded by attr
{
attr(x, "tit") <- value
x
}
## ---
is.formula <- function(object)
length(class(object))>0 && class(object)=="formula"
## ----
factor.na <- function(x, ordered=FALSE, naname="NA") {
if (ordered) x <- ordered(x)
if (is.ordered(x)) {
levels(x) <- c(levels(x), naname)
x[is.na(x)] <- naname
return(x)
}
x <- as.character(x)
x[is.na(x)] <- naname
factor(x)
}
## ---------------------------------------
quantNA <- function(vn, data, na.value=NULL) {
if (missing(data)) stop("!quantNA! Argument 'data' missing, woth no default")
dname <- as.character(substitute(data))
if (is.character(vn)) {
if (any(lvna <- vn%nin%names(data)))
stop("!quantNA! Variable(s) ",lv[lvna]," not in 'data")
} else vn <- names(data)[vn]
na.value <- if (is.null(na.value))
apply(data[,vn, drop=FALSE],2,median,na.rm=TRUE) else
rep(na.value, length=length(lv))
names(na.value) <- vn
for (lv in vn) {
lna <- is.na(data[,lv])
if (any(lna)) {
data[,paste(lv, ".NA", sep="")] <- lna
data[lna, lv] <- na.value[lv]
}
}
assign(dname, data, pos=1)
}
## =================================================================
## auxiliary functions
## ============================================================
nafalse <- function(x) if (is.null(x)) FALSE else ifelse(is.na(x), FALSE, x)
Surv <- survival::Surv
u.true <- function(x) length(x)>0 && (!is.na(lx <- as.logical(x[1]))) && lx
u.debug <- function() u.true(getUserOption("debug"))
u.merge <- function(dd1, dd2 = NA, which=NULL, after=NULL,
length=NULL, names=NULL)
{
## Purpose: merge two vectors or expand a vector by NA s
## -------------------------------------------------------------------------
## Arguments:
## dd1 first vector or matrix or data.frame (?),
## dd2 second vector, ...
## which is T for indices for which first vector is used
## after elements of dd2 will be inserted after "after" in dd1
## length length of the result (will be expanded if necessary)
## names names of the result (if length is adequate)
## -------------------------------------------------------------------------
## Author: Werner Stahel, Date: 11 Mar 93, 13:50, and later
llen <- length
n1 <- length(dd1)
nc1 <- ncol(dd1)
nc2 <- ncol(dd2)
if (length(nc1)>0) {
n1 <- nrow(dd1)
if (!( length(dd2)==1 || is.null(nc2) || nc2==nc1 ))
stop("unsuitable second argument")
}
## --- generate which vector for all cases
if (length(which)==0) {
## - after specified
if (length(after)==0) stop("specify either which or after")
if (is.logical(after)) after <- which(after)
wh <- rep(TRUE,n1+length(after))
wh[after+1:length(after)] <- FALSE }
else {
## - which specified
if(is.logical(which)) wh <- which
else {
if (length(llen)==0) llen <- n1+length(which)
wh <- rep(TRUE, llen)
wh[which] <- FALSE }
}
## --- merge
nn <- length(wh)
n2 <- nn-n1
if (!(is.null(names)|length(names)==nn))
warning("argument names not used (unsuitable length)")
if (length(nc1)>0) {
if (!(length(dd2)==1 || NROW(dd2)==n2))
stop("unsuitable number of rows")
rr <- matrix(NA,nn,nc1)
rr[wh,] <- as.matrix(dd1)
rr[!wh,] <- if (is.data.frame(dd2)) as.matrix(dd2) else dd2
##- if (length(names)>0) row.names(rr) <- names else {
##- if (length(lrn1 <- row.names(dd1))>0)
}
else {
rr <- rep(NA,nn)
rr[wh] <- dd1
rr[!wh] <- dd2
if (length(names)>0) names(rr) <- names
}
rr
}
## ============================================================
i.main <- function(main, line=1-outer.margin, cex=NULL, adj=NULL,
outer.margin=NULL, col="black",
doc=getOption("doc"))
{
## Purpose: title
## ----------------------------------------------
cex <- i.def(cex, max(0.5, min(1.2,
par("mfg")[4]*par("pin")[1]/(par("cin")[1]*nchar(main)))),
valuefalse = 0 )
ladj <- i.def(adj, 0.5*(cex>0.5), 0.5, 0)
outer.margin <- i.def(outer.margin, par("oma")[3]>0,
valuefalse = FALSE)
if (outer.margin && 1!=prod(par("mfg")[1:2])) return()
if (length(main)!=0)
mtext(main, 3, line, cex = cex*par("cex"), adj=ladj, outer = outer.margin,
col=col)
if ((!is.null(doc))&&doc&&length(tit(main)))
mtext(tit(main), 3, line-1, outer = outer.margin, col=col)
}
## ==========================================================================
## ==========================================================================
##- is.R <- function ()
##- exists("version") && !is.null(vl <- version$language) && vl == "R"
extractNames <- function(x, orig=NULL) {
if (length(grep("cbind", x)))
eval( parse(text=
paste('c("',
gsub(" *, *",'","', sub("cbind *\\((.*)\\)", "\\1", x)),
'")', sep="") ) ) else {
if(is.null(orig)) NULL else
paste("V",seq_along(orig), sep="")
}
}
RNAMES <- function(x) if (!is.null(dim(x))) row.names(x) else names(x)
"%nin%" <- function(x,y) !x%in%y
getmeth <- function(fn,mt) getS3method(as.character(substitute(fn)),
as.character(substitute(mt)))
warn <- function()
table(paste(names(lw <- warnings()),"@",substr(unlist(lw),1,10)))
BR <- function() {browser();browser()}
DB <- function(on=TRUE) options(error=if(on) recover else NULL, warn=on)
# options(show.termeffects=TRUE)
IR <- function(condition) {
if (condition) {
cat("INTERRUPT: ",as.character(substitute(condition)))
traceback()
browser()
}
}
u.nuna <- function(x) length(x)==0 || (is.atomic(x)&&any(is.na(x)))
regrModelClasses <- c("regr","lm","glm","survreg","coxph","rq","polr")
## ===========================================================================
if (length(getUserOption("colors"))==0)
userOptions(colors = c.colors)
if (length(getUserOption("colors.ra"))==0)
userOptions(colors.ra = c.colors.ra)
c.weekdays <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
"Saturday", "Sunday")
c.months <- c("January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November",
"December")
c.mon <- substring(c.months,1,3)
drevgumbel <- function (x, location = 0, scale = 1)
{ # from VGAM
if (!nafalse(scale>0))
stop("\"scale\" must be positive")
E <- exp((x - location)/scale)
E * exp(-E)/scale
}
prevgumbel <- function (q, location = 0, scale = 1)
{
if (!nafalse(scale>0))
stop("\"scale\" must be positive")
-expm1(-exp((q - location)/scale)) # expm1(u) = exp(u)-1, accurately also for |u| << 1
}
qrevgumbel <- function (p, location = 0, scale = 1)
{
if (!nafalse(scale>0))
stop("\"scale\" must be positive")
location + scale * log(-log(p))
}
qrevgumbelexp <- function (p) exp(qrevgumbel(p))
rrevgumbel <- function (n, location = 0, scale = 1)
{
if (!nafalse(n>=1))
stop("bad input for argument \"n\"")
if (!nafalse(scale>0))
stop("\"scale\" must be positive")
location + scale * log(-log(runif(n)))
}
## ===========================================================================
## repaired versions of drop functions
## ===========================================================================
##- if(getRversion() <= "2.7.1") {
add1.default <- function(object, scope, scale = 0, test=c("none", "Chisq"),
k = 2, trace = FALSE, ...)
{
if(missing(scope) || is.null(scope)) stop("no terms in scope")
if(!is.character(scope))
scope <- add.scope(object, update.formula(object, scope))
if(!length(scope))
stop("no terms in scope for adding to object")
## newform <- update.formula(object,
## paste(". ~ . +", paste(scope, collapse="+")))
## data <- model.frame(update(object, newform)) # remove NAs
## object <- update(object, data = data)
ns <- length(scope)
ans <- matrix(nrow = ns + 1, ncol = 2,
dimnames = list(c("<none>", scope), c("df", "AIC")))
ans[1, ] <- extractAIC(object, scale, k = k, ...)
n0 <- length(object$residuals)
env <- environment(formula(object))
for(i in seq(ns)) {
tt <- scope[i]
if(trace > 1) {
cat("trying +", tt, "\n", sep="")
utils::flush.console()
}
nfit <- update(object, as.formula(paste("~ . +", tt)),
evaluate = FALSE)
nfit <- eval(nfit, envir=env) # was eval.parent(nfit)
ans[i+1, ] <- extractAIC(nfit, scale, k = k, ...)
if(length(nfit$residuals) != n0)
stop("number of rows in use has changed: remove missing values?")
}
dfs <- ans[,1] - ans[1,1]
dfs[1] <- NA
aod <- data.frame(Df = dfs, AIC = ans[,2])
test <- match.arg(test)
if(test == "Chisq") {
dev <- ans[,2] - k*ans[, 1]
dev <- dev[1] - dev; dev[1] <- NA
nas <- !is.na(dev)
P <- dev
P[nas] <- pchisq(dev[nas], dfs[nas], lower.tail=FALSE)
aod[, c("LRT", "Pr(Chi)")] <- list(dev, P)
}
head <- c("Single term additions", "\nModel:",
deparse(as.vector(formula(object))),
if(scale > 0) paste("\nscale: ", format(scale), "\n"))
class(aod) <- c("anova", "data.frame")
attr(aod, "heading") <- head
aod
}
## ==================================================================
drop1.default <- function(object, scope, scale = 0, test=c("none", "Chisq"),
k = 2, trace = FALSE, ...)
{
tl <- attr(object$terms, "term.labels")
if(missing(scope)) scope <- drop.scope(object)
else {
if(!is.character(scope))
scope <- attr(terms(update.formula(object, scope)), "term.labels")
if(!all(match(scope, tl, 0L) > 0L))
stop("scope is not a subset of term labels")
}
## data <- model.frame(object) # remove NAs
## object <- update(object, data = data)
ns <- length(scope)
ans <- matrix(nrow = ns + 1, ncol = 2,
dimnames = list(c("<none>", scope), c("df", "AIC")))
ans[1, ] <- extractAIC(object, scale, k = k, ...)
n0 <- nobs(object)
env <- environment(formula(object))
for(i in seq(ns)) {
tt <- scope[i]
if(trace > 1) {
cat("trying -", tt, "\n", sep="")
utils::flush.console()
}
nfit <- update(object, as.formula(paste("~ . -", tt)),
evaluate = FALSE)
nfit <- eval(nfit, envir=env) # was eval.parent(nfit)
ans[i+1, ] <- extractAIC(nfit, scale, k = k, ...)
if (nobs(nfit) != n0)
stop("number of rows in use has changed: remove missing values?")
}
dfs <- ans[1,1] - ans[,1]
dfs[1] <- NA
aod <- data.frame(Df = dfs, AIC = ans[,2])
test <- match.arg(test)
if(test == "Chisq") {
dev <- ans[, 2] - k*ans[, 1]
dev <- dev - dev[1] ; dev[1] <- NA
nas <- !is.na(dev)
P <- dev
P[nas] <- pchisq(dev[nas], dfs[nas], lower.tail = FALSE)
aod[, c("LRT", "Pr(Chi)")] <- list(dev, P)
}
head <- c("Single term deletions", "\nModel:",
deparse(as.vector(formula(object))),
if(scale > 0) paste("\nscale: ", format(scale), "\n"))
class(aod) <- c("anova", "data.frame")
attr(aod, "heading") <- head
aod
}
# ==================================================================
i.polrfit <-
function (formula, data, weights, start, ..., subset, na.action,
contrasts = NULL, Hess = FALSE, model = FALSE, x = TRUE,
method = c("logistic", "probit", "cloglog", "cauchit"),
envir = parent.frame())
## copy of polr from MASS.
## argument x added: keep model.matrix
## 1 line added by WSt to keep eta in the result
## ::: change argument name method:::
{
logit <- function(p) log(p/(1 - p)) ## !!!???!!! use qlogis() --- why?
fmin <- function(beta) {
theta <- beta[pc + 1:q]
gamm <- c(-100, cumsum(c(theta[1], exp(theta[-1]))),
100)
eta <- offset
if (pc > 0)
eta <- eta + drop(x %*% beta[1:pc])
pr <- pfun(gamm[y + 1] - eta) - pfun(gamm[y] - eta)
if (all(pr > 0))
-sum(wt * log(pr))
else Inf
}
gmin <- function(beta) {
jacobian <- function(theta) {
k <- length(theta)
etheta <- exp(theta)
mat <- matrix(0, k, k)
mat[, 1] <- rep(1, k)
for (i in 2:k) mat[i:k, i] <- etheta[i]
mat
}
theta <- beta[pc + 1:q]
gamm <- c(-100, cumsum(c(theta[1], exp(theta[-1]))),
100)
eta <- offset
if (pc > 0)
eta <- eta + drop(x %*% beta[1:pc])
pr <- pfun(gamm[y + 1] - eta) - pfun(gamm[y] - eta)
p1 <- dfun(gamm[y + 1] - eta)
p2 <- dfun(gamm[y] - eta)
g1 <- if (pc > 0)
t(x) %*% (wt * (p1 - p2)/pr)
else numeric(0)
xx <- .polrY1 * p1 - .polrY2 * p2
g2 <- -t(xx) %*% (wt/pr)
g2 <- t(g2) %*% jacobian(theta)
if (all(pr > 0))
c(g1, g2)
else rep(NA, pc + q)
}
m <- match.call(expand.dots = FALSE)
method <- match.arg(method)
pgumbel <- function(q) exp(pweibull(log(q))) # ???
dgumbel <- function(q) stop("BUG: dgumbel not programmed")
pfun <- switch(method, logistic = plogis, probit = pnorm,
cloglog = pgumbel, cauchit = pcauchy)
dfun <- switch(method, logistic = dlogis, probit = dnorm,
cloglog = dgumbel, cauchit = dcauchy)
if (is.matrix(eval.parent(m$data)))
m$data <- as.data.frame(data)
m$start <- m$Hess <- m$method <- m$model <- m$... <- m$envir <- NULL
m[[1]] <- as.name("model.frame")
m <- eval(m, envir=environment())
Terms <- attr(m, "terms")
x.ret <- x ## ! Wst need to copy x because x is used for the matrix
x <- model.matrix(Terms, m, contrasts)
xint <- match("(Intercept)", colnames(x), nomatch = 0)
n <- nrow(x)
pc <- ncol(x)
asgn <- attr(x, "assign") ## ws
cons <- attr(x, "contrasts")
if (xint > 0) {
x <- x[, -xint, drop = FALSE]
pc <- pc - 1
asgn <- asgn[-xint] ## ws
attr(Terms, "intercept") <- 0 ## ws
}
else warning("an intercept is needed and assumed")
wt <- model.weights(m)
if (!length(wt))
wt <- rep(1, n)
offset <- model.offset(m)
if (length(offset) <= 1)
offset <- rep(0, n)
y <- model.response(m)
if (!is.factor(y))
stop("response must be a factor")
lev <- levels(y)
if (length(lev) <= 2)
stop("response must have 3 or more levels")
y <- unclass(y)
q <- length(lev) - 1
Y <- matrix(0, n, q)
.polrY1 <- col(Y) == y
.polrY2 <- col(Y) == y - 1
if (missing(start)) {
q1 <- length(lev)%/%2
y1 <- (y > q1)
X <- cbind(Intercept = rep(1, n), x)
fit <- switch(method,
logistic = glm.fit(X, y1, wt, family = binomial(),
offset = offset),
probit = glm.fit(X, y1, wt, family = binomial("probit"),
offset = offset),
cloglog = glm.fit(X, y1, wt, family = binomial("probit"),
offset = offset),
cauchit = glm.fit(X, y1, wt, family = binomial("cauchit"),
offset = offset)
)
if (!fit$converged) { ## new attempt
fit <- lm.fit(X, as.numeric(y), wt)
fit$coefficients <- fit$coef/sqrt(mean(fit$resid^2))
warning("attempt to find suitable starting values may have failed")
}
coefs <- fit$coefficients
if (anyNA(coefs)) {
warning("design appears to be rank-deficient, so dropping some coefs")
keep <- names(coefs)[!is.na(coefs)]
coefs <- coefs[keep]
x <- x[, keep[-1], drop = FALSE]
pc <- ncol(x)
asgn <- asgn[keep[-1]] ## ws
}
spacing <- logit((1:q)/(q + 1))
if (method != "logistic")
spacing <- spacing/1.7
gammas <- -coefs[1] + spacing - spacing[q1]
thetas <- c(gammas[1], log(diff(gammas)))
s0 <- c(coefs[-1], thetas)
}
else if (length(start) != pc + q)
stop("\"start\" is not of the correct length")
else {
s0 <- if (pc > 0)
c(start[seq_len(pc + 1)], diff(start[-seq_len(pc)]))
else c(start[1], diff(start))
}
##
res <- optim(s0, fmin, gmin, method = "BFGS", hessian = Hess,
...)
##
beta <- res$par[seq_len(pc)]
theta <- res$par[pc + 1:q]
zeta <- cumsum(c(theta[1], exp(theta[-1])))
deviance <- 2 * res$value
niter <- c(f.evals = res$counts[1], g.evals = res$counts[2])
names(zeta) <- paste(lev[-length(lev)], lev[-1], sep = "|")
if (pc > 0) {
names(beta) <- colnames(x)
eta <- drop(x %*% beta)
}
else {
eta <- rep(0, n)
}
cumpr <- matrix(pfun(matrix(zeta, n, q, byrow = TRUE) - eta),
, q)
fitted <- t(apply(cumpr, 1, function(x) diff(c(0, x, 1))))
dimnames(fitted) <- list(row.names(m), lev)
##
fit <- list(coefficients = beta, zeta = zeta, deviance = deviance,
linear.predictor = eta, ## added by WSt
fitted.values = fitted, lev = lev, terms = Terms,
df.residual = sum(wt) - pc - q, edf = pc + q, n = sum(wt),
nobs = sum(wt),
call = match.call(), method = method,
convergence = res$convergence,
niter = niter)
if (Hess) {
dn <- c(names(beta), names(zeta))
H <- res$hessian
dimnames(H) <- list(dn, dn)
fit$Hessian <- H
}
if (model) fit$model <- m
if (x.ret)
fit$x <- structure(x, assign=asgn) ## !WSt return model.matrix
## structure(cbind("(Intercept)"=1, x), assign=c("(Intercept)"=0, asgn) )
## , contrasts=cons probably not needed
fit$na.action <- attr(m, "na.action")
fit$contrasts <- cons
fit$xlevels <- .getXlevels(Terms, m)
class(fit) <- "polr"
fit
}
## -------------------------------------------------------
i.multinomfit <-
function (formula, data, weights, subset, na.action, contrasts = NULL,
Hess = FALSE, summ = 0, censored = FALSE, model = FALSE, x = TRUE,
...)
## copy of multinom from MASS. Argument x added by WSt
{
## require(nnet)
class.ind <- function(cl) {
n <- length(cl)
x <- matrix(0, n, length(levels(cl)))
x[(1L:n) + n * (as.integer(cl) - 1L)] <- 1
dimnames(x) <- list(names(cl), levels(cl))
x
}
summ2 <- function(X, Y) {
X <- as.matrix(X)
Y <- as.matrix(Y)
n <- nrow(X)
p <- ncol(X)
q <- ncol(Y)
Z <- t(cbind(X, Y))
storage.mode(Z) <- "double"
z <- .C(nnet:::VR_summ2, as.integer(n), as.integer(p), as.integer(q),
Z = Z, na = integer(1L))
Za <- t(z$Z[, 1L:z$na, drop = FALSE])
list(X = Za[, 1L:p, drop = FALSE], Y = Za[, p + 1L:q])
}
call <- match.call()
m <- match.call(expand.dots = FALSE)
m$summ <- m$Hess <- m$contrasts <- m$censored <- m$model <- m$... <- NULL
m[[1L]] <- as.name("model.frame")
m <- eval.parent(m)
Terms <- attr(m, "terms")
XX <- X <- model.matrix(Terms, m, contrasts) ## !WSt store X
cons <- attr(X, "contrasts")
Xr <- qr(X)$rank
Y <- model.response(m)
if (!is.matrix(Y))
Y <- as.factor(Y)
w <- model.weights(m)
if (length(w) == 0L)
if (is.matrix(Y))
w <- rep(1, dim(Y)[1L])
else w <- rep(1, length(Y))
lev <- levels(Y)
if (is.factor(Y)) {
counts <- table(Y)
if (any(counts == 0L)) {
empty <- lev[counts == 0L]
warning(sprintf(ngettext(length(empty), "group %s is empty",
"groups %s are empty"), paste(sQuote(empty),
collapse = " ")), domain = NA)
Y <- factor(Y, levels = lev[counts > 0L])
lev <- lev[counts > 0L]
}
if (length(lev) < 2L)
stop("need two or more classes to fit a multinom model")
if (length(lev) == 2L)
Y <- as.integer(Y) - 1
else Y <- class.ind(Y)
}
if (summ == 1) {
Z <- cbind(X, Y)
z1 <- cumprod(apply(Z, 2L, max) + 1)
Z1 <- apply(Z, 1L, function(x) sum(z1 * x))
oZ <- order(Z1)
Z2 <- !duplicated(Z1[oZ])
oX <- (seq_along(Z1)[oZ])[Z2]
X <- X[oX, , drop = FALSE]
Y <- if (is.matrix(Y))
Y[oX, , drop = FALSE]
else Y[oX]
w <- diff(c(0, cumsum(w))[c(Z2, TRUE)])
print(dim(X))
}
if (summ == 2) {
Z <- summ2(cbind(X, Y), w)
X <- Z$X[, 1L:ncol(X)]
Y <- Z$X[, ncol(X) + 1L:ncol(Y), drop = FALSE]
w <- Z$Y
print(dim(X))
}
if (summ == 3) {
Z <- summ2(X, Y * w)
X <- Z$X
Y <- Z$Y[, 1L:ncol(Y), drop = FALSE]
w <- rep(1, nrow(X))
print(dim(X))
}
offset <- model.offset(m)
r <- ncol(X)
if (is.matrix(Y)) {
p <- ncol(Y)
sY <- Y %*% rep(1, p)
if (any(sY == 0))
stop("some case has no observations")
if (!censored) {
Y <- Y/matrix(sY, nrow(Y), p)
w <- w * sY
}
if (length(offset) > 1L) {
if (ncol(offset) != p)
stop("ncol(offset) is wrong")
mask <- c(rep(FALSE, r + 1L + p), rep(c(FALSE, rep(TRUE,
r), rep(FALSE, p)), p - 1L))
X <- cbind(X, offset)
Wts <- as.vector(rbind(matrix(0, r + 1L, p), diag(p)))
fit <- nnet.default(X, Y, w, Wts = Wts, mask = mask,
size = 0, skip = TRUE, softmax = TRUE, censored = censored,
rang = 0, ...)
}
else {
mask <- c(rep(FALSE, r + 1L), rep(c(FALSE, rep(TRUE,
r)), p - 1L))
fit <- nnet.default(X, Y, w, mask = mask, size = 0,
skip = TRUE, softmax = TRUE, censored = censored,
rang = 0, ...)
}
}
else {
if (length(offset) <= 1L) {
mask <- c(FALSE, rep(TRUE, r))
fit <- nnet.default(X, Y, w, mask = mask, size = 0,
skip = TRUE, entropy = TRUE, rang = 0, ...)
}
else {
mask <- c(FALSE, rep(TRUE, r), FALSE)
Wts <- c(rep(0, r + 1L), 1)
X <- cbind(X, offset)
fit <- nnet.default(X, Y, w, Wts = Wts, mask = mask,
size = 0, skip = TRUE, entropy = TRUE, rang = 0,
...)
}
}
fit$formula <- attr(Terms, "formula")
fit$terms <- Terms
fit$call <- call
fit$weights <- w
fit$lev <- lev
fit$deviance <- 2 * fit$value
fit$rank <- Xr
edf <- ifelse(length(lev) == 2L, 1, length(lev) - 1) * Xr
if (is.matrix(Y)) {
edf <- (ncol(Y) - 1) * Xr
if (length(dn <- colnames(Y)) > 0)
fit$lab <- dn
else fit$lab <- 1L:ncol(Y)
}
fit$coefnames <- colnames(X)
fit$vcoefnames <- fit$coefnames[1L:r]
fit$na.action <- attr(m, "na.action")
fit$contrasts <- cons
fit$xlevels <- .getXlevels(Terms, m)
fit$edf <- edf
fit$AIC <- fit$deviance + 2 * edf
if (model)
fit$model <- m
class(fit) <- c("multinom", "nnet")
if (Hess)
fit$Hessian <- nnet:::multinomHess(fit, X)
if (x) fit$x <- XX ## !Wst return design matrix
fit
}
## ================================================================
regrAllEqns <-
function(formula, data, weights = NULL, nbest = 50, nvmax = 20,
force.in = NULL, force.out = NULL, codes=NULL, really.big=FALSE,
...)
{
## Purpose: all subsets
## ----------------------------------------------------------------------
## Author: Werner Stahel, Date: 14 Oct 2017, 09:26
really.big <- really.big | nbest<=50
lcall <- match.call()
mm <- lcall[c("","formula","data","weights")]
mm[[1]] <- as.name("model.frame")
lcall$formula <- eval(lcall$formula)
mf <- eval(mm, sys.frame(sys.parent()))
x <- model.matrix(terms(formula, data = data), mf)
lasgn <- attr(x,"assign")
lxnm <- colnames(x)
names(lasgn) <- lxnm
ljint <- "(Intercept)"==lxnm[1]
ltermnm <- c(if(ljint) "(Intercept)", attr(terms(mf),"term.labels"))
y <- model.extract(mf, "response")
wt <- model.extract(mf, "weights")
if (is.null(wt)) wt <- rep(1, length(y))
## force.in and force.out: allow for formulas
if (is.formula(force.in)) {
if (any(lvi <- !(lv <- all.vars(force.in))%in%names(data)))
stop("!regrAllEqns! Variable ", paste(lv[lvi], collapse=", "),
" of argument 'force.in' not in 'data'")
force.in <- setdiff(colnames(model.matrix(force.in, data)),"(Intercept)")
}
if(is.character(force.in)) {
lwrong <- setdiff(force.in, lxnm)
force.in <- which(lxnm%in%force.in)
} else lwrong <- force.in%nin%(1:ncol(x))
if (length(lwrong)) stop(paste("!regrAllEqns! Term ", lwrong,
" of 'force.in' not in model"))
if (is.formula(force.out)) {
if (any(lvi <- !(lv <- all.vars(force.out))%in%names(data)))
warning("!regrAllEqns! Variable ", paste(lv[lvi], collapse=", "),
" of argument 'force.out' not in 'data'")
force.out <- setdiff(colnames(model.matrix(force.out, data)),"(Intercept)")
}
if(is.character(force.out)) {
lwrong <- setdiff(force.out, lxnm)
force.out <- which(lxnm%in%force.out)
} else lwrong <- force.out%nin%1:ncol(x)
if (length(lwrong)) stop(paste("!regrAllEqns! Term ", lwrong,
" of 'force.out' not in model"))
if (ljint) x <- x[,-1]
## ---------------
ls <- leaps::regsubsets(x, y, weights=wt, nbest=nbest, nvmax=nvmax,
force.in=force.in, force.out=force.out, int=ljint,
really.big=really.big, ...)
lss <- summary(ls)
lwhich <- lss$which
## factors: identify models that are unsuitable
lbl <- unique(lasgn[duplicated(lasgn)])
liok <- rep(TRUE, nrow(lwhich))
ljok <- structure(rep(TRUE, ncol(lwhich)),names=colnames(lwhich))
for (lk in lbl) {
lj <- which(lasgn==lk)
lwh <- lwhich[,lxnm[lj]]
liok <- liok & ( apply(lwh,1,sum)%in%c(0,ncol(lwh)) )
ljok[lxnm[lj[-1]]] <- FALSE
}
lwhs <- lwhich[liok,ljok,drop=FALSE]
colnames(lwhs) <- lnm <- ltermnm[ljint+lasgn[colnames(lwhich)]][ljok]
## codes
if (is.null(codes) || (length(codes)==1 & is.na(codes)))
codes <- c(LETTERS,letters)
if (length(names(codes))) {
if (length(lwr <- setdiff(lnm,names(codes)))) {
warning(":regrAllEqns: terms ", paste(lwr, collapse=", "),
" not in 'names(codes)'. names are not used.")
codes <- unname(codes)
}
codes <- codes[lnm]
}
if (is.null(names(codes)))
codes <-
structure(c(if(ljint) "1", rep(codes, length=ncol(lwhs)-ljint)),
names=lnm)
llb <- apply(lwhs,1, function(x) paste(codes[x], collapse="") )
dimnames(lwhs) <- list(llb, lnm)
lout <- lwhs
lout[,] <- c(" ","*")[lwhs+1]
## criteria
ldf <- apply(lwhich, 1, sum)
lcr <- data.frame(df=ldf, lss[c("rsq","rss","adjr2","cp","bic")])
lcrs <- lcr[liok,]
dimnames(lcrs)[[1]] <- llb
lall <- list(criteria=lcr, modsuit=liok, df=apply(lss$which, 1, sum),
lss[c("which", "rsq", "rss", "adjr2", "cp", "bic", "outmat")])
structure(
list(which=lwhs, criteria=lcrs, codes=codes, force.in=force.in,
force.out=force.out, call=lcall, outmat=lout, allsubsets=lall,
obj=lss$obj),
class="regrAllEqns")
}
## ------------------------------------------------------------
regrAllEqnsXtr <- function(object, nbest=1, criterion="cp")
{
## Author: Werner Stahel, Date: 18 Oct 2017, 17:37
lwh <- object$which
lwh <- lwh[order(object$criteria[,criterion])[1:nbest],,drop=F]
rr <- apply(lwh,1, function(x)
update(formula(object),
as.formula(paste("~", paste(setdiff(colnames(lwh)[x], "(Intercept)"),
collapse="+"))) ) )
if (nbest==1) structure(rr[[1]], modelcode=names(rr)) else rr
}
## ---------------------------------------------------------------------
print.regrAllEqns <-
function(x, nbest=20, criterion="cp", printcriteria=FALSE, printcodes=TRUE,
...)
{
## Author: Werner Stahel, Date: 14 Oct 2017, 14:46
li <- order(x$criteria[,criterion])[1:min(nbest,nrow(x$criteria))]
lout <- x$outmat[li,]
colnames(lout) <- x$codes[colnames(lout)]
lout <- cbind(code=row.names(lout), df=as.character(x$criteria[li,"df"]), lout)
row.names(lout) <- 1:nrow(lout)
print(lout, quote=FALSE, ...)
if (printcriteria) {
lcr <- x$criteria[li,]
lcr <- cbind(code=row.names(lcr), lcr)
row.names(lcr) <- 1:nrow(lcr)
print(lcr, ...)
}
if (printcodes) print(cbind(code=x$codes), quote=FALSE, ...)
}
## ------------------------------------------------------------------
plot.regrAllEqns <-
function(x, criterion="cp", critrange=10, minnumber=10, nbest=10,
codes=x$codes, ncharhorizontal=6, col="blue",
legend=TRUE, mar=6, main="", cex=0.7*par("cex"),
cex.lab = par("cex.lab"), ...)
{
## Author: Werner Stahel, Date: 14 Oct 2017, 14:43
lmod <- x$criteria[,c("df",criterion)]
lcr <- lmod[,2]
limod <- 1:nrow(lmod)
if (is.null(critrange)) critrange <- Inf
if ((!is.na(critrange)) && critrange>0)
limod <- lcr<min(lcr)+critrange
if (sum(limod)<minnumber) limod <- order(lcr)[1:min(minnumber,nrow(lmod))]
lmod <- lmod[limod,]
ldf <- lmod[,1]
lcr <- lmod[,2]
lwh <- x$which[limod,]
llab <- codes[colnames(lwh)]
lmar <- par("mar")
if (length(mar)) {
if (length(mar)!=4) {
lmar <- c(lmar[1:2],mar[1],lmar[4])
}
oldpar <- par(mar=lmar)
on.exit(par(oldpar))
}
plot(ldf,lmod[,2], type="n", xlab="df", ylab=criterion, main="", ...)
if (length(main)) mtext(main, 3, lmar[3]-1.2, cex=1.2)
for (ls in unique(ldf)) {
lii <- which(ldf==ls)
llcr <- lcr[lii]
if (length(lii)) {
lk <- lii[order(llcr)[1:min(length(lii),nbest)]]
lvf <- apply(lwh[lk,,drop=F],2,all) ## in all models
lvft <- paste(c(llab[lvf],"+"),collapse="")
mtext(lvft, 3, 1, at=ls, cex=cex.lab, las=(nchar(lvft)>ncharhorizontal)+1,
col=col)
lwhs <- lwh[lk,!lvf, drop=F]
llbv <- llab[!lvf]
llb <- apply(lwhs,1,function(x) paste(llbv[x],collapse=""))
llcr <- lcr[lk]
if (length(lk)==1) text(ls, llcr, "+", cex=2*cex, col=col) else
text(rep(ls,length(lk)),lcr[lk],llb, cex=cex, col=col)
}
}
## text(lmod[,1],lmod[,2], row.names(lmod))
if (length(legend)) {
llab <- x$codes
if (is.logical(legend)&&legend) {
lcmin <- sapply(split(lcr,ldf), min)
legend <- if (lcmin[1]>last(lcmin)) "bottomleft" else "bottomright"
}
if (is.character(legend)) {
if (legend%nin%c("topleft","topright","bottomleft","bottomright"))
legend <- "bottomright"
legend(legend, paste(llab,names(llab)),pch=rep("",length=length(llab)))
}
if (is.numeric(legend)) {
if (length(legend)==2)
legend(legend[1],legend[2],
paste(llab,names(llab)),pch=rep("",length=length(llab)))
else warning(":plot.regrAllEqns! Argument 'legend' not suitable")
}
}
invisible(lmod)
}
## ======================================================================
colorpale <- function(col=NA, pale=0.3, ...)
{
lcolna <- is.na(col)
if (any(lcolna)) {
col[lcolna] <- palette()[2]
warning(":colorpale: Argument 'col' is NA. I assume ", col)
}
crgb <- t(col2rgb(col)/255)
rgb(1-pale*(1-crgb), ...)
}
## ----------------------------------------------------------------
factor2character <- function(x) {
for (lj in 1:ncol(x))
if (is.factor(x[,lj])) x[,lj] <- as.character(x[,lj])
x
}
## ==================================================================
## until repaired ...
get_all_vars <-
function (formula, data = NULL, ...)
{
if (missing(formula)) {
if (!missing(data) && inherits(data, "data.frame") &&
length(attr(data, "terms")))
return(data)
formula <- as.formula(data)
}
else if (missing(data) && inherits(formula, "data.frame")) {
if (length(attr(formula, "terms")))
return(formula)
data <- formula
formula <- as.formula(data)
}
formula <- as.formula(formula)
if (missing(data))
data <- environment(formula)
else if (!is.data.frame(data) && !is.environment(data) &&
!is.null(attr(data, "class")))
data <- as.data.frame(data)
else if (is.array(data))
stop("'data' must be a data.frame, not a matrix or an array")
if (!inherits(formula, "terms"))
formula <- terms(formula, data = data)
env <- environment(formula)
rownames <- .row_names_info(data, 0L)
varnames <- all.vars(formula)
inp <- parse(text = paste("list(", paste(varnames, collapse = ","),
")"), keep.source = FALSE)
variables <- setNames(eval(inp, data, env), varnames) ## !!!
if (is.null(rownames) && (resp <- attr(formula, "response")) >
0) {
lhs <- variables[[resp]]
rownames <- if (is.matrix(lhs))
rownames(lhs)
else names(lhs)
}
extras <- substitute(list(...))
extranames <- names(extras[-1L])
extras <- setNames(eval(extras, data, env),extranames) ## !!!
x <- data.frame(c(variables, extras)) ## !!!
if (!is.null(rownames))
attr(x, "row.names") <- rownames
x
}
##- getallvars <-
##- function (formula, data = NULL, ...)
##- {
##- if (missing(formula)) {
##- if (!missing(data) && inherits(data, "data.frame") &&
##- length(attr(data, "terms")))
##- return(data)
##- formula <- as.formula(data)
##- }
##- else if (missing(data) && inherits(formula, "data.frame")) {
##- if (length(attr(formula, "terms")))
##- return(formula)
##- data <- formula
##- formula <- as.formula(data)
##- }
##- formula <- as.formula(formula)
##- if (missing(data))
##- data <- environment(formula)
##- else if (!is.data.frame(data) && !is.environment(data) &&
##- !is.null(attr(data, "class")))
##- data <- as.data.frame(data)
##- else if (is.array(data))
##- stop("'data' must be a data.frame, not a matrix or an array")
##- if (!inherits(formula, "terms"))
##- formula <- terms(formula, data = data)
##- env <- environment(formula)
##- rownames <- .row_names_info(data, 0L)
##- varnames <- all.vars(formula)
##- inp <- parse(text = paste("list(", paste(varnames, collapse = ","),
##- ")"), keep.source = FALSE)
##- variables <- setNames(eval(inp, data, env), varnames) ## !!!
##- if (is.null(rownames) && (resp <- attr(formula, "response")) >
##- 0) {
##- lhs <- variables[[resp]]
##- rownames <- if (is.matrix(lhs))
##- rownames(lhs)
##- else names(lhs)
##- }
##- extras <- substitute(list(...))
##- extranames <- names(extras[-1L])
##- extras <- setNames(eval(extras, data, env),extranames) ## !!!
##- x <- data.frame(c(variables, extras)) ## !!!
##- if (!is.null(rownames))
##- attr(x, "row.names") <- rownames
##- x
##- }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.