# glm stuff
predict.frame <- function(object) {
# returns a frame relating predicted values to actual values
# for glms, the predicted value is on the link scale (before the inverse-link transformation)
resp = response.var(object)
frame = data.frame(z=predict(object),response=model.frame(object)[[resp]])
names(frame)[2] = resp
frame
}
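# Example (a minimal sketch; assumes response.var() from elsewhere in this
# package, which extracts the response name from a fitted model):
#   fit <- glm(vs ~ mpg, family = binomial, data = mtcars)
#   head(predict.frame(fit))   # z is the linear predictor, vs the observed response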
model.plot <- function(object,data,pred=predictor.terms(data),
highlight,layout=NULL,
partial=F,smooth=F,se=F,span=2/3,
col="green",scol=2,lwd=2,add=F,lty=1,
bg=par("bg"),key=T,type="p",...) {
# pred=predictor.vars(data)?
# this only makes predictions at the data, while plot.loess does more
if(missing(data)) data <- model.frame(object)
resp = response.var(object)
data <- expand.frame(data,setdiff(pred,predictor.vars(data)))
if(inherits(object,"tree")) {
p = predict(object,data,type="vector")[,2]
} else {
p <- predict(object,data,type="response",se=se)
}
if(is.factor(data[[resp]])) {
if(!se) p = p+1
else p$fit = p$fit+1
}
if(add) {
# must have one predictor term
if(length(pred) > 1) stop("model has more than one predictor")
x <- data[[pred]]
i <- order(x)
x <- sort(x)
if(!se) {
lines(x,p[i],col=col,lwd=lwd,lty=lty,...)
} else {
lines(x,p$fit[i],col=col,lwd=lwd,lty=lty,...)
lines(x,p$fit[i]+p$se.fit[i],col=col,lty=2,lwd=lwd,...)
lines(x,p$fit[i]-p$se.fit[i],col=col,lty=2,lwd=lwd,...)
}
return(invisible(p))
}
if(is.null(layout)) layout <- auto.layout(length(pred))
opar <- par(bg=bg,mar=auto.mar(main="f"))
if(any(layout != c(1,1))) {
opar = c(opar,par(mfrow=layout))
}
on.exit(par(opar))
if(missing(highlight)) highlight = predictor.terms(object)
for(v in pred) {
#print(data[[v]])
plot.default(data[[v]],data[[resp]],xlab="",ylab=resp,type=type,...)
font.lab = if(v %in% highlight) 2 else 1
title(xlab=v,font.lab=font.lab)
x = data[[v]]
if(length(predictor.terms(object)) == 1 &&
all(pred==predictor.terms(object))) {
i <- order(x)
lines(x[i],p[i],col=col,lwd=lwd,lty=lty,...)
} else {
if(partial) {
fit <- my.update(object,paste(".~. -",v))
if(v %in% predictor.terms(fit)) warning("update didn't work")
p = predict(fit,data,type="response",se=se)
if(!se) p = p+1
else p$fit = p$fit+1
}
# doesn't work
#i <- order(x)
#lines(x[i],p[i],col=col,lwd=lwd,lty=lty,...)
#fit = smooth(p~x)
#fit = loprob(x,p,span=span)
# unfortunately lowess uses degree=1
fit = lowess(x,p,f=span,iter=0)
lines(fit,col=col,lwd=lwd,lty=lty,...)
}
if(smooth) {
if(is.factor(data[[resp]])) fit = loprob(x,data[[resp]],span=span)
else fit = lowess(x,data[[resp]],f=span)
lines(fit,col=scol,lwd=lwd,lty=lty,...)
if(key) color.key(c(col,scol),labels=c("predicted","actual"))
}
}
}
predictClass <- function(object,data,p=0.5) {
resp <- response.var(object)
if(inherits(object,"glm")) {
r = predict(object,data,type="response")
factor.logical(r>p,labels=levels(data[[resp]]))
} else if(p == 0.5) {
predict(object,data,type="class")
} else {
r = predict(object,data,type="vector")[,2]
factor.logical(r>p,labels=levels(data[[resp]]))
}
}
misclassResiduals <- function(object,data,p=0.5) {
r = predictClass(object,data,p=p)
y = data[[response.var(object)]]
factor.logical(r != y, labels=c("Correct","Error"))
}
misclass.default <- function(fit,data=NULL,rate=F) {
if(is.null(data)) data = model.frame(fit)
resp <- response.var(fit)
r = predictClass(fit,data)
r = sum(r != data[[resp]])
if(rate) r/nrow(data) else r
}
misclass <- function(object, ...) UseMethod("misclass")
#formula.nnet = formula.lm
deviance.glm <- function(object,data,rate=F) {
if(missing(data)) {
dev = object$deviance
if(rate) dev = dev/nrow(model.frame(object))
return(dev)
}
resp <- response.var(object)
if(object$family$family == "binomial") {
truth <- (as.numeric(data[[resp]]) == 2)
p <- predict(object,data,type="response")
p[p == 0] <- 1e-3
p[!truth] <- 1-p[!truth]
dev = -2*sum(log(p))
} else {
stop("family not handled")
}
if(rate) dev/nrow(data) else dev
}
confusion.default <- function(object,data=NULL,p=0.5) {
if(is.null(data)) data = model.frame(object)
resp <- response.var(object)
r = predictClass(object,data,p=p)
table(truth=data[[resp]],predicted=r)
}
confusion <- function(object, ...) UseMethod("confusion")
factor.logical <- function(x,labels=c("No","Yes")) {
f <- factor(x,levels=c(F,T))
levels(f) <- labels
f
}
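# Example (hedged sketch; response.var() is assumed from elsewhere in this
# package, and the response must be a factor):
#   d <- transform(mtcars, vs = factor(vs, labels = c("vee", "straight")))
#   fit <- glm(vs ~ mpg + wt, family = binomial, data = d)
#   predictClass(fit, d)     # predicted labels at the default 0.5 cutoff
#   misclass(fit, d)         # number of misclassified cases
#   confusion(fit, d)        # truth vs. predicted table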
# expands a model formula to include all squares and cross-products of the
# predictors
expand.quadratic <- function(fmla,cross=T) {
resp <- response.var(fmla)
pred <- predictor.vars(fmla)
pred2 <- sapply(pred,function(s) paste("I(",s,"^2)",sep=""))
s <- paste(resp,"~",paste(pred,collapse="+"),
"+",paste(pred2,collapse="+"))
len <- length(pred)
if(cross && len > 1) {
pred3 <- c()
for(i in 1:(len-1)) {
for(j in (i+1):len) {
pred3 <- c(pred3, paste(pred[i],":",pred[j],sep=""))
}
}
s <- paste(s,"+",paste(pred3,collapse="+"))
}
formula(s)
}
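# Example (sketch; response.var()/predictor.vars() are assumed package helpers):
#   expand.quadratic(y ~ a + b)
#   # y ~ a + b + I(a^2) + I(b^2) + a:b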
logistic <- function(formula,data,...) {
#environment(formula) = sys.frame(sys.nframe())
control = glm.control(maxit=30,trace=F)
# can't use control else update will fail
glm(formula_with_data(formula,data),family=binomial,...)
#glm(formula,data,family=binomial,control=control,...)
}
my.logistic <- function(formula,data,lambda=NULL,...) {
data = model.frame(formula,data)
pred = predictor.terms(formula)
resp = response.var(formula)
edata = expand.frame(data,setdiff(pred,colnames(data)))
x = as.matrix(edata[pred])
x = cbind("(Intercept)"=numeric(nrow(x))+1,x)
y = 2*as.numeric(data[[resp]])-3
w = logistic.fit(scale.rows(x,y),lambda=lambda)
object = list(coefficients=w,model=data,levels=levels(data[[resp]]),
terms=terms(data))
class(object) = "logistic"
object
}
print.logistic <- function(object) {
cat("Logistic regression on ")
dput(object$levels)
print(object$coefficients)
}
# R 1.9.0
#coef.logistic = coef.glm
model.frame.logistic = model.frame.glm
predict.logistic <- function(object,data=NULL,
type=c("class","response","vector"),se=F) {
pred = predictor.terms(object)
if(is.null(data)) data = model.frame(object)
else {
no.resp = formula(paste("~",paste(pred,collapse="+")))
data = model.frame(no.resp,data)
}
edata = expand.frame(data,setdiff(pred,colnames(data)))
x = as.matrix(edata[pred])
w = t(t(object$coefficients[-1]))
z = (x %*% w) + object$coefficients[1]
type = match.arg(type)
s = 1/(1+exp(-z))
if(type == "response") s
else if(type == "class") object$levels[(s>0.5)+1]
else if(type == "vector") {
r = array(0,c(nrow(x),2),list(rownames(data),object$levels))
names(dimnames(r)) = c("case",response.var(object))
r[,1] = 1-s
r[,2] = s
r
}
}
deviance.logistic <- function(object,data=NULL,rate=F) {
if(is.null(data)) data = model.frame(object)
resp <- response.var(object)
truth = as.numeric(data[[resp]])
p = predict(object,data,type="vector")
p[p == 0] = 1e-3
i = (1:nrow(p)) + (truth-1)*nrow(p)
dev = -2*sum(log(p[i]))
if(rate) dev/length(truth) else dev
}
logistic.fit <- function(x,lambda=NULL) {
# each row of x is a data point
d = ncol(x)
if(is.null(lambda)) lambda = 1e-4
w = array(0,c(1,d))
colnames(w) = colnames(x)
for(iter in 1:100) {
old.w = w
# s1 is column
s1 = 1/(1+exp(x %*% t(w)))
a = s1*(1-s1)
# g is row
g = (t(s1) %*% x) - lambda*w
h = (t(x) %*% scale.rows(x,a)) + lambda*diag(d)
w = w + t(solve(h,t(g)))
#cat("loglik =",sum(log(1-s1)),"\n")
#print(drop(w))
if(max(abs(w - old.w)) < 1e-6) break
}
drop(w)
}
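# Example (a rough sketch; logistic.fit() relies on scale.rows(), assumed to be
# a package helper that scales row i of a matrix by the i-th element of a vector):
#   set.seed(1)
#   x <- cbind("(Intercept)" = 1, x1 = rnorm(200))
#   y <- ifelse(runif(200) < 1/(1 + exp(-(1 + 2*x[,"x1"]))), 1, -1)
#   logistic.fit(x * y)      # x*y scales each row by its +/-1 label
#   # the returned coefficients should be roughly c(1, 2)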
logistic.fit.bohning <- function(x,lambda=1e-4) {
# each row of x is a data point
d = ncol(x)
h = (t(x) %*% x)/4 + lambda*diag(d)
# r is upper triangular
r = chol(h)
w = array(0,c(1,d))
for(iter in 1:10) {
old.w = w
# s1 is column
s1 = 1/(1+exp(x %*% t(w)))
# g is row
g = (t(s1) %*% x) - lambda*w
# u is column
u = backsolve(r,forwardsolve(t(r),t(g)))
# line search along u
ug = drop(g %*% u)
ux = x %*% u
a = s1*(1-s1)
uhu = sum((ux^2)*a) + lambda*sum(u*u)
w = w + (ug/uhu)*t(u)
print(drop(w))
if(max(abs(w - old.w)) < 1e-6) break
}
colnames(w) = colnames(x)
w
}
# Functions for lab 3
subtable <- function(x,i,j) {
if(!inherits(x,"table")) x <- as.table(as.matrix(x))
if(is.character(i)) i <- pmatch(i,rownames(x))
if(is.character(j)) j <- pmatch(j,colnames(x))
rs <- sum(x[i,])
cs <- sum(x[,j])
n <- sum(x)
r <- array(c(x[i,j],cs-x[i,j],rs-x[i,j],n-cs-rs+x[i,j]),c(2,2))
rownames(r) <- c(rownames(x)[i], paste("not",rownames(x)[i]))
colnames(r) <- c(colnames(x)[j], paste("not",colnames(x)[j]))
names(dimnames(r)) <- names(dimnames(x))
r
}
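# Example (sketch): collapse a larger contingency table to a 2x2 for one cell.
#   x <- margin.table(HairEyeColor, c(1, 2))   # Hair x Eye counts
#   subtable(x, "Black", "Brown")
#   #            Eye
#   # Hair        Brown not Brown
#   #   Black        68        40
#   #   not Black   152       332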
entropy <- function(x) {
x = as.vector(x)
x = x/sum(x)
-sum(x*log2(x+eps))
}
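# `eps` is assumed to be a small global constant defined elsewhere in the
# package, used to avoid log2(0).  For example:
#   eps <- 1e-10       # only if not already defined
#   entropy(c(1, 1))   # 1 bit: a fair coin
#   entropy(c(1, 0))   # ~0 bits: a deterministic outcome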
as.factor.data.frame <- function(x) {
for(i in 1:length(x)) if(!is.factor(x[[i]])) x[[i]] = factor(x[[i]])
x
}
tabulate.matrix <- function(x) {
if(is.logical(x)) {
s = colSums(x)
rbind("FALSE"=nrow(x)-s,"TRUE"=s)
} else {
nb = max(x)
apply(x,2,function(f) tabulate(f,nb=nb))
}
}
col.entropy <- function(x) {
# x is matrix of counts
# entropy of each column of x
x = x+eps
s = colSums(x)
-colSums(x*log2(x))/s +log2(s)
}
information <- function(x,y=NULL,actual=NULL,smooth=0) {
if(is.null(y)) {
# x is a matrix of counts
x = x + smooth
if(is.null(actual)) {
# average information
if(F) {
p = colSums(x)
p = p/sum(p)
entropy(rowSums(x)) - sum(p*apply(x,2,entropy))
} else {
entropy(rowSums(x))+entropy(colSums(x))-entropy(x)
}
} else {
# conditional information
entropy(rowSums(x)) - entropy(x[,actual])
}
} else {
# x is a matrix of factors, y is a factor
# return the information in each table(x[,i],y)
p = table(y) + 2*smooth
p = p/sum(p)
if(is.null(actual)) {
h = col.entropy(tabulate.matrix(x)+smooth)
for(lev in levels(y)) {
h = h - col.entropy(tabulate.matrix(x[y==lev,])+smooth)*p[lev]
}
h
} else {
tab = array(0,c(nlevels(y),ncol(x)),list(levels(y),colnames(x)))
for(lev in levels(y)) {
tab[lev,] = tabulate.matrix(x[y==lev,])[actual,]
}
tab = tab + smooth
entropy(p) - col.entropy(tab)
}
}
}
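# Example (sketch): mutual information (in bits) between the rows and columns
# of a count table, using the entropy() helper above.
#   information(matrix(c(10, 0, 0, 10), 2, 2))   # ~1 bit: perfect association
#   information(matrix(c(5, 5, 5, 5), 2, 2))     # ~0 bits: independence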
all.subsets <- function(n) {
if(n == 1) subsets = list(1)
else if(n == 2) subsets = list(1,2,1:2)
else if(n == 3) subsets = list(1,2,3,1:2,2:3,c(1,3),1:3)
else {
subsets = list()
for(k in 1:n) {
subsets = c(subsets,nchoosek(n,k))
}
}
subsets
}
interaction.information <- function(x,y=NULL,actual=NULL) {
if(is.null(y)) {
if(!is.null(actual)) {
return(interaction.information(x[,,actual]) -
interaction.information(margin.table(x,1:2)))
}
nd = if(is.null(dim(x))) 1 else length(dim(x))
subsets = all.subsets(nd)
total = 0
for(s in subsets) {
sig = (-1)^(length(s)-nd+1)
total = total + entropy(margin.table(x,s))*sig
}
total
} else {
# x is a matrix of factors, y is a factor
# return the interaction in each table(x[,i],x[,j],y)
stop("the (x,y) form of interaction.information is not implemented")
}
}
interaction.variance <- function(x) {
n = dim(x)[length(dim(x))]
a = c()
for(i in 1:n) {
a[i] = interaction.information(x,actual=i)
}
w = margin.table(x,length(dim(x)))
w = w/sum(w)
m = sum(w*a)
print(m)
sum(w*(a-m)^2)
}
information.graph <- function(tab,main="",color=grey(0.7)) {
tab = as.table(tab)
nd = if(is.null(dim(tab))) 1 else length(dim(tab))
subsets = all.subsets(nd)
theta = seq(0,2*pi,len=nd+1)[-1]
x = cbind(cos(theta),sin(theta))*4
if(nd > 3) {
x = x + array(rnorm(prod(dim(x))),dim(x))
}
mar=auto.mar(main=main,axes=F,xlab="",ylab="")
opar = par(mar=mar)
on.exit(par(opar))
plot.new()
plot.window(c(-5,5),c(-5,5),asp=1)
for(s in rev(subsets)) {
mid = colMeans(x[s,,drop=F])
for(i in s) {
segments(mid[1],mid[2],x[i,1],x[i,2])
}
h = interaction.information(margin.table(tab,s))
col = if(h < 0) color else "white"
draw.circle(mid[1],mid[2],sqrt(abs(h)),col=col,fill=T)
}
labels = names(dimnames(tab))
text(x[,1],x[,2],labels)
}
score.binary <- function(x,y,type=c("info","a.info"),a=1) {
if(!inherits(x,"table")) x <- as.table(as.matrix(x))
y = table(as.numeric(y))
x = x + a
y = y + 2*a
n = sum(y)
type <- match.arg(type)
if(type == "info") {
not.x = rep.col(y,ncol(x)) - x
r = (x/n)*log2(x/n) + (not.x/n)*log2(not.x/n)
r = colSums(r)
r = r - sum((y/n)*log2(y/n))
x = colSums(x)
not.x = n - x
r - (x/n)*log2(x/n) - (not.x/n)*log2(not.x/n)
} else if(type == "a.info") {
col.total = rep.row(colSums(x),nrow(x))
r = (x/col.total)*log2(x/col.total)
r = colSums(r)
r = r - sum((y/n)*log2(y/n))
r
}
}
score.words <- function(x,type=c("info","bayes","chisq",
"a.info","odds","lift","overlap"),
a=1,z=1) {
# info,bayes,chisq are very similar
# a.info,odds are similar
# input is contingency table (matrix of counts)
if(!inherits(x,"table")) x <- as.table(as.matrix(x))
x = x + a
fit <- indep.fit(x)
type <- match.arg(type)
if(type == "info") {
row.total <- rep.col(rowSums(x),ncol(x))
not.x = row.total - x
n = sum(as.vector(x))
r = (x/n)*log2(x/n) + (not.x/n)*log2(not.x/n)
r = r - (row.total/n)*log2(row.total/n)
r = colSums(r)
x = colSums(x)
not.x = n - x
r - (x/n)*log2(x/n) - (not.x/n)*log2(not.x/n)
} else if(type == "a.info") {
row.total <- rep.col(rowSums(x),ncol(x))
col.total = rep.row(colSums(x),nrow(x))
n = sum(as.vector(x))
r = (x/col.total)*log2(x/col.total)
r = r - (row.total/n)*log2(row.total/n)
r = colSums(r)
r
} else if(type == "bayes") {
# extra smoothing
x = x + 1
row.total <- rep.col(rowSums(x),ncol(x))
not.x = row.total - x
r = lgamma(x) + lgamma(not.x) - lgamma(row.total)
r = colSums(r)
x = colSums(x)
row.total = colSums(row.total)
not.x = row.total - x
r - (lgamma(x)+lgamma(not.x)-lgamma(row.total))
} else if(type == "chisq") {
r <- (x - fit)^2/fit
total.words <- rep.col(rowSums(x),ncol(x))
r <- r + (x - fit)^2/(total.words - fit)
colSums(r)
} else if(type == "lift") {
r <- (x - sqrt(x))/fit
#colSums(-log(abs(r)+1e-4))
#colSums(r^2)
apply(r,2,max)
} else if(type == "odds") {
n <- sum(x)
cs <- rep.row(colSums(x),nrow(x))
rs <- rep.col(rowSums(x),ncol(x))
r <- log(x/(rs-x)/(cs-x)*(n-rs-cs+x))
v <- 1/x + 1/(rs-x) + 1/(cs-x) + 1/(n-rs-cs+x)
#colSums((r - z*sqrt(v))^2)
# subtract the expected sum of squares under independence
colSums(r^2 - z*v)
#colSums(abs(r)) - z*sqrt(colSums(v))
} else if(type == "overlap") {
cs <- colSums(x)
r <- apply(x,2,max)/cs
v <- r*(1-r)/cs
r-z*sqrt(v)
}
}
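# Example (sketch; indep.fit(), rep.row() and rep.col() are assumed package
# helpers for independence fits and row/column replication):
#   x <- margin.table(HairEyeColor, c(1, 2))
#   sort(score.words(x, type = "chisq"), decreasing = TRUE)   # columns most associated with the rows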
odds.ratios <- function(x) {
# returns the log odds-ratio for all cells in the table
n <- sum(as.vector(x))
cs <- rep.row(colSums(x),nrow(x))
rs <- rep.col(rowSums(x),ncol(x))
log(x/(rs-x)/(cs-x)*(n-rs-cs+x))
#log(x/(cs-x))
}
odds.ratios.se <- function(x) {
# returns the standard error of each log odds-ratio
n <- sum(x)
cs <- rep.row(colSums(x),nrow(x))
rs <- rep.col(rowSums(x),ncol(x))
sqrt(1/x + 1/(rs-x) + 1/(cs-x) + 1/(n-rs-cs+x))
#sqrt(1/x + 1/(cs-x))
}
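# Worked example (sketch; rep.row()/rep.col() are assumed replication helpers).
# For a 2x2 table with rows (a, b) and (c, d), the [1,1] entry is
# log(a*d / (b*c)) with standard error sqrt(1/a + 1/b + 1/c + 1/d):
#   x <- matrix(c(20, 10, 5, 15), 2, 2)
#   odds.ratios(x)[1, 1]      # log(20*15 / (5*10)) = log(6)
#   odds.ratios.se(x)[1, 1]   # sqrt(1/20 + 1/5 + 1/10 + 1/15)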
discriminate.node <- function(g,i=1,x) {
k <- names(g)[leaves(g,i)]
if(length(k) == 0) return("")
not.k <- setdiff(rownames(x),k)
if(length(not.k) == 0) return("")
if(!is.character(i)) i <- names(g)[i]
not.i <- paste("NOT",i)
xp <- array(0,c(2,ncol(x)),list(c(i,not.i),colnames(x)))
xp[1,] <- colSums(x[k,,drop=F])
xp[2,] <- colSums(x[not.k,,drop=F])
s <- score.features(xp,type="odds")
f <- colnames(x)[which.max(s)]
#print(subtable(xp,1,f))
f
}
discriminate.from.sibling <- function(g,i=1,x) {
leaf1 <- names(g)[leaves(g,i)]
pa = edges.to(g,i)
if(length(pa) == 0) return("")
sibling = setdiff(from(g,pa),i)
if(length(sibling) == 0) return("")
leaf2 <- names(g)[leaves(g,sibling)]
xp <- array(0,c(2,ncol(x)),list(1:2,colnames(x)))
xp[1,] = colSums(x[leaf1,,drop=F])
xp[2,] = colSums(x[leaf2,,drop=F])
xp <- xp + 1e-10*rep.col(rowSums(xp),ncol(xp))
s = odds.ratios(xp)[1,]
f <- colnames(x)[which.max(s)]
if(T) {
cat(f," ",xp[,f],"\n")
ord = rev(order(s))[1:5]
for(i in 1:length(ord)) {
j = colnames(x)[ord[i]]
cat(" ",j," ",xp[,j]," ",s[j],"\n")
}
}
f
}
join.scores <- function(xp,verbose=F,...) {
# xp is matrix with three rows: child1,child2,other
#xp <- xp + 1e-3*rep.col(rowSums(xp),ncol(xp))
#xp <- xp + 1e-2*rep.col(rowSums(xp),ncol(xp))
xp <- xp + 1e-2
if(T) {
# unlike score.words, this measure is asymmetric
# the word must favor the node
#s12 <- abs(odds.ratios(xp[1:2,])[1,])
#s12.se <- odds.ratios.se(xp[1:2,])[1,]
s13 <- odds.ratios(xp[c(1,3),])[1,]
s23 <- odds.ratios(xp[2:3,])[1,]
s13.se <- odds.ratios.se(xp[c(1,3),])[1,]
s23.se <- odds.ratios.se(xp[2:3,])[1,]
if(T) {
# hedging
a = 1
s13 = s13 - s13.se*a
s23 = s23 - s23.se*a
}
if(F) {
s13 = s13/s13.se
s23 = s23/s23.se
}
#s <- pmin(s12/s13,s12/s23)
#s = s12 - pmin(s13,s23)
#s = s12+s12.se - pmin(s13+s13.se,s23+s23.se)
} else {
type = "a.info"
a = 0
#print(subtable(xp[c(1,3),],1,"the"))
s13 = score.words(xp[c(1,3),],type=type,a=a)
s23 = score.words(xp[c(2,3),],type=type,a=a)
}
# hard min or softmin?
if(F) s = pmin(s13,s23)
else if(T) s = -log(exp(-s13)+exp(-s23))
else {
# rank combination
s = rank(s13)+rank(s23)
}
if(verbose) {
# debugging
f <- colnames(xp)[which.max(s)]
cat(f," ",xp[,f],"\n")
ord = rev(order(s))[1:5]
for(i in 1:length(ord)) {
j = colnames(xp)[ord[i]]
cat(" ",j," ",xp[,j]," ",s[j],s13[j],s23[j],"\n")
}
}
s
}
discriminate.node <- function(g,i=1,x,...) {
child <- from(g,i)
if(length(child) < 2) return("")
leaf1 <- names(g)[leaves(g,child[1])]
leaf2 <- names(g)[leaves(g,child[2])]
other <- setdiff(rownames(x),c(leaf1,leaf2))
if(length(other) == 0) return("")
xp <- array(0,c(3,ncol(x)),list(1:3,colnames(x)))
xp[1,] <- colSums(x[leaf1,,drop=F])
xp[2,] <- colSums(x[leaf2,,drop=F])
xp[3,] <- colSums(x[other,,drop=F])
if(F) {
pa = edges.to(g,i)
if(length(pa) == 0) return("")
sibling = setdiff(from(g,pa),i)
if(length(sibling) == 0) return("")
sib.leaves = names(g)[leaves(g,sibling)]
xp.sib = colSums(x[sib.leaves,,drop=F])
xp[3,] = xp[3,]/1 + xp.sib
}
s = join.scores(xp,...)
colnames(x)[which.max(s)]
}
plot.join <- function(hc,doc,cex=0.8,...) {
g <- as.graph.hclust(hc)
labels <- names(g)
for(i in 1:length(g)) {
if(!is.leaf(g,i)) {
labels[i] <- discriminate.node(g,i,doc,...)
}
}
if(T) {
n = length(labels)
col = rep(1,n)
for(i in 1:n) {
lab = strsplit(labels[i],"\\.")[[1]][1]
res = try(col2rgb(lab),silent=T)
if(!inherits(res,"try-error")) col[i] = lab
}
plot.graph(g,labels=labels,orient=180,srt=0,cex=cex,col=col,...)
}
else plot.graph(g,labels=labels,orient=180,srt=0,cex=cex,...)
}
# functions for response tables
# Tom Minka 11/8/01
# given an aov fit, plot the effects for each predictor
# as well as a boxplot of the residuals
# you can do an F-test visually by comparing the spread of the effects
# with the spread of the residuals
effects.plot <- function(object,se=F) {
mt <- model.tables(object,se=se)
vars <- names(mt$tables)
nvar <- length(vars)
opar <- par(mar=c(2.5,4,0,0.1))
on.exit(par(opar))
plot.new()
ylim <- c(min(sapply(mt$tables,min)), max(sapply(mt$tables,max)))
plot.window(xlim=c(0.5,nvar+1+0.5),ylim=ylim)
axis(1,1:(nvar+1), labels=c(vars,"residuals"))
axis(2)
title(ylab=response.var(object))
if(se) {
if(!is.numeric(se)) se <- 1.96
p <- (1-2*pnorm(-se))*100
cat("Displaying ",format(p,digits=2),"% confidence intervals\n",sep="")
}
for(k in 1:nvar) {
eff <- mt$tables[[k]]
text(k, eff, names(eff))
if(se) {
jitter <- runif(length(eff))*0.1
arrows(k+jitter, eff-se*mt$se[[k]], k+jitter, eff+se*mt$se[[k]],
code = 3, col = "green", angle = 75, length = .1)
}
}
res <- residuals(object)
x <- model.frame(object)
res <- tapply(res, x[vars], mean)
boxplot(res, at=nvar+1, add=T)
}
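# Example (sketch; response.var() is assumed from elsewhere in the package):
#   fit <- aov(yield ~ N + P + K, data = npk)
#   effects.plot(fit)            # compare effect spreads with the residual spread
#   effects.plot(fit, se = TRUE) # add approximate confidence intervals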
as.rtable <- function(a,resp="Response",check.names=T) {
if(check.names) resp = make.names(resp)
nam = names(dimnames(a))
if(is.null(nam)) stop("Dimensions must have names")
fmla <- paste(resp,"~",
paste(make.names(nam),collapse="+"))
attr(a,"terms") <- terms(formula(fmla))
#attr(a,"ordered") <- dimOrdered(a)
class(a) <- c("rtable","table")
a
}
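# Example (sketch): build an rtable from a named array of cell means.
#   a <- tapply(npk$yield, npk[c("N", "P")], mean)
#   as.rtable(a, resp = "yield")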
print.rtable <- function(rt,...) {
cat(response.var(rt),"\n")
# strip attributes and print as a matrix
attributes(rt) <- attributes(rt)[c("dim","dimnames")]
print(rt)
}
terms.rtable <- function(rt) {
attr(rt,"terms")
}
as.data.frame.rtable <- function(rt,...) {
df <- as.data.frame.table(rt,...)
# preserve ordering of factors
i = names(which(dimOrdered(rt)))
#if(length(i) > 0) df[i] = apply.df(df[i],as.ordered)
len <- length(names(df))
names(df)[len] <- response.var(rt)
df
}
as.matrix.rtable <- function(rt,...) {
x = as.matrix(unclass(rt))
if(length(dim(rt)) == 1) {
# why does as.matrix drop the name??
# as.matrix doesn't understand this data type
names(dimnames(x))[[1]] = names(dimnames(rt))
t(x)
} else x
}
#' @export
row.probs <- function(x,smooth=0.5,se=F) {
# converts a table of counts (ctable) to an rtable of P(row | column)
x = x + smooth
col.sum = rep.row(colSums(x),nrow(x))
nam = names(dimnames(x))
if(is.null(nam) || nam[1] == "") nam = "row"
p = as.rtable(x/col.sum,paste("Probability of",nam[1]))
if(se != F) {
if(se == T) se = 1.64
p.se = sqrt(p*(1-p)/col.sum) * se
list(p=p,se=p.se)
} else p
}
#' @export
log.counts <- function(x,smooth=0.5,se=F) {
x = x+smooth
r = as.rtable(log(x),"log(count)")
if(se != F) {
if(se == T) se = 1.64
se = 1/sqrt(x) * se
list(r=r,se=se)
} else r
}
#' @export
logit.rtable = function(x,smooth=0.5,se=F) {
resp = response.var(x)
pred = predictor.vars(x)
lev = levels(x[[resp]])
i = (x[[resp]]==lev[1])
y1 = table(x[i,pred])+smooth
y2 = table(x[!i,pred])+smooth
r = log(y2/y1)
r = as.rtable(r,paste("Logit of",resp))
if(se != F) {
if(se == T) se = 1.64
se = (1/sqrt(y1)+1/sqrt(y2)) * se
list(r=r,se=se)
} else r
}
#' @export
pclass.rtable = function(x,pclass=2,smooth=0.5,se=F) {
resp = response.var(x)
pred = predictor.vars(x)
lev = levels(x[[resp]])
i = (x[[resp]]==lev[pclass])
y1 = table(x[i,pred])+smooth
y2 = table(x[!i,pred])+smooth
n = y1+y2
p = y1/(y1+y2)
p = as.rtable(p,paste("Probability of",resp,"=",lev[pclass]))
if(se != F) {
if(se == T) se = 1.64
se = sqrt(p*(1-p)/n) * se
list(p=p,se=se)
} else p
}
aov.rtable <- function(rt,med=F) {
frame <- as.data.frame.rtable(rt)
if(med) {
p <- medpolish(rt,trace.iter=F)
return(structure(p, terms=terms(rt), model=frame, class="aov"))
}
aov(terms(rt), frame)
}
aov.residual <- function(rt,...) {
rt - rtable(aov.rtable(rt,...))
}
# returns a table of responses, stratified according to the terms in fmla
# and aggregated according to fun.
rtable.terms <- function(fmla, x, fun = mean) {
resp <- response.var(fmla)
pred <- predictor.vars(fmla)
rt <- tapply(x[[resp]], x[pred], fun)
if(F && (length(dim(rt)) == 1)) {
# force it to be a 1-row table
dim(rt) = c(1,length(rt))
dimnames(rt) = named.list(NULL,levels(x[[pred]]),names.=c("",pred))
}
class(rt) <- "rtable"
attr(rt,"terms") <- fmla
dimOrdered(rt) <- sapply(x[pred],is.ordered)
rt
}
# model.frame is a data.frame with a "terms" attribute describing a model
rtable.data.frame <- function(object, ...) {
rtable(terms(object), object, ...)
}
rtable.aov <- function(object, ...) {
x <- model.frame(object)
resp <- response.var(x)
pred <- predictor.vars(x)
y <- expand.grid(lapply(x[pred],levels))
y[[resp]] <- predict(object,y)
y <- model.frame(terms(x),y)
rtable(y, ...)
}
# legal input:
# rtable(y~f1+f2,x)
# rtable(y~.,x)
# rtable(x$y ~ x$f1 + x$f2)
rtable.formula <- function(formula, data, ...) {
# convert "|" into "+"
rhs = formula[[3]]
if(is.call(rhs) && (deparse(rhs[[1]]) == "|")) {
rhs[[1]] = as.name("+")
formula[[3]] = rhs
model = model.frame.default(formula,data)
} else {
expr <- match.call(expand = F)
expr$... <- NULL
expr[[1]] <- as.name("model.frame.default")
model <- eval(expr, parent.frame())
}
return(rtable(model, ...))
}
rtable <- function(object, ...) UseMethod("rtable")
# plots rows of x as traces
# similar to parallel_plot
# replacement for dotchart
# y is a named vector or matrix
#' Linechart
#'
#' Plot each data row as a curve.
#'
#' If \code{xscale="linear"}, the columns are placed to make each curve as
#' straight as possible. If \code{xscale="equal"}, the columns are placed
#' similar to \code{"linear"} but with the constraint that they must be equally
#' spaced. If \code{xscale="none"}, the columns are placed in the order that
#' they appear in the matrix. This is automatic if \code{y} has ordered
#' columns (see \code{\link{dimOrdered}}). If \code{se != NULL}, error bars
#' are drawn around each point.
#'
#' Linecharts are a replacement for dotcharts and mosaics.
#'
#' @param y a named vector or matrix.
#' @param se a vector or matrix, same size as \code{y}, of error bounds.
#' Alternatively, \code{y} can be \code{list(y,se)}.
#' @param effects If \code{TRUE}, the columns are shifted to have zero mean.
#' @param med If \code{TRUE} and \code{effects=TRUE}, the columns are shifted
#' to have zero median.
#' @param xscale describes how the columns should be placed.
#' @param ... additional arguments to \code{\link{labeled_curves}}.
#' @author Tom Minka
#' @export
#' @seealso \code{\link{dotchart}}, \code{\link{mosaicplot}}
#' @examples
#'
#' # compare to a dotchart
#' data(VADeaths)
#' dotchart(VADeaths, main = "Death Rates in Virginia - 1940")
#' dimOrdered(VADeaths)[2] = F
#' linechart(VADeaths)
#' linechart(t(VADeaths))
#'
#' # compare to a mosaicplot
#' data(HairEyeColor)
#' x <- margin.table(HairEyeColor,c(1,2))
#' dimOrdered(x) = F
#' mosaicplot(x)
#' x = t(x)
#' col = c("brown","blue","red","green")
#' linechart(row.probs(x),color.pal=col)
#' linechart(row.probs(x,se=T),color.pal=col)
#' linechart(row.probs(x,se=T),jitter=0.02,color.pal=col)
#' mosaicplot(x)
#' linechart(row.probs(t(x),se=T))
#'
#' data(blood)
#' dimOrdered(blood) = F
#' linechart(row.probs(blood,se=T))
#'
#' data(antacids)
#' dimOrdered(antacids) = F
#' linechart(row.probs(antacids,se=T))
#' mosaicplot(t(antacids))
#'
linechart <- function(y,se=NULL,xlab=NULL,ylab,effects=F,med=F,
xscale=c("equal","linear","none"),...) {
if(is.list(y) && missing(se)) {
# y is list(y,se)
se = y[[2]]
y = y[[1]]
}
if(is.vector(y)) {
y = t(as.matrix(y))
if(!is.null(se)) se = t(as.matrix(se))
}
if(!is.matrix(y)) {
y = as.matrix(y)
if(!is.null(se)) se = as.matrix(se)
}
#if(missing(ylab)) ylab <- deparse(substitute(y))
row.var <- names(dimnames(y))[1]
col.var <- names(dimnames(y))[2]
if(is.null(xlab)) if(!is.null(col.var) && !is.na(col.var)) xlab = col.var
if(F) {
# remove all-NA rows and columns
y = y[!apply(y,1,function(z) all(is.na(z))),]
y = y[,!apply(y,2,function(z) all(is.na(z)))]
}
xscale <- match.arg(xscale)
if(effects) {
if(missing(ylab)) ylab <- paste(row.var, "effect on", response.var(y))
# choose x to make the profiles linear
# fails if there are missing values
if(med) {
library(eda)
fit <- medpolish(y,trace.iter=F)
col.centers <- fit$col + fit$overall
resid <- fit$residuals
} else {
col.centers <- colMeans(y,na.rm=T)
row.effects <- rowMeans(y,na.rm=T) - mean(as.vector(y))
resid <- y - outer(row.effects, col.centers, "+")
}
# subtract column centers
y <- y - rep.row(col.centers,nrow(y))
} else {
if(missing(ylab)) ylab <- response.var(y)
# sort columns by center
#v = col.centers
if(length(dim(y)) == 2)
resid <- y - rep.col(rowMeans(y),ncol(y))
else
resid = y
}
if(any(is.na(resid))) {
if(xscale != "none" && !dimOrdered(y)[2])
cat("Warning: NAs prevent automatic ordering\n")
xscale = "none"
}
if(xscale == "none") {
x = 1:ncol(y)
} else {
s <- svd(resid)
x <- s$v[,1]
}
# x is the desired x positions
# remove flip ambiguity
# choose the ordering which has maximum inner product with 1:ncol
if(sum((1:length(x))*x) < sum((length(x):1)*x)) x = -x
i = rank.stable(x)
in.order = (!dimOrdered(y)[2] || all(i == 1:length(i)))
if(in.order) {
x <- switch(xscale,linear=x,equal=i,none=i)
} else {
x <- 1:ncol(y)
}
i <- order(x)
y <- reorder.cols(y,i)
x <- x[i]
if(!is.null(se)) {
se = reorder.cols(se,i)
# kludge for call below
se = t(se)
}
names(x) = colnames(y)
labeled_curves(x,t(y),se,xlab=xlab,ylab=ylab,...)
}
#' Plot labeled curves
#'
#' Draws multiple curves, each with a properly-placed label and unique
#' color/linestyle.
#'
#' Point \code{j} in curve \code{i} is at \code{(x[j],y[j,i])}. Thus all
#' curves must be the same length, and have points at the same horizontal
#' positions. If \code{y[i,j]=NA}, then the curve has a break at the previous
#' point, and resumes at the next \code{non-NA} point.
#'
#' If \code{se} is given, then an error bar will be placed around each point.
#'
#' @param x a numeric vector giving the horizontal position of each point.
#' @param y a matrix or data frame giving the vertical position of each point.
#' Each column defines one curve.
#' @param se a matrix or data frame, the same size as \code{y}, giving an error
#' bound on each value.
#' @param labels a character vector of labels for the lines. Defaults to the
#' column names of \code{y}.
#' @param xlab,ylab axis labels, by default the dimnames of \code{y}.
#' @param type indicates whether to draw points, lines, or both. See
#' \code{\link{plot}}.
#' @param group a numeric vector specifying how to group cases when assigning
#' colors.
#' @param color.palette a vector of colors, arbitrary length, or a function
#' with integer argument which generates a vector of colors. Used if
#' \code{col} is not specified. If shorter than the number of curves, colors
#' will be recycled and the line style will change.
#' @param col a vector of colors, as in a call to \code{\link{plot}}. Used to
#' explicitly set the color of each curve.
#' @param lty.palette a vector of line styles, arbitrary length. The line
#' style will rotate through these when there aren't enough colors.
#' @param lty a vector of line styles, as in a call to \code{\link{plot}}.
#' Used to explicitly set the style of each curve.
#' @param lwd line width.
#' @param jitter the amount by which to jitter the error bars, to avoid
#' overplotting. \code{jitter=0.02} is usually sufficient.
#' @param legend. if non-\code{NULL}, a position (as in \code{\link{my.legend}})
#' for a legend holding the labels; if \code{NULL}, the labels are placed next
#' to the lines.
#' @param cex character expansion factor.
#' @param horizontal If \code{TRUE}, the axes are laid out horizontally, so
#' that the curves run vertically.
#' @param mar a numerical vector giving the lines of margin on the four sides
#' of the plot (see \code{\link{par}}). If missing, it is set according to
#' \code{\link{auto.mar}}.
#' @param bg background color.
#' @param ylim desired plotting limits.
#' @param main title for the plot.
#' @param ... extra arguments for low-level plotting commands.
#' @author Tom Minka
#' @seealso \code{\link{parallel_plot}}
#' @export
labeled_curves <- function(x,y,se=NULL,labels,xtick=names(x),
xlab=NULL,ylab,type="o",
group,
color.palette=default.colors(6),col,
lty.palette=1:6,lty,lwd=2,jitter=0,
legend.=NULL,move=T,dryrun=F,
cex=1,horizontal=F,
mar,bg=par("bg"),ylim,main="",
xaxt=par("xaxt"),yaxt=par("yaxt"),...) {
# must include (xaxt,yaxt) because they are swapped for horizontal
# x is vector
# y is a list or matrix of columns
if(horizontal && missing(legend.)) legend. = 1
if(horizontal) x = -x
nam = names(dimnames(y))
if(is.null(xtick)) xtick = rownames(y)
if(is.matrix(y)) y = as.data.frame.matrix(y)
n = length(y)
#if(is.data.frame(y)) y = as.list(y)
if(is.null(xlab)) {
if(!is.null(nam)) xlab = nam[1]
else xlab = ""
}
if(missing(ylab)) {
if(!is.null(nam)) ylab = nam[2]
else ylab = ""
}
if(missing(mar)) {
mar = if(horizontal)
auto.mar(main=main,xlab=ylab,ylab=xlab,xaxt=yaxt,yaxt=xaxt,ytick=xtick,...)
else
auto.mar(main=main,xlab=xlab,ylab=ylab,xaxt=xaxt,yaxt=yaxt,xtick=xtick,...)
}
if(!missing(bg)) opar = par(mar=mar,bg=bg)
else opar = par(mar=mar)
on.exit(par(opar))
# plot.new here so that yinch and strwidth work
plot.new()
xlim <- range(x)
if(missing(labels)) labels = names(y)
if(!is.null(labels) && is.null(legend.)) {
# make space for labels
xspc = 0.1
w <- (max(strwidth(labels,units="inches",cex=cex))+xspc)/par("pin")[1]
xlim[2] <- xlim[2] + diff(xlim)*w/(1-w)
}
if(!is.null(se)) {
if(is.matrix(se)) se = as.data.frame.matrix(se)
if(any(dim(se) != dim(y))) stop("se not same size as y")
if(missing(ylim)) {
vec.y = unlist(y)
vec.se = unlist(se)
# need y in case se is NA
ylim = range(c(vec.y,vec.y-vec.se,vec.y+vec.se),finite=T)
}
# make room for arrow heads
w = 0.2/par("pin")[if(horizontal) 2 else 1]
dw = diff(xlim)*w/(1-w)
xlim[1] = xlim[1] - dw/2
xlim[2] = xlim[2] + dw/2
} else {
if(missing(ylim)) ylim = range(unlist(y),finite=T)
}
#if(dryrun) return(list(mar,xlim,ylim))
if(horizontal) {
plot.window(xlim=ylim,ylim=xlim)
if(is.null(xtick)) {
axis(2,yaxt=xaxt,...)
grid(nx=0)
} else {
#cat("columns are",names(x),"\n")
axis(2,x, labels=xtick,yaxt=xaxt,...)
# grid
abline(h=x,lty="dotted",col="lightgray")
}
axis(1,xaxt=yaxt,...)
}
else {
plot.window(xlim=xlim,ylim=ylim)
if(is.null(xtick)) {
axis(1,xaxt=xaxt,...)
grid(ny=0)
} else {
#cat("columns are",names(x),"\n")
axis(1,x, labels=xtick,xaxt=xaxt,...)
# grid
abline(v=x,lty="dotted",col="lightgray")
}
axis(2,yaxt=yaxt,...)
}
box()
title(main=main)
if(horizontal) title.las(ylab=xlab,xlab=ylab,...)
else title.las(xlab=xlab,ylab=ylab,...)
if(missing(group)) group = 1:n
if(missing(col)) {
if(mode(color.palette) == "function")
color.palette <- color.palette(n)
col <- color.palette[((group-1) %% length(color.palette))+1]
}
if(missing(lty)) {
lty <- lty.palette[(((group-1) %/% length(color.palette)) %% length(lty.palette))+1]
}
jitter = xinch(jitter)
jitter = jitter*((1:n)-0.5-n/2)
#if(length(x) > 1) jitter = jitter*min(diff(x))
if(!is.null(labels) && is.null(legend.)) {
xspc = xinch(xspc)
txt.x = txt.y = c()
}
for(i in 1:n) {
if(horizontal) {
lines(y[[i]],x,col=col[i],lty=lty[i],type=type,lwd=lwd, ...)
if(!is.null(se)) {
# arrows do not have lty
arrows(y[[i]]-se[[i]],x+jitter[i],y[[i]]+se[[i]],x+jitter[i],
len=0.1,angle=75,code=3,col=col[i],lty=lty[i],lwd=lwd,xpd=T)
}
} else {
lines(x, y[[i]],col=col[i],lty=lty[i],type=type,lwd=lwd, ...)
if(lty[i] == 7 && type != "p") {
lines(x, y[[i]],col=8,lty=lty[i],type="l",lwd=1, ...)
}
if(!is.null(se)) {
# arrows do not have lty
arrows(x+jitter[i],y[[i]]-se[[i]],x+jitter[i],y[[i]]+se[[i]],
len=0.1,angle=75,code=3,col=col[i],lwd=lwd,xpd=T)
}
}
if(!is.null(labels) && is.null(legend.)) {
# find the rightmost non-NA column in this row
j <- rev(which(!is.na(y[[i]])))[1]
txt.x[i] = x[j]+xspc
txt.y[i] = y[[i]][j]
}
}
if(!is.null(labels) && is.null(legend.)) {
# move text vertically to avoid collisions
h = strheight(labels,units="inch",cex=cex)
names(txt.y) = labels
if(move) txt.y = yinch(move.collisions(inchy(txt.y),h))
text(txt.x, txt.y, labels, col=col, adj=0, cex=cex, ...)
}
if(!is.null(labels) && !is.null(legend.)) {
#auto.legend(labels,col=col,lty=lty,text.col=col)
my.legend(legend.,labels,lty=lty,col=col,lwd=lwd)
}
}
#' @export
my.legend <- function(pos=c(1,1),...) {
if(length(pos) == 1) pos = c(pos,pos)
xlim = par("usr")[1:2]
ylim = par("usr")[3:4]
x = xlim[1] + diff(xlim)*pos[1]
y = ylim[1] + diff(ylim)*pos[2]
legend(x,y,...,xjust=pos[1],yjust=pos[2])
}
#' @export
title.las <- function(xlab=NULL,ylab=NULL,las=par("las"),...) {
if(las == 0) {
title(xlab=xlab,ylab=ylab,...)
} else if(las == 1) {
title(xlab=xlab,...)
title(ylab=ylab,line=5,...)
} else if(las == 2) {
title(xlab=xlab,line=5.5,...)
title(ylab=ylab,line=5,...)
} else if(las == 3) {
title(ylab=ylab,...)
title(xlab=xlab,line=5.5,...)
}
}
#' @export
errorbars <- function(x,se,las=1,color.palette=1,margin=1/3,...) {
# a dotplot with error bars.
# if x has more than one column, multiple charts will be drawn,
# side-by-side via split.screen().
#layout(matrix(c(1,1,2),1,3))
#layout.show(2)
if(F) {
mxy = linechart(t(x[1]),t(se[1]),labels=NULL,ylab=colnames(x)[1],
horizontal=T,dryrun=T,
las=las,color.pal=color.palette,type="p",legend=NULL,
xaxt="s",...)
print(mxy)
mar = mxy[[1]]; xlim = mxy[[2]]; ylim = mxy[[3]]
opar = par(mar=mar,mfrow=c(1,length(x)+1))
on.exit(par(opar))
plot.new()
plot.window(xlim=c(1,1),ylim=xlim)
print(par("usr"))
axis(2,at=-(1:nrow(x)),labels=rownames(x),las=las)
box()
}
#opar = par(mfrow=c(1,length(x)))
#on.exit(par(opar))
#split.screen(c(1,length(x)))
n = length(x)
if(n > 1) {
m = matrix(0,n,4)
x1 = 0; x2 = margin + (1-margin)/n
for(i in 1:length(x)) {
m[i,1] = x1
m[i,2] = x2
m[i,3] = 0
m[i,4] = 1
x1 = x2
x2 = x1 + (1-margin)/n
}
split.screen(m)
on.exit(close.screen(all=TRUE))
}
for(i in 1:n) {
if(n > 1) screen(i)
linechart(t(x[i]),t(se[i]),labels=NULL,ylab=colnames(x)[i],horizontal=T,
las=las,color.pal=color.palette,type="p",legend=NULL,
xaxt=if(i == 1) "s" else "n",...)
}
}
# Extensions and uses of loess
smooth <- function(x,y) {
if(length(unique(x)) >= 4) {
fit = smooth.spline(x,y)
predict(fit)
} else {
lowess(x,y)
}
}
plot.loess <- function(object,xlim,col=2,lwd=2,res=200,asp=NULL,add=F,...) {
x <- model.frame(object)
pred <- predictor.vars(x)
resp <- response.var(x)
if(missing(xlim)) xlim <- range(x[[pred]])
if(add) {
xlim[1] = max(xlim[1], par("usr")[1])
xlim[2] = min(xlim[2], par("usr")[2])
}
xt <- seq(xlim[1],xlim[2],len=res)
y <- predict(object,xt)
real.asp = if(identical(asp,"auto")) auto.aspect(xt,y) else asp
if(!add)
plot(x[[pred]],x[[resp]],xlab=pred,ylab=resp,asp=real.asp,...)
lines(xt,y,col=col,lwd=lwd,...)
}
if(T) {
smooth <- function(...) {
expr <- match.call()
expr[[1]] <- as.name("loess")
object <- eval(expr, parent.frame())
#object = loess(...)
class(object) = c("smooth","loess")
object
}
print.smooth = get("print.loess",environment(loess))
plot.smooth = plot.loess
predict.smooth = get("predict.loess",environment(loess))
} else {
smooth <- function(fmla,data=parent.frame(),span=2/3,...) {
given = given.vars(fmla)
if(!is.null(given)) {
fmla2 = remove.given(fmla)
g = data[[given]]
if(!is.factor(g)) stop("given must be a factor")
children = list()
for(lev in levels(g)) {
children[[lev]] = smooth(fmla2,data[g == lev,],span,...)
}
object = list(model=data,terms=terms(fmla),given=given,children=children)
class(object) = "multi.smooth"
return(object)
}
x = model.frame(fmla,data)
pred = predictor.vars(x)[1]
resp = response.var(x)
xy = lowess(x[[pred]],x[[resp]],f=span,...)
names(xy$x) = rownames(x)
names(xy$y) = rownames(x)
res = x[[resp]] - xy$y
orig.x = as.named.vector(data[pred])
object = c(xy,list(model=x,terms=terms(x),residuals=res,orig.x=orig.x))
class(object) = "smooth"
object
}
print.multi.smooth <- function(object) {
cat("Multi-smooth object: ")
print(formula(object))
cat(object$given,"values:", names(object$children), "\n")
#print(object$children)
}
print.smooth <- function(object) {
cat("Smooth object: ")
print(formula(object))
}
predict.multi.smooth <- function(object,frame) {
if(missing(frame)) frame = object$model
g = frame[,object$given]
r = named.numeric(rownames(frame))
for(lev in levels(g)) {
i = (g == lev)
r[i] = predict.smooth(object$children[[lev]], frame[i,])
}
r
}
predict.smooth <- function(object,x=object$orig.x) {
if(is.data.frame(x)) x = x[[predictor.vars(object)[1]]]
y = approx(object$x,object$y,x)$y
if(is.null(names(y))) names(y) = names(x)
y
}
}
model.frame.smooth = model.frame.loess
lowess2 <- function(x,y,f=2/3,res=200,...) {
if(length(unique(x)) < length(x)/4) return(lowess(x,y,f=f))
object = smooth(y~x,data.frame(x,y),span=f,...)
xlim <- range(x)
xo <- seq(xlim[1],xlim[2],len=res)
yo <- predict(object,xo)
list(x=xo,y=yo)
}
loprob <- function(x,y,degree=3,...) {
# simulates lowess for binary response
# returns list(x,y)
lev <- levels(y)
if(length(lev) != 2) stop("y must have two levels")
if(!is.factor(x)) {
from <- min(x)
to <- max(x)
}
if(T) {
my.lowess = function(x,y,span=2/3,...) lowess(x,y,f=span,iter=0,...)
my.lowess(x,y,...)
} else if(T) {
# smoothing spline
y <- as.numeric(y)-1
r <- seq(from,to,length=50)
if(degree == 3 && length(unique(x)) >= 4) {
fit = smooth.spline(x,y)
p <- predict(fit,r)$y
} else {
fit <- loess(y~x,degree=degree,...)
p <- predict(fit,r)
}
list(x=r,y=p+1)
} else if(F) {
# density estimate - too bumpy
densf <- function(x) {
if(is.factor(x)) {
list(x=levels(x),y=tabulate(x)/length(x))
} else {
density(x,from=from,to=to,adjust=1.5)
}
}
dens <- lapply(split(x,y),densf)
p <- sum(y==levels(y)[2])/length(y)
p <- p*dens[[2]]$y/((1-p)*dens[[1]]$y + p*dens[[2]]$y)
list(x=dens[[1]]$x,y=p+1)
} else if(F) {
# hill density estimate - too noisy
xr <- seq(from,to,length=50)
densf <- function(x) hill.density(x,xr)
dens <- lapply(split(x,y),densf)
p <- dens[[2]]/(dens[[1]] + dens[[2]])
list(x=xr,y=p+1)
} else {
# gam smooth - too slow
library(mgcv)
y <- as.numeric(y)-1
fit <- gam(y~s(x),family=binomial)
r <- seq(from,to,length=50)
p <- predict(fit,data.frame(x=r),type="response")
list(x=r,y=p+1)
}
}