rm.boot <- function(time, y, id=seq(along=time), subset=TRUE,
plot.individual=FALSE,
bootstrap.type=c('x fixed','x random'),
nk=6, knots, B=500, smoother=supsmu,
xlab, xlim, ylim=range(y),
times=seq(min(time),max(time),length=100),
absorb.subject.effects=FALSE, rho=0,
cor.pattern=c('independent','estimate'), ncor=10000,
...)
{
bootstrap.type <- match.arg(bootstrap.type)
absorb.subject.effects <- absorb.subject.effects & !missing(id)
if(!is.function(cor.pattern))
cor.pattern <- match.arg(cor.pattern)
if(!(is.character(cor.pattern) && cor.pattern=='independent') &&
rho!=0)
stop("can't specify both cor.pattern='estimate' and rho")
if(rho != 0)
cor.pattern <- 'equal correlation'
dodep <- rho !=0 || !is.character(cor.pattern) || cor.pattern=='estimate'
## X fixed also implies that subjects are fixed
id <- as.character(id)
ylab <- label(y)
if(ylab=='')
ylab <- 'y'
if(missing(xlab)) {
xlab <- units(time)
if(xlab=='')
xlab <- 'Time'
}
if(length(subset) > 1) {
id <- id[subset];
time <- time[subset]; y <- y[subset]
}
s <- is.na(time + y)
if(any(s)) {
s <- !s
id <- id[s]
time <- time[s]
y <- y[s]
}
## Need to order data so that a subject's records stay together
## Otherwise, the mean residuals at each time will not vary over resamples
## when bootstrap.type='x fixed'
s <- order(id, time)
id <- id[s];
time <- time[s];
y <- y[s]
if(bootstrap.type=='x fixed' && diff(range(table(id))) != 0)
warning('To work properly with bootstrap.type="x fixed" all subjects must have the same # observations')
n <- length(y)
clusters <- unique(id)
if(plot.individual) {
ploti <- function(time, y, id, clusters, xlim, ylim, xlab, ylab,
smoother, ...)
{
plot(0,0,xlim=range(pretty(range(time))),ylim=ylim,
xlab=xlab, ylab=ylab, type='n')
j <- 0
for(i in clusters) {
s <- id==i
j <- j+1
lines(smoother(time[s],y[s],...),lty=j)
}
}
ploti(time, y, id, clusters, xlim, ylim, xlab, ylab, smoother, ...)
}
if(nk==0) knots <- double(0)
if(missing(knots) && nk>0) {
knots <- rcspline.eval(time,nk=nk,knots.only=TRUE)
if(length(knots) != nk) {
warning('could not obtain requested number of knots')
nk <- length(knots)
}
} else nk <- length(knots)
p <- if(nk==0) 1
else nk-1
X.times <- if(nk==0) as.matrix(times)
else rcspline.eval(times, knots, inclx=TRUE)
X.Time <- if(nk==0) as.matrix(time)
else rcspline.eval(time, knots, inclx=TRUE)
X <- if(missing(id)) cbind(X.Time,1)
else
model.matrix(~ X.Time+id-1,
data=list(X.Time=X.Time,id=as.factor(id)))
## was id=id 3Apr02 Thanks: Don MacQueen, for R
f <- lm.fit.qr.bare(X, y, intercept=FALSE)
res <- f$residuals
sigma2 <- sum(res^2)/n
if(absorb.subject.effects) {
mean.intercept <- mean(c(0,f$coef[-(1:p)]))
y <- y + mean.intercept - (f$coef[-(1:p)])[paste('id',id,sep='')]
if(plot.individual) {
ploti(time, y, id, clusters, xlim, ylim, xlab, ylab, smoother, ...)
title('Raw Data Adjusted to Have a Common Intercept')
}
}
if(is.character(cor.pattern) && cor.pattern=='estimate') {
timediff <- product <- single(ncor)
used <- 0
i <- 0
meanres <- tapply(res, time, mean)
names(meanres) <- as.numeric(names(meanres))
sdres <- sqrt(tapply(res, time, var))
names(sdres) <- as.numeric(names(sdres))
if(any(is.na(sdres)))
stop('one or more times occur in only one subject')
for(wid in clusters) {
s <- id==wid
x <- time[s]
cx <- as.character(x)
r <- (res[s] - meanres[cx])/sdres[cx]
if(any(is.na(r)))
stop('program logic error')
diffs <- outer(x, x, FUN=function(a,b)abs(a-b))
prods <- outer(r, r, FUN='*')
np <- length(prods)
if(used + np > ncor) {
cat('\nUsed only',i,'subjects in estimating covariance pattern.\nMay want to increase ncor.\n')
break
}
i <- i+1
timediff[(used+1):(used+np)] <- diffs
product[(used+1):(used+np)] <- prods
used <- used+np
}
timediff <- timediff[1:used]; product <- product[1:used]
product <- tapply(product, round(timediff,4), mean)
timediff <- as.numeric(names(product))
product[timediff==0] <- 1
plot(timediff, product, xlab='Absolute Difference in Time',
ylab='Correlation', type='b')
cor.pattern <- list(x=timediff, y=product)
}
##Subject effects are at the end, using cell means model
##Take intercept as average of all subject effects
cof <- function(fit,p)
{
ko <- fit$coef
c(mean(ko[-(1:p)]), ko[1:p])
}
o.coef <- cof(f,p)
if(bootstrap.type=='x random') {
orig.obsno <- split(1:n, id)
} else {
R <- split(res, id)
yhat <- if(!absorb.subject.effects) f$fitted.values
else o.coef[1] + X.Time %*% o.coef[-1]
}
Coef <- matrix(NA, B+1, p+1)
sse <- loglik <- single(B+1)
loglik.dep <- NULL
Coef[1,] <- o.coef
sse[1] <- sigma2*n
loglik[1] <- n*logb(2*pi*sigma2) + n
if(dodep) {
loglik.dep <- loglik
lldep <- function(time, id, sigma2, res, rho, cor.pattern)
{
ll <- 0
for(subj in unique(id)) {
s <- id==subj
x <- time[s]
y <- res[s]
p <- sum(s)
if(is.character(cor.pattern) && cor.pattern=='equal correlation')
cov <- sigma2*(diag(rep(1-rho,p))+rho)
else {
cov <- if(is.function(cor.pattern))
outer(x, x, cor.pattern)*sigma2
else {
timediff <- outer(x, x, function(a,b)abs(a-b))
matrix(approx(cor.pattern, xout=timediff)$y, nrow=p)*sigma2
}
}
## Following code taken from dmvnorm()
eS <- eigen(cov, symmetric = TRUE)
## y <- y %*% (eS$vectors * rep(1/sqrt(eS$values), each = p)) 24Feb02
y <- y %*% (eS$vectors * rep(1/sqrt(eS$values),
rep(p,length(eS$values))))
logl <- sum(y^2) + p*logb(2*pi) + logb(prod(eS$values))
ll <- ll + logl
}
ll
}
loglik.dep[1] <- lldep(time, id, sigma2, res, rho, cor.pattern)
}
uneven <- 0
for(i in 1:B) {
if(i %% 10 ==0)
cat(i,'')
pts <- sample(clusters, replace=TRUE)
if(bootstrap.type=='x random') {
obsn <- unlist(orig.obsno[pts])
idb <- id[obsn]
xt <- X.Time[obsn,,drop=FALSE]
f.b <- lm.fit.qr.bare(if(absorb.subject.effects || missing(id))
cbind(xt,1)
else
model.matrix(~xt+idb-1,
data=list(xt=xt,idb=as.factor(idb))),
y[obsn], intercept=FALSE)
## was idb=idb 3Apr02
} else {
rr <- unlist(R[pts])
lrr <- length(rr)
uneven <- max(uneven, abs(lrr-n))
if(lrr > n)
rr <- rr[1:n]
else if(lrr < n)
rr <- c(rr, sample(rr, n-lrr, replace=TRUE))
yb.e <- yhat + rr
f.b <- if(absorb.subject.effects)
lm.fit.qr.bare(cbind(X.Time,1), yb.e,
intercept=FALSE)
else
lm.fit.qr.bare(X, yb.e, intercept=FALSE)
}
cofb <- cof(f.b, p) #26Jun97
pred <-
if(bootstrap.type=='x fixed') {
if(!absorb.subject.effects)
X %*% f.b$coefficients
else
cofb[1] + X.Time %*% cofb[-1]
} else cofb[1] + X.Time %*% cofb[-1]
## x random case may only work properly if absorb.subject.effects, as
## we have to ignore the original subject ids anyway (the bootstrap
## sample in general won't represent all subjects)
Coef[i+1,] <- cofb #26Jun97
sse[i+1] <- sum((y-pred)^2)
sigma2 <- sum(f.b$residuals^2)/length(f.b$residuals)
loglik[i+1] <- n*logb(2*pi*sigma2) + sse[i+1]/sigma2
if(dodep)
loglik.dep[i+1] <- lldep(time, id, sigma2, y-pred,
rho, cor.pattern)
}
if(uneven>0)
warning(paste('Subjects had unequal number of records.\nMaximum discrepency between ',
'total number of bootstrap records sampled and original\nnumber of ',
'records (',n,') is ',uneven,'. Bootstrap estimates are approximate.',
sep=''))
if(dodep) {
srho <- spearman(loglik, loglik.dep)
cat('\n\nSpearman rank correlation between',B+1,'log likelihoods ',
'assuming independence and assuming dependence:',
round(srho,3),'\n')
}
mode(Coef) <- 'single'
mode(sse) <- 'single'
structure(list(Coef=Coef, sse=sse, loglik=loglik, loglik.dep=loglik.dep,
times=times, X.times=X.times,
xlab=xlab, ylab=ylab, ylim=ylim,
bootstrap.type=bootstrap.type, fit=f, knots=knots,
rho=rho, cor.pattern=cor.pattern),
class='rm.boot')
}
plot.rm.boot <-
function(x, obj2, conf.int=.95,
xlab=x$xlab, ylab=x$ylab, xlim, ylim=x$ylim,
individual.boot=FALSE,
pointwise.band=FALSE,
curves.in.simultaneous.band=FALSE,
col.pointwise.band=2,
objective=c('-2 log L','sse','dep -2 log L'),
add=FALSE, ncurves,
multi=FALSE, multi.method=c('color','density'),
multi.conf=c(.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,.95,.99),
multi.density=c(-1,90,80,70,60,50,40,30,20,10, 7,4),
multi.col =c( 1, 8,20, 5, 2, 7,15,13,10,11,9,14),
subtitles=TRUE, ...)
{
## 2 was between 5 and 7, 17 was between 8 and 20
obj <- x
objective <- match.arg(objective)
if(missing(objective))
objective <-
if(obj$rho==0 && is.character(obj$cor.pattern))
'-2 log L'
else 'dep -2 log L'
sse <- switch(objective,
sse = obj$sse,
'-2 log L' = obj$loglik,
'dep -2 log L' = obj$loglik.dep)
B <- length(sse)
Coef <- obj$Coef
times <- obj$times
if(!missing(obj2)) {
if((length(times) != length(obj2$times)) ||
(any(times != obj2$times, na.rm=TRUE)))
stop('times vector must be identical for both rm.boot objects')
times <- ifelse(is.na(times), NA, obj2$times)
sse <- sse + obj2$sse
if(missing(ylab))
ylab <- paste(obj$ylab,'-',obj2$ylab)
}
## order from best -2 log likelihood or sum of squared errors to worst
i <- order(sse)
## Select best confidence coefficient*B estimates
conf <- if(multi) max(multi.conf)
else conf.int
i <- i[1:round(conf*B)]
if(i[1] != 1)
warning(paste('design is imbalanced enough that best log likelihood or SSE was not\n',
'obtained from overall fit (objective=',format(sse[1]),') but from\n',
'a bootstrap fit (objective=',format(sse[i[1]]),
')\nThis can also happen if the objective is not -2 log L',sep=''))
## Evaluate all fits on time grid and compute point by point max and min
curves <- cbind(1,obj$X.times) %*% t(Coef)
if(!missing(obj2)) {
curves <- curves - cbind(1,obj2$X.times) %*% t(obj2$Coef)
if(missing(ylim))
ylim <- range(curves[,i])
}
if(multi) {
multi.method <- match.arg(multi.method)
if(missing(xlim))
plot(times, curves[,1], type='n',
xlab=xlab, ylab=ylab, ylim=ylim)
else
plot(times, curves[,1], type='n',
xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim)
title(paste('Simultaneous',min(multi.conf),'-',max(multi.conf),
'Confidence Regions'))
high.prev <- low.prev <- curves[,1]
for(j in 1:length(multi.conf)) {
ii <- i[1:round(multi.conf[j]*B)]
high <- apply(curves[,ii], 1, max)
low <- apply(curves[,ii], 1, min)
if(multi.method=='density') {
polygon(c(times,rev(times)), c(high.prev,rev(high)),
density=multi.density[j])
polygon(c(times,rev(times)), c(low.prev, rev(low)),
density=multi.density[j])
} else {
polygon(c(times,rev(times)), c(high.prev,rev(high)),
col=multi.col[j])
polygon(c(times,rev(times)), c(low.prev, rev(low)),
col=multi.col[j])
}
high.prev <- high; low.prev <- low
}
lines(times, curves[,1], lwd=2, col=0) ## point estimates in white
} else {
if(add)
lines(times, curves[,1])
else {
if(missing(xlim))
plot(times, curves[,1], type='l',
xlab=xlab, ylab=ylab, ylim=ylim)
else
plot(times, curves[,1], type='l',
xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim)
title(paste('Simultaneous',conf.int,'Confidence Region'))
}
high <- apply(curves[,i], 1, max)
low <- apply(curves[,i], 1, min)
lines(times, high, lty=2)
lines(times, low, lty=2)
}
result <- list(times=times, fitted=curves[,1], lower=low, upper=high)
if(individual.boot || curves.in.simultaneous.band) {
subs <- if(individual.boot) 1:B
else i
if(!missing(ncurves))
subs <- sample(subs, ncurves)
for(j in subs)
lines(times, curves[,j], lty=2)
}
if(pointwise.band) {
p <- apply(curves, 1, quantile, probs=c((1-conf.int)/2,1-(1-conf.int)/2))
lines(times,p[1,],col=col.pointwise.band)
lines(times,p[2,],col=col.pointwise.band)
result <- c(result, list(pointwise.lower=p[1,], pointwise.upper=p[2,]))
}
if(!add && subtitles) {
title(sub=obj$bootstrap.type,adj=1)
title(sub=paste(B-1,'bootstrap repetitions'),adj=0)
}
invisible(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.