R/regpre.R

regpre <-
function(x,y,regfun=lsfit,error=absfun,nboot=100,adz=TRUE,
mval=round(5*log(length(y))),model=NULL,locfun=mean,pr=FALSE,
xout=FALSE,outfun=out,STAND=TRUE,
plotit=TRUE,xlab="Model Number",ylab="Prediction Error",SEED=TRUE,...){
#
#   Estimate prediction error using the regression method
#   regfun. The .632 method is used.
#   (See Efron and Tibshirani, 1993, pp. 252--254)
#
#   The predictor values are assumed to be in the n-by-p matrix x.
#   The default number of bootstrap samples is nboot=100
#
#   Prediction error is the expected value of the function error.
#   The argument error defaults to squared error.
#
#   regfun can be any R function that returns the coefficients in
#   the vector regfun$coef, the first element of which contains the
#   estimated intercept, the second element contains the estimate of
#   the first predictor, etc.
#
#   The default value for mval, the number of observations to resample
#   for each of the B bootstrap samples is based on results by
#   Shao (JASA, 1996, 655-665). (Resampling n vectors of observations
#   model selection may not lead to the correct model as n->infinity.
#
#   The argument model should have list mode, model[[1]] indicates
#   which predictors are used in the first model. For example, storing
#   1,4 in model[[1]] means predictors 1 and 4 are being considered.
#   If model is not specified, and number of predictors is at most 5,
#   then all models are considered.
#
#   If adz=T, added to the models to be considered is where
#   all regression slopes are zero. That is, use measure of location only
#   corresponding to
#   locfun.
#
if(pr){
print("By default, least squares regression is used, ")
print("But from Wilcox, R. R. 2008, Journal of Applied Statistics, 35, 1-8")
print("Setting regfun=tsreg appears to be a better choice for general use.")
print("That is, replace least squares with the Theil-Sen estimator")
print("Note: Default for the argument error is now absfun")
print(" meaning absolute error is used")
print("To use squared error, set error=sqfun")
}
x<-as.matrix(x)
d<-ncol(x)
p1<-d+1
temp<-elimna(cbind(x,y))
x<-temp[,1:d]
y<-temp[,d+1]
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
if(!STAND)flag<-outfun(x,plotit=FALSE,...)$keep
if(STAND)flag<-outpro(x,STAND=TRUE,plotit=FALSE)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(is.null(model)){
if(d<=5)model<-modgen(d,adz=adz)
if(d>5)model[[1]]<-c(1:ncol(x))
}
mout<-matrix(NA,length(model),5,dimnames=list(NULL,c("apparent.error",
"boot.est","err.632","var.used","rank")))
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
data<-matrix(sample(length(y),size=mval*nboot,replace=TRUE),nrow=nboot)
bid<-apply(data,1,idb,length(y))
#  bid is an n by nboot matrix. If the jth bootstrap sample from
#  1, ..., mval contains the value i, bid[i,j]=0; otherwise bid[i,j]=1
for (imod in 1:length(model)){
nmod=length(model[[imod]])-1
temp=c(nmod:0)
mout[imod,4]=sum(model[[imod]]*10^temp)
if(sum(model[[imod]]==0)!=1){
xx<-x[,model[[imod]]]
xx<-as.matrix(xx)
if(sum(model[[imod]]==0)!=1)bvec<-apply(data,1,regpres1,xx,y,regfun,mval,...)
# bvec is a p+1 by nboot matrix. The first row
# contains the bootstrap intercepts, the second row
# contains the bootstrap values for first predictor, etc.
if(sum(model[[imod]]==0)!=1)yhat<-cbind(1,xx)%*%bvec
if(sum(model[[imod]]==0)==1){
bvec0<-matrix(0,nrow=p1,ncol=nboot)
for(it in 1:nboot){
bvec0[1,it]<-locfun(y[data[it,]])
}
yhat<-cbind(1,x)%*%bvec0
}
# yhat is n by nboot matrix of predicted values based on
                           # bootstrap regressions.
bi<-apply(bid,1,sum) # B sub i in notation of Efron and Tibshirani, p. 253
temp<-(bid*(yhat-y))
diff<-apply(temp,1,error)
ep0<-sum(diff/bi)/length(y)
aperror<-error(regfun(xx,y,...)$resid)/length(y) # apparent error
regpre<-.368*aperror+.632*ep0
mout[imod,1]<-aperror
mout[imod,3]<-regpre
temp<-yhat-y
diff<-apply(temp,1,error)
mout[imod,2]<-sum(diff)/(nboot*length(y))
}
if(sum(model[[imod]]==0)==1){
mout[imod,3]<-locpre(y,error=error,est=locfun,SEED=SEED,mval=mval)
}}
mout[,5]=rank(mout[,3])
if(plotit)plot(c(1:nrow(mout)),mout[,3],xlab=xlab,ylab=ylab)
list(estimates=mout)
}
musto101/wilcox_R documentation built on May 23, 2019, 10:52 a.m.