R/ancbbpb.R

ancbbpb <-
function(x1,y1,x2,y2,fr1=1,est=tmean,fr2=1,nboot=200,pts=NA,plotit=TRUE,SCAT=TRUE,
pch1='+',pch2='o',
SEED=TRUE,alpha=.05,RNA=T,sm=FALSE,LP=TRUE,xout=FALSE,outfun=outpro,...){
#
# Compare two independent  groups using an ancova method.
# A running-interval smooth is used to estimate the regression lines and is
# based in part on bootstrap bagging.
#
#  This function is limited to two groups and one covariate.
#
# No assumption is made about the parametric form of the regression
# lines.
# Confidence intervals are computed using a percentile bootstrap
# method. Comparisons are made at five empirically chosen design points when
# pts=NA. To compare groups at specified x values, use pts.
# Example: pts=c(60,70,80) will compare groups at the three design points
# 60, 70 and 80.
#
#   xout=F, when plotting, keep leverage points
#   sm=F, when plotting, do not use bootstrap bagging
#
#  Assume data are in x1 y1 x2 and y2
#
# fr1 and fr2 are the spans used by the smooth.
#
#  SCAT=FALSE will suppress the scatterplot when plotting the regression lines.
#
# RNA=F, when computing bagged estimate, NA values are not removed
#  resulting in no estimate of Y at the specified design point,
#  RNA=T, missing values are removed and the remaining values are used.
#
xy=elimna(cbind(x1,y1))
x1=xy[,1]
y1=xy[,2]
xy=elimna(cbind(x2,y2))
x2=xy[,1]
y2=xy[,2]
#
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
if(SEED)set.seed(2)
flag=TRUE
if(is.na(pts[1])){
flag=FALSE
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,8)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","ci.low","ci.hi","p.value","p.crit"))
gv1<-vector("list")
for (i in 1:5){
j<-i+5
temp1<-y1[near(x1,x1[isub[i]],fr1)]
temp2<-y2[near(x2,x1[isub[i]],fr2)]
temp1<-temp1[!is.na(temp1)]
temp2<-temp2[!is.na(temp2)]
mat[i,1]<-x1[isub[i]]
mat[i,2]<-length(temp1)
mat[i,3]<-length(temp2)
mat[,4]<-runmbo(x1,y1,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,RNA=RNA)-
runmbo(x2,y2,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,RNA=RNA)
gv1[[i]]<-temp1
gv1[[j]]<-temp2
}
I1<-diag(5)
I2<-0-I1
con<-rbind(I1,I2)
estmat1<-matrix(nrow=nboot,ncol=length(isub))
estmat2<-matrix(nrow=nboot,ncol=length(isub))
data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot)
#
for(ib in 1:nboot){
estmat1[ib,]=runmbo(x1[data1[ib,]],y1[data1[ib,]],pts=x1[isub],
pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,...)
estmat2[ib,]=runmbo(x2[data2[ib,]],y2[data2[ib,]],pts=x1[isub],
pyhat=T,plotit=FALSE,SEED=FALSE,est=est,...)
}
dif<-(estmat1<estmat2)
dif0<-(estmat1==estmat2)
pvals=apply(dif,2,mean,na.rm=TRUE)+.5*apply(dif0,2,mean,na.rm=TRUE)
tmat<-rbind(pvals,1-pvals)
pvals=2*apply(tmat,2,min)
mat[,7]<-pvals
for(ij in 1:length(isub)){
dif<-estmat1[,ij]-estmat2[,ij]
dif<-elimna(dif)
nbad<-length(dif)
lo<-round(nbad*alpha/2)
hi<-nbad-lo
dif<-sort(dif)
mat[ij,5]<-dif[lo]
mat[ij,6]<-dif[hi]
}
}
if(!is.na(pts[1])){
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
if(n1[i]<=5)print(paste("Warning, there are",n1[i]," points corresponding to the design point X=",pts[i]))
if(n2[i]<=5)print(paste("Warning, there are",n2[i]," points corresponding to the design point X=",pts[i]))
}
mat<-matrix(NA,length(pts),7)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","ci.low","ci.hi",
"p.value"))
gv<-vector("list",2*length(pts))
for (i in 1:length(pts)){
g1<-y1[near(x1,pts[i],fr1)]
g2<-y2[near(x2,pts[i],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
j<-i+length(pts)
gv[[i]]<-g1
gv[[j]]<-g2
}
I1<-diag(length(pts))
I2<-0-I1
con<-rbind(I1,I2)
isub=c(1:length(pts))
estmat1<-matrix(nrow=nboot,ncol=length(isub))
estmat2<-matrix(nrow=nboot,ncol=length(isub))
data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot)
est1=runmbo(x1,y1,pts=pts,pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,...)
est2=runmbo(x2,y2,pts=pts,pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,...)
mat[,4]<-est1-est2
for(ib in 1:nboot){
estmat1[ib,]=runmbo(x1[data1[ib,]],y1[data1[ib,]],pts=pts,pyhat=TRUE,plotit=FALSE,
SEED=FALSE,est=est,...)
estmat2[ib,]=runmbo(x2[data2[ib,]],y2[data2[ib,]],pts=pts,pyhat=TRUE,plotit=FALSE,
SEED=FALSE,est=est,...)
}
dif<-(estmat1<estmat2)
dif0<-(estmat1==estmat2)
pvals=apply(dif,2,mean,na.rm=TRUE)+.5*apply(dif0,2,mean,na.rm=TRUE)
tmat<-rbind(pvals,1-pvals)
pvals=2*apply(tmat,2,min)
#
mat[,1]<-pts
mat[,2]<-n1
mat[,3]<-n2
mat[,7]<-pvals
for(ij in 1:length(pts)){
dif<-sort(estmat1[,ij]-estmat2[,ij])
dif<-elimna(dif)
nbad<-length(dif)
lo<-round(nbad*alpha/2)
hi<-nbad-lo
mat[ij,5]<-dif[lo]
mat[ij,6]<-dif[hi]
}
}
temp2<-order(0-pvals)
zvec=alpha/c(1:length(pvals))
if(flag)mat[temp2,7]=zvec
if(!flag)mat[temp2,8]=zvec
if(plotit)
runmean2g(x1,y1,x2,y2,fr=fr1,est=est,sm=sm,LP=LP,xout=FALSE,SCAT=SCAT,pch1=pch1,pch2=pch2,...) 
#                outliers already removed if argument xout=T
list(output=mat)
}
musto101/wilcox_R documentation built on May 23, 2019, 10:52 a.m.