# R/Rallfun-v24.R


# TOP
#  Sept 12, 2013

DqdifMC<-function(x,y=NULL,q=.25,nboot=1000,plotit=TRUE,xlab='Group 1 - Group 2',SEED=TRUE,alpha=.05){
#
#  Compare two dependent groups by comparing the
#  q and 1-q quantiles of the difference scores
#
# q should be < .5.
# If the groups do not differ, then the difference scores should be
# symmetric about zero; in particular, the sum of the q and 1-q quantiles
# should be zero.
#
# q indicates the quantiles to be compared. By default, the .25 and .75 quantiles are used.
#
library(parallel)
if(SEED)set.seed(2)
if(q>=.5)stop('q should be less than .5')
if(!is.null(y)){
xy=elimna(cbind(x,y))
dif=xy[,1]-xy[,2]
}
if(is.null(y))dif=elimna(x)
x=as.matrix(x)
n=length(dif)
if(plotit)akerd(dif,xlab=xlab)
bvec=NA
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
data=listm(t(data))
bvec<-mclapply(data,difQMC_sub,dif,q,mc.preschedule=TRUE)
bvec=matl(bvec)
est1=hd(dif,q=q)
est2=hd(dif,q=1-q)
pv=mean(bvec<0)+.5*mean(bvec==0)
p=2*min(c(pv,1-pv))
low<-round((alpha/2)*nboot)+1
up<-nboot-low
sbvec=sort(bvec)
ci=sbvec[low]
ci[2]=sbvec[up]
list(est.q=est1,est.1.minus.q=est2,conf.interval=ci,p.value=p)
}
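#  A minimal usage sketch (not part of the original file): simulated paired
#  data, comparing the .25 and .75 quantiles of the difference scores.
#  Wrapped in if(FALSE) so that sourcing this file does not execute it.
if(FALSE){
set.seed(44)
x<-rnorm(30)
y<-x+rnorm(30,mean=.5)          # second measure shifted upward
DqdifMC(x,y,q=.25,nboot=500)    # returns estimates, CI and p-value
}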

difQMC_sub<-function(data,dif,q){
es=hd(dif[data],q)+hd(dif[data],1-q)
es
}


ancGparMC<-function(x1,y1,x2,y2,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,eout=FALSE,outfun=outpro,
STAND=TRUE,plotit=TRUE,xlab="X",ylab="Y",ISO=FALSE,...){
#
#  Test hypothesis that for two independent groups, all regression parameters are equal
#  By default the Theil--Sen estimator is used
#
#  Strategy: Use bootstrap estimate of standard errors followed by
#  Johansen type test statistic.
#
#  ISO=TRUE, ignore intercept, test only the slope parameters.
#
x1=as.matrix(x1)
p=ncol(x1)
p1=p+1
xy=elimna(cbind(x1,y1))
x1=xy[,1:p]
y1=xy[,p1]
x2=as.matrix(x2)
p=ncol(x2)
p1=p+1
xy=elimna(cbind(x2,y2))
x2=xy[,1:p]
y2=xy[,p1]
if(plotit){
xx1=x1
yy1=y1
xx2=x2
yy2=y2
if(ncol(as.matrix(x1))==1){
if(eout){
flag=outfun(cbind(x1,y1),plotit=FALSE,...)$keep
xx1=x1[flag]
yy1=y1[flag]
flag=outfun(cbind(x2,y2),plotit=FALSE,...)$keep
xx2=x2[flag]
yy2=y2[flag]
}
if(xout){
flag=outfun(xx1,plotit=FALSE,...)$keep
xx1=xx1[flag]
yy1=yy1[flag]
flag=outfun(xx2,plotit=FALSE,...)$keep
xx2=xx2[flag]
yy2=yy2[flag]
}
plot(c(xx1,xx2),c(yy1,yy2),type="n",xlab=xlab,ylab=ylab)
points(xx1,yy1)
points(xx2,yy2,pch="+")
abline(regfun(xx1,yy1,...)$coef)
abline(regfun(xx2,yy2,...)$coef,lty=2)
}}
x=list()
y=list()
x[[1]]=x1
x[[2]]=x2
y[[1]]=y1
y[[2]]=y2
if(!ISO)output=reg1wayMC(x,y,regfun=regfun,nboot=nboot,xout=xout,outfun=outfun,
SEED=SEED,STAND=STAND,...)
if(ISO)output=reg1wayISOMC(x,y,regfun=regfun,nboot=nboot,xout=xout,outfun=outfun,
SEED=SEED,STAND=STAND,...)
output
}
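#  A minimal usage sketch (illustrative, simulated data): testing whether two
#  independent groups have identical regression parameters. The function
#  reg1wayMC, used internally, is defined elsewhere in this file.
if(FALSE){
set.seed(5)
x1<-rnorm(50); y1<-x1+rnorm(50)
x2<-rnorm(50); y2<-.5*x2+rnorm(50)   # different slope
ancGparMC(x1,y1,x2,y2,nboot=100)
}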



qcomhdMC<-function(x,y,q=c(.1,.25,.5,.75,.9),nboot=2000,plotit=TRUE,SEED=TRUE,xlab="Group 1",ylab="Est.1-Est.2",alpha=.05){
#
# Compare quantiles using a percentile bootstrap (via qcom.sub)
# in conjunction with the hd estimator. Tied values are allowed.
#
# When comparing lower or upper quartiles, both power and the probability of
# a Type I error compare well with other methods that have been derived.
# q: can be used to specify the quantiles to be compared
# q defaults to comparing the .1, .25, .5, .75, and .9 quantiles
#
#   The function returns p-values and critical p-values based on Hochberg's method.
#
library(parallel)
if(SEED)set.seed(2)
pv=NULL
output=matrix(NA,nrow=length(q),ncol=10)
dimnames(output)<-list(NULL,c("q","n1","n2","est.1","est.2","est.1_minus_est.2","ci.low","ci.up","p_crit","p-value"))
for(i in 1:length(q)){
output[i,1]=q[i]
output[i,2]=length(elimna(x))
output[i,3]=length(elimna(y))
output[i,4]=hd(x,q=q[i])
output[i,5]=hd(y,q=q[i])
output[i,6]=output[i,4]-output[i,5]
temp=qcom.sub(x,y,nboot=nboot,q=q[i],SEED=FALSE,alpha=alpha)
output[i,7]=temp$ci[1]
output[i,8]=temp$ci[2]
output[i,10]=temp$p.value
}
temp=order(output[,10],decreasing=TRUE)
zvec=alpha/c(1:length(q))
output[temp,9]=zvec
output <- data.frame(output)
output$signif=rep("YES",nrow(output))
for(i in 1:nrow(output)){
if(output[temp[i],10]>output[temp[i],9])output$signif[temp[i]]="NO"
if(output[temp[i],10]<=output[temp[i],9])break
}
if(plotit){
xax=rep(output[,4],3)
yax=c(output[,6],output[,7],output[,8])
plot(xax,yax,xlab=xlab,ylab=ylab,type="n")
points(output[,4],output[,6],pch="*")
lines(output[,4],output[,6])
points(output[,4],output[,7],pch="+")
points(output[,4],output[,8],pch="+")
}
output
}
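#  A minimal usage sketch (illustrative, simulated data): comparing the
#  quartiles and median of two independent groups, with Hochberg-adjusted
#  critical p-values.
if(FALSE){
set.seed(7)
g1<-rnorm(50)
g2<-rnorm(60,mean=.4)
qcomhdMC(g1,g2,q=c(.25,.5,.75),nboot=1000)
}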

qcom.sub<-function(x,y,q,alpha=.05,nboot=2000,SEED=TRUE){
#
library(parallel) # needed for mclapply when qcom.sub is called directly
x<-x[!is.na(x)] # Remove any missing values in x
y<-y[!is.na(y)] # Remove any missing values in y
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot)
datax=listm(t(datax))
datay=listm(t(datay))
bvecx<-mclapply(datax,hd,q,mc.preschedule=TRUE)
bvecy<-mclapply(datay,hd,q,mc.preschedule=TRUE)
bvecx=as.vector(matl(bvecx))
bvecy=as.vector(matl(bvecy))
bvec<-sort(bvecx-bvecy)
low<-round((alpha/2)*nboot)+1
up<-nboot-low
temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot)
sig.level<-2*(min(temp,1-temp))
se<-var(bvec)
list(est.1=hd(x,q),est.2=hd(y,q),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y))
}



smeanMC<-function(m,cop=6,MM=F,op=1,outfun=outogk,cov.fun=rmba,...){
#
# m is an n by p matrix
#
# Compute a multivariate skipped measure of location
#
# op=1:
# Eliminate outliers using a projection method (via outproMC,
# which takes advantage of a multicore processor).
# That is, first determine the center of the data using:
#
# cop=1 Donoho-Gasko median,
# cop=2 MCD,
# cop=3 marginal medians.
#  cop=4 uses MVE center
#  cop=5 uses TBS
#  cop=6 uses rmba (Olive's median ball algorithm)
#
# For each point
# consider the line between it and the center,
# project all points onto this line, and
# check for outliers using
#
# MM=F, a boxplot rule.
# MM=T, rule based on MAD and median
#
# Repeat this for all points. A point is declared
# an outlier if for any projection it is an outlier
# using a modification of the usual boxplot rule.
#
# op=2 use mgv (function outmgv) method to eliminate outliers
#
# op=3 use outlier method indicated by outfun
#
# Eliminate any outliers and compute means
#  using remaining data.
#
m<-elimna(m)
if(op==1){
temp<-outproMC(m,plotit=F,cop=cop,MM=MM)$keep
}
if(op==2)temp<-outmgv(m,plotit=F,cov.fun=cov.fun)$keep
if(op==3)temp<-outfun(m,plotit=F,...)$keep
val<-apply(m[temp,],2,mean)
val
}

 pb2genMC<-function(x,y,alpha=.05,nboot=2000,est=onestep,SEED=TRUE,pr=TRUE,...){
#
#   Compute a bootstrap confidence interval for the
#   difference between any two parameters corresponding to
#   independent groups.
#   By default, M-estimators are compared.
#   Setting est=mean, for example, will result in a percentile
#   bootstrap confidence interval for the difference between means.
#   Setting est=onestep will compare M-estimators of location.
#   The default number of bootstrap samples is nboot=2000
#
library(parallel)
x<-x[!is.na(x)] # Remove any missing values in x
y<-y[!is.na(y)] # Remove any missing values in y
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot)
#
datax=listm(datax)
datay=listm(datay)
bvecx<-mclapply(datax,est,mc.preschedule=TRUE,...)
bvecy<-mclapply(datay,est,mc.preschedule=TRUE,...)
bvec=sort(matl(bvecx)-matl(bvecy))
low<-round((alpha/2)*nboot)+1
up<-nboot-low
temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot)
sig.level<-2*(min(temp,1-temp))
se<-var(bvec)
list(est.1=est(x,...),est.2=est(y,...),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y))
}
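#  A minimal usage sketch (illustrative, simulated data): a percentile
#  bootstrap CI for the difference between two means rather than the default
#  one-step M-estimators.
if(FALSE){
set.seed(9)
x<-rnorm(40)
y<-rnorm(40,mean=1)
pb2genMC(x,y,est=mean,nboot=1000)
}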

cbmhdMC<-function(x,y,alpha=.05,q=.25,plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab="",nboot=600,SEED=TRUE){
#
#  Compute a confidence interval for the sum of the qth and (1-q)th quantiles
#  of the distribution of D=X-Y, where X and Y are two independent random variables.
#  The Harrell-Davis estimator is used
#  If the distributions of X and Y are identical, then in particular the
#  distribution of D=X-Y is symmetric about zero.
#
#  plotit=TRUE causes a plot of the difference scores to be created
#  pop=0 adaptive kernel density estimate
#  pop=1 results in the expected frequency curve.
#  pop=2 kernel density estimate (Rosenblatt's shifted histogram)
#  pop=3 boxplot
#  pop=4 stem-and-leaf
#  pop=5 histogram
#  pop=6 plot via the function skerd
#
library(parallel)
if(SEED)set.seed(2)
if(q>=.5)stop("q should be less than .5")
if(q<=0)stop("q should be greater than 0")
x<-x[!is.na(x)]
y<-y[!is.na(y)]
n1=length(x)
n2=length(y)
m<-outer(x,y,FUN="-")
q2=1-q
est1=hd(m,q)
est2=hd(m,q2)
data1<-matrix(sample(n1,size=n1*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(n2,size=n2*nboot,replace=TRUE),nrow=nboot)
data=cbind(data1,data2)
data=listm(t(data))
bvec=NA
bvec<-mclapply(data,cbmhd_subMC,x=x,y=y,q=q,q2=q2,n1=n1,n2=n2,mc.preschedule=TRUE)
bvec=list2vec(bvec)
p=mean(bvec>0)+.5*mean(bvec==0)
p=2*min(c(p,1-p))
sbv=sort(bvec)
ilow<-round((alpha/2) * nboot)
ihi<-nboot - ilow
ilow<-ilow+1
ci=sbv[ilow]
ci[2]=sbv[ihi]
if(plotit){
if(pop==1 || pop==0){
if(length(x)*length(y)>2500){
print("Product of sample sizes exceeds 2500.")
print("Execution time might be high when using pop=0 or 1")
print("If this is case, might consider changing the argument pop")
print("pop=2 might be better")
}}
MM=as.vector(m)
if(pop==0)akerd(MM,xlab=xlab,ylab=ylab)
if(pop==1)rdplot(MM,fr=fr,xlab=xlab,ylab=ylab)
if(pop==2)kdplot(MM,rval=rval,xlab=xlab,ylab=ylab)
if(pop==3)boxplot(MM)
if(pop==4)stem(MM)
if(pop==5)hist(MM,xlab=xlab)
if(pop==6)skerd(MM)
}
list(q=q,Est1=est1,Est2=est2,sum=est1+est2,ci=ci,p.value=p)
}
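#  A minimal usage sketch (illustrative, simulated data): if the two
#  distributions are identical, the sum of the .25 and .75 quantiles of
#  D=X-Y should be approximately zero.
if(FALSE){
set.seed(11)
x<-rnorm(30)
y<-rnorm(40)
cbmhdMC(x,y,q=.25,nboot=600)
}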

cbmhd_subMC<-function(data,x,y,q,q2,n1,n2){
np1=n1+1
nall=n1+n2
mb=outer(x[data[1:n1]],y[data[np1:nall]],"-")
est=hd(mb,q)+hd(mb,q2)
est
}

lintestMC<-function(x,y,regfun=tsreg,nboot=500,alpha=.05,xout=F,outfun=out,...){
#
# Test the hypothesis that the regression surface is a plane.
# Stute et al. (1998, JASA, 93, 141-149).
#
library(parallel)
set.seed(2)
x<-as.matrix(x)
d<-ncol(x)
temp<-elimna(cbind(x,y))
x<-temp[,1:d]
x<-as.matrix(x)
y<-temp[,d+1]
if(xout){
flag<-outfun(x)$keep
x<-x[flag,]
x<-as.matrix(x)
y<-y[flag]
}
mflag<-matrix(NA,nrow=length(y),ncol=length(y))
for (j in 1:length(y)){
for (k in 1:length(y)){
mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x))
}
}
reg<-regfun(x,y,...)
yhat<-y-reg$residuals
print("Taking bootstrap sample, please wait.")
data<-matrix(runif(length(y)*nboot),nrow=nboot)
data<-sqrt(12)*(data-.5) # standardize the random numbers.
data=listm(t(data))
rvalb<-mclapply(data,lintests1,yhat,reg$residuals,mflag,x,regfun,mc.preschedule=TRUE,...)
# An n x nboot matrix of R values
rvalb=matl(rvalb)
rvalb<-rvalb/sqrt(length(y))
dstatb<-apply(abs(rvalb),2,max)
wstatb<-apply(rvalb^2,2,mean)
# compute test statistic
v<-c(rep(1,length(y)))
rval<-lintests1(v,yhat,reg$residuals,mflag,x,regfun,...)
rval<-rval/sqrt(length(y))
dstat<-max(abs(rval))
wstat<-mean(rval^2)
ib<-round(nboot*(1-alpha))
p.value.d<-1-sum(dstat>=dstatb)/nboot
p.value.w<-1-sum(wstat>=wstatb)/nboot
list(dstat=dstat,wstat=wstat,p.value.d=p.value.d,p.value.w=p.value.w)
}
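#  A minimal usage sketch (illustrative, simulated data): a quadratic
#  association, so the hypothesis of a linear regression surface should
#  tend to be rejected.
if(FALSE){
set.seed(13)
x<-runif(50,-1,1)
y<-x^2+rnorm(50,sd=.2)
lintestMC(x,y,nboot=300)
}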


 lloc<-function(x,est=tmean,...){
if(is.data.frame(x)){
x=as.matrix(x)
x=apply(x,2,as.numeric) # earlier versions of R require this command
}
if(!is.list(x))val<-est(x,...)
if(is.list(x))val=lapply(x,est,...)
if(is.matrix(x))val<-apply(x,2,est,...)
val
}

reg2g.p2plot<-function(x1,y1,x2,y2,xout=FALSE,outfun=out,xlab="Var 1",ylab="Var 2",zlab="Var 3",regfun=tsreg,COLOR=TRUE,STAND=FALSE,
tick.marks=TRUE,type="p",pr=TRUE,...){
#
# Create a 3D plot of points and plot regression surface for two groups.
#
#  Assumes that the package scatterplot3d has been installed.
#  If not, use the command install.packages("scatterplot3d")
#  assuming you are connected to the web.
#
# The regression method used is specified with the argument
#  regfun.
#
#  type="p", points will be plotted. Use type="n" to get only regression planes plotted
#
x1=as.matrix(x1)
x2=as.matrix(x2)
if(ncol(x1)!=2)stop("Argument x1 must be stored in a matrix with 2 columns.")
if(ncol(x2)!=2)stop("Argument x2 must be stored in a matrix with 2 columns.")
xy1<-elimna(cbind(x1,y1))
xy2<-elimna(cbind(x2,y2))
if(xout){
if(!STAND)flag1=outfun(xy1[,1:2],plotit=FALSE,...)$keep
if(STAND)flag1=outpro(xy1[,1:2],plotit=FALSE,STAND=TRUE,...)$keep
if(!STAND)flag2=outfun(xy2[,1:2],plotit=FALSE,...)$keep
if(STAND)flag2=outpro(xy2[,1:2],plotit=FALSE,STAND=TRUE,...)$keep
xy1=xy1[flag1,]
xy2=xy2[flag2,]
}
x1=xy1[,1:2]
x2=xy2[,1:2]
y1=xy1[,3]
y2=xy2[,3]
library(scatterplot3d)
temp<-scatterplot3d(rbind(xy1,xy2),xlab=xlab,ylab=ylab,zlab=zlab,tick.marks=tick.marks,type=type)
vals1<-regfun(x1,y1,...)$coef
vals2<-regfun(x2,y2,...)$coef
if(COLOR){
if(pr)print("First group is blue")
temp$plane(vals1,col="blue")
temp$plane(vals2,col="red")
}
if(!COLOR){
temp$plane(vals1)
temp$plane(vals2)
}
list(coef.group.1=vals1,coef.group.2=vals2)
}


regp2plot<-function(x,y,xout=FALSE,outfun=out,xlab="Var 1",ylab="Var 2",zlab="Var 3",regfun=tsreg,COLOR=FALSE,tick.marks=TRUE,...){
#
# Create a 3D plot of points and plot regression surface.
#
#  Assumes that the package scatterplot3d has been installed.
#  If not, use the command install.packages("scatterplot3d")
#  assuming you are connected to the web.
#
# The regression method used is specified with the argument
#  regfun.
#
#
x=as.matrix(x)
if(ncol(x)!=2)stop("Argument x must be stored in a matrix with 2 columns.")
xy<-elimna(cbind(x,y))
if(xout){
flag=outfun(xy[,1:2])$keep
xy=xy[flag,]
}
x=xy[,1:2]
y=xy[,3]
library(scatterplot3d)
temp<-scatterplot3d(xy,xlab=xlab,ylab=ylab,zlab=zlab,tick.marks=tick.marks)
vals<-regfun(x,y,...)$coef
if(COLOR)temp$plane(vals,col="blue")
if(!COLOR)temp$plane(vals)
}


reg2plot<-function(x1,y1,x2,y2,regfun=tsreg,xlab="X",ylab="Y",xout=FALSE,outfun=out,
STAND=FALSE,...){
#
#  For convenience
#  plot two regression lines
#
xy=elimna(cbind(x1,y1))
x1=xy[,1]
y1=xy[,2]
xy=elimna(cbind(x2,y2))
x2=xy[,1]
y2=xy[,2]
if(xout){
if(!STAND)flag=outfun(cbind(x1,y1))$keep
if(STAND)flag=outpro(cbind(x1,y1),STAND=TRUE)$keep
x1=x1[flag]
y1=y1[flag]
if(!STAND)flag=outfun(cbind(x2,y2))$keep
if(STAND)flag=outpro(cbind(x2,y2),STAND=TRUE)$keep
x2=x2[flag]
y2=y2[flag]
}
plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab)
points(x1,y1)
points(x2,y2,pch="+")
abline(regfun(x1,y1,...)$coef)
abline(regfun(x2,y2,...)$coef,lty=2)
}

ghdist<-function(n,g=0,h=0){
#
# generate n observations from a g-and-h distribution.
#
x<-rnorm(n)
if(g!=0){   # the transformation is defined for any nonzero g
ghdist<-(exp(g*x)-1)*exp(h*x^2/2)/g
}
if(g==0)ghdist<-x*exp(h*x^2/2)
ghdist
}
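#  A minimal usage sketch: g controls skewness and h controls tail thickness;
#  g=h=0 corresponds to a standard normal distribution.
if(FALSE){
set.seed(15)
z<-ghdist(1000,g=.5,h=.2)   # skewed, heavy-tailed sample
hist(z)
}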

wincor<-function(x,y=NULL,tr=.2){
#   Compute the Winsorized correlation between x and y.
#
#   tr is the amount of Winsorization
#   This function also returns the Winsorized covariance
#
#    Pairwise deletion of missing values is performed.
#
if(is.null(y[1])){
y=x[,2]
x=x[,1]
}
sig<-NA
if(length(x)!=length(y))stop("Lengths of vectors are not equal")
m1=cbind(x,y)
m1<-elimna(m1)
nval=nrow(m1)
x<-m1[,1]
y<-m1[,2]
g<-floor(tr*length(x))
xvec<-winval(x,tr)
yvec<-winval(y,tr)
wcor<-cor(xvec,yvec)
wcov<-var(xvec,yvec)
if(sum(x==y)!=length(x)){
test<-wcor*sqrt((length(x)-2)/(1.-wcor^2))
sig<-2*(1-pt(abs(test),length(x)-2*g-2))
}
list(cor=wcor,cov=wcov,siglevel=sig,n=nval)
}
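#  A minimal usage sketch (illustrative, simulated data): the Winsorized
#  correlation guards against a few extreme points that can distort
#  Pearson's correlation.
if(FALSE){
set.seed(17)
x<-rnorm(50)
y<-x+rnorm(50)
x[1]<-15; y[1]<--15          # one wild point
cor(x,y)                     # Pearson's r is strongly affected
wincor(x,y,tr=.2)$cor        # the Winsorized correlation much less so
}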

bivar<-function(x){
# compute biweight midvariance of x
m<-median(x)
u<-abs((x-m)/(9*qnorm(.75)*mad(x)))
av<-ifelse(u<1,1,0)
top<-length(x)*sum(av*(x-m)^2*(1-u^2)^4)
bot<-sum(av*(1-u^2)*(1-5*u^2))
bi<-top/bot^2
bi
}

mjse<-function(x,q=.5,na.rm=FALSE){
#
#    Compute the Maritz-Jarrett estimate of the standard error of
#    X sub m, m=[qn+.5]
#    The default value for q is .5
#
if(na.rm)x=elimna(x)
n<-length(x)
m<-floor(q*n+.5)
vec<-seq(along=x)
w<-pbeta(vec/n,m-1,n-m)-pbeta((vec-1)/n,m-1,n-m)  # W sub i values
y<-sort(x)
c1<-sum(w*y)
c2<-sum(w*y*y)
mjse<-sqrt(c2-c1^2)
mjse
}

pbvar<-function(x,beta=.2){
#   Compute the percentage bend midvariance
#
#   beta is the bending constant for omega sub N.
#
pbvar=0
x=elimna(x)
w<-abs(x-median(x))
w<-sort(w)
m<-floor((1-beta)*length(x)+.5)
omega<-w[m]
if(omega>0){
y<-(x-median(x))/omega
z<-ifelse(y>1,1,y)
z<-ifelse(z<(-1),-1,z)
pbvar<-length(x)*omega^2*sum(z^2)/(length(x[abs(y)<1]))^2
}
pbvar
}

win<-function(x,tr=.2){
#
#  Compute the gamma Winsorized mean for the data in the vector x.
#
#  tr is the amount of Winsorization
#
y<-sort(x)
n<-length(x)
ibot<-floor(tr*n)+1
itop<-length(x)-ibot+1
xbot<-y[ibot]
xtop<-y[itop]
y<-ifelse(y<=xbot,xbot,y)
y<-ifelse(y>=xtop,xtop,y)
win<-mean(y)
win
}

hd<-function(x,q=.5,na.rm=TRUE,STAND=NULL){
#
#  Compute the Harrell-Davis estimate of the qth quantile
#
#  The vector x contains the data,
#  and the desired quantile is q
#  The default value for q is .5.
#
if(na.rm)x=elimna(x)
n<-length(x)
m1<-(n+1)*q
m2<-(n+1)*(1-q)
vec<-seq(along=x)
w<-pbeta(vec/n,m1,m2)-pbeta((vec-1)/n,m1,m2)  # W sub i values
y<-sort(x)
hd<-sum(w*y)
hd
}
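#  A minimal usage sketch: the Harrell-Davis estimator is a weighted sum of
#  all the order statistics, so it is typically less variable than an
#  estimate based on a single order statistic.
if(FALSE){
set.seed(19)
x<-rexp(100)
hd(x,q=.5)         # Harrell-Davis estimate of the median
median(x)          # usual sample median, for comparison
}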

mestse<-function(x,bend=1.28,op=2){
#
#   Estimate the standard error of M-estimator using Huber's Psi
#   using estimate of influence function
#
n<-length(x)
mestse<-sqrt(sum((ifmest(x,bend,op=op)^2))/(n*(n-1)))
mestse
}

omega<-function(x,beta=.1){
#   Compute the estimate of the measure omega as described in
#   chapter 3.
#   The default value is beta=.1 because this function is used to
#   compute the percentage bend midvariance.
#
y<-abs(x-median(x))
y<-sort(y)
m<-floor((1-beta)*length(x)+.5)
omega<-y[m]/qnorm(1-beta/2) # omega is rescaled to equal sigma
#                             under normality
omega
}

qse<-function(x,q=.5,op=3){
#
#  Compute the standard error of qth sample quantile estimator
#  based on the single order statistic, x sub ([qn+.5]) (See Ch 3)
#
#  Store the data in vector
#  x, and the desired quantile in q
#  The default value for q is .5
#
# op=1 Use Rosenblatt's shifted histogram
# op=2 Use expected frequency curve
# op=3 Use adaptive kernel density estimator
#
y <- sort(x)
n <- length(x)
iq <- floor(q * n + 0.5)
qest <- y[iq]
fhat<-NA
if(op==1)fhat<-kerden(x,q)
if(op==2)fhat<-rdplot(x,pts=qest,pyhat=TRUE,plotit=FALSE)
if(op==3)fhat<-akerd(x,pts=qest,pyhat=TRUE,plotit=FALSE)
if(is.na(fhat[1]))stop("Something wrong, op should be 1 or 2 or 3")
qse<-1/(2*sqrt(length(x))*fhat)
qse
}

winval<-function(x,tr=.2){
#
#  Winsorize the data in the vector x.
#  tr is the amount of Winsorization which defaults to .2.
#
#  This function is used by several other functions that come with this book.
#
y<-sort(x)
n<-length(x)
ibot<-floor(tr*n)+1
itop<-length(x)-ibot+1
xbot<-y[ibot]
xtop<-y[itop]
winval<-ifelse(x<=xbot,xbot,x)
winval<-ifelse(winval>=xtop,xtop,winval)
winval
}

hdseb<-function(x,q=.5,nboot=100,SEED=TRUE){
#
#   Compute bootstrap estimate of the standard error of the
#   Harrell-Davis estimator of the qth quantile.
#   The default quantile is the median, q=.5
#   The default number of bootstrap samples is nboot=100
#
if(SEED)set.seed(2) # set seed of random number generator so that
#   results can be duplicated.
data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,hd,q)
hdseb<-sqrt(var(bvec))
hdseb
}

mestseb<-function(x,nboot=1000,bend=1.28,SEED=TRUE){
#
#   Compute bootstrap estimate of the standard error of the
#   M-estimator based on Huber's Psi.
#   The default bending constant is bend=1.28
#   The default number of bootstrap samples is nboot=1000
#
if(SEED)set.seed(1) # set seed of random number generator so that
#   results can be duplicated.
data<-matrix(sample(x,size=length(x)*nboot,replace=T),nrow=nboot)
bvec<-apply(data,1,mest,bend)
mestseb<-sqrt(var(bvec))
mestseb
}

onestep<-function(x,bend=1.28,na.rm=FALSE){
#
#  Compute one-step M-estimator of location using Huber's Psi.
#  The default bending constant is 1.28
#
if(na.rm)x<-x[!is.na(x)]
y<-(x-median(x))/mad(x)  #mad in splus is madn in the book.
A<-sum(hpsi(y,bend))
B<-length(x[abs(y)<=bend])
onestep<-median(x)+mad(x)*A/B
onestep
}


trimse<-function(x,tr=.2,na.rm=FALSE){
#
#  Estimate the standard error of the gamma trimmed mean
#  The default amount of trimming is tr=.2.
#
if(na.rm)x<-x[!is.na(x)]
trimse<-sqrt(winvar(x,tr))/((1-2*tr)*sqrt(length(x)))
trimse
}

winvar<-function(x,tr=.2,na.rm=FALSE,STAND=NULL){
#
#  Compute the gamma Winsorized variance for the data in the vector x.
#  tr is the amount of Winsorization which defaults to .2.
#
if(na.rm)x<-x[!is.na(x)]
y<-sort(x)
n<-length(x)
ibot<-floor(tr*n)+1
itop<-length(x)-ibot+1
xbot<-y[ibot]
xtop<-y[itop]
y<-ifelse(y<=xbot,xbot,y)
y<-ifelse(y>=xtop,xtop,y)
winvar<-var(y)
winvar
}

mest<-function(x,bend=1.28,na.rm=FALSE){
#
#  Compute M-estimator of location using Huber's Psi.
#  The default bending constant is 1.28
#
if(na.rm)x<-x[!is.na(x)]
if(mad(x)==0)stop("MAD=0. The M-estimator cannot be computed.")
y<-(x-median(x))/mad(x)  #mad in splus is madn in the book.
A<-sum(hpsi(y,bend))
B<-length(x[abs(y)<=bend])
mest<-median(x)+mad(x)*A/B
repeat{
y<-(x-mest)/mad(x)
A<-sum(hpsi(y,bend))
B<-length(x[abs(y)<=bend])
newmest<-mest+mad(x)*A/B
if(abs(newmest-mest) <.0001)break
mest<-newmest
}
mest
}
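#  A minimal usage sketch: with a couple of outliers appended, the
#  M-estimators stay close to the bulk of the data while the mean does not.
if(FALSE){
set.seed(21)
x<-c(rnorm(20),10,15)
mean(x)
onestep(x)
mest(x)
}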


hpsi<-function(x,bend=1.28){
#
#   Evaluate Huber's Psi function for each value in the vector x
#   The bending constant defaults to 1.28.
#
hpsi<-ifelse(abs(x)<=bend,x,bend*sign(x))
hpsi
}

hdci<-function(x,q=.5,alpha=.05,nboot=100,SEED=TRUE,pr=TRUE){
#
#   Compute a 1-alpha confidence interval for the qth quantile using the
#   Harrell-Davis estimator in conjunction with the
#   bootstrap estimate of the standard error.
#
#   The default quantile is .5.
#   The default value for alpha is .05.
#
if(alpha!=.05)stop("Use the function qcipb. Generally works well even when alpha is not equal to .05")
x=elimna(x)
if(pr){
if(sum(duplicated(x))>0)print("Duplicate values detected; use hdpb")
}
se<-hdseb(x,q,nboot,SEED=SEED)
crit<-.5064/(length(x)^(.25))+1.96
if(q<=.2 || q>=.8){
if(length(x) <=20)crit<-(-6.23)/length(x)+5.01
}
if(q<=.1 || q>=.9){
if(length(x) <=40)crit<-36.2/length(x)+1.31
}
if(length(x)<=10){
print("The number of observations is less than 11.")
print("Accurate critical values have not been determined for this case.")
}
low<-hd(x,q)-crit*se
hi<-hd(x,q)+crit*se
list(ci=c(low,hi),crit=crit,se=se)
}

mestci<-function(x,alpha=.05,nboot=399,bend=1.28,os=F){
#
#   Compute a bootstrap, .95 confidence interval for the
#   M-estimator of location based on Huber's Psi.
#   The default bending constant is bend=1.28
#   The default number of bootstrap samples is nboot=399
#
#   By default, the fully iterated M-estimator is used. To use the
#   one-step M-estimator instead, set os=T
#
os<-as.logical(os)
if(length(x)<=19){
print("The number of observations is less than 20.")
print("This function might fail due to division by zero,")
print("which in turn causes an error in function hpsi")
print("having to do with a missing value.")
}
set.seed(1) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(x,size=length(x)*nboot,replace=T),nrow=nboot)
if(!os)bvec<-apply(data,1,mest,bend)
if(os)bvec<-apply(data,1,onestep,bend)
bvec<-sort(bvec)
low<-round((alpha/2)*nboot)
up<-nboot-low
low<-low+1
list(ci=c(bvec[low],bvec[up]))
}


sint<-function(x,alpha=.05,pr=TRUE){
#
#   Compute a 1-alpha confidence interval for the median using
#   the Hettmansperger-Sheather interpolation method.
#
#   The default value for alpha is .05.
#
x=elimna(x)
if(pr){
if(sum(duplicated(x))>0)print("Duplicate values detected; hdpb might have more power")
}
k<-qbinom(alpha/2,length(x),.5)
gk<-pbinom(length(x)-k,length(x),.5)-pbinom(k-1,length(x),.5)
if(gk >= 1-alpha){
gkp1<-pbinom(length(x)-k-1,length(x),.5)-pbinom(k,length(x),.5)
kp<-k+1
}
if(gk < 1-alpha){
k<-k-1
gk<-pbinom(length(x)-k,length(x),.5)-pbinom(k-1,length(x),.5)
gkp1<-pbinom(length(x)-k-1,length(x),.5)-pbinom(k,length(x),.5)
kp<-k+1
}
xsort<-sort(x)
nmk<-length(x)-k
nmkp<-nmk+1
ival<-(gk-1+alpha)/(gk-gkp1)
lam<-((length(x)-k)*ival)/(k+(length(x)-2*k)*ival)
low<-lam*xsort[kp]+(1-lam)*xsort[k]
hi<-lam*xsort[nmk]+(1-lam)*xsort[nmkp]
sint<-c(low,hi)
sint
}
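#  A minimal usage sketch: a distribution-free confidence interval for the
#  population median of a skewed sample.
if(FALSE){
set.seed(23)
x<-rlnorm(25)
sint(x)            # .95 confidence interval for the median
}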



b2ci<-function(x,y,alpha=.05,nboot=2000,est=bivar,...){
#
#   Compute a bootstrap confidence interval for the
#   difference between any two parameters corresponding to
#   independent groups.
#   By default, biweight midvariances are compared.
#   Setting est=mean, for example, will result in a percentile
#   bootstrap confidence interval for the difference between means.
#   The default number of bootstrap samples is nboot=2000
#
x<-x[!is.na(x)] # Remove any missing values in x
y<-y[!is.na(y)] # Remove any missing values in y
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
datax<-matrix(sample(x,size=length(x)*nboot,replace=T),nrow=nboot)
datay<-matrix(sample(y,size=length(y)*nboot,replace=T),nrow=nboot)
bvecx<-apply(datax,1,est,...)
bvecy<-apply(datay,1,est,...)
bvec<-sort(bvecx-bvecy)
low <- round((alpha/2) * nboot) + 1
up <- nboot - low
temp <- sum(bvec < 0)/nboot + sum(bvec == 0)/(2 * nboot)
sig.level <- 2 * (min(temp, 1 - temp))
list(ci = c(bvec[low], bvec[up]), p.value = sig.level)
}

ecdf<-function(x,val){
#  compute empirical cdf for data in x evaluated at val
#  That is, estimate P(X <= val)
#
ecdf<-length(x[x<=val])/length(x)
ecdf
}

kswsig<-function(m,n,val){
#
#    Compute significance level of the weighted
#    Kolmogorov-Smirnov test statistic
#
#    m=sample size of first group
#    n=sample size of second group
#    val=observed value of test statistic
#
mpn<-m+n
cmat<-matrix(0,m+1,n+1)
umat<-matrix(0,m+1,n+1)
for (i in 0:(m-1)){
for (j in 0:(n-1))cmat[i+1,j+1]<-abs(i/m-j/n)*sqrt(m*n/((i+j)*(1-(i+j)/mpn)))
}
cmat<-ifelse(cmat<=val,1,0)
for (i in 0:m){
for (j in 0:n)if(i*j==0)umat[i+1,j+1]<-cmat[i+1,j+1]
else umat[i+1,j+1]<-cmat[i+1,j+1]*(umat[i+1,j]+umat[i,j+1])
}
term<-lgamma(m+n+1)-lgamma(m+1)-lgamma(n+1)
kswsig<-1.-umat[m+1,n+1]/exp(term)
kswsig
}


binomci<-function(x=sum(y),nn=length(y),y=NULL,n=NA,alpha=.05){
#  Compute a 1-alpha confidence interval for p, the probability of
#  success for a binomial distribution, using Pratt's method
#
#  y is a vector of 1s and 0s.
#  x is the number of successes observed among n trials
#
if(!is.null(y)){
y=elimna(y)
nn=length(y)
}
if(nn==1)stop("Something is wrong: number of observations is only 1")
n<-nn
if(x!=n && x!=0){
z<-qnorm(1-alpha/2)
A<-((x+1)/(n-x))^2
B<-81*(x+1)*(n-x)-9*n-8
C<-(0-3)*z*sqrt(9*(x+1)*(n-x)*(9*n+5-z^2)+n+1)
D<-81*(x+1)^2-9*(x+1)*(2+z^2)+1
E<-1+A*((B+C)/D)^3
upper<-1/E
A<-(x/(n-x-1))^2
B<-81*x*(n-x-1)-9*n-8
C<-3*z*sqrt(9*x*(n-x-1)*(9*n+5-z^2)+n+1)
D<-81*x^2-9*x*(2+z^2)+1
E<-1+A*((B+C)/D)^3
lower<-1/E
}
if(x==0){
lower<-0
upper<-1-alpha^(1/n)
}
if(x==1){
upper<-1-(alpha/2)^(1/n)
lower<-1-(1-alpha/2)^(1/n)
}
if(x==n-1){
lower<-(alpha/2)^(1/n)
upper<-(1-alpha/2)^(1/n)
}
if(x==n){
lower<-alpha^(1/n)
upper<-1
}
phat<-x/n
list(phat=phat,ci=c(lower,upper),n=n)
}
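#  A minimal usage sketch: Pratt's confidence interval from a count of
#  successes, or directly from a vector of 0s and 1s.
if(FALSE){
binomci(x=15,nn=20)         # 15 successes in 20 trials
set.seed(25)
y<-rbinom(30,1,.4)
binomci(y=y)
}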



kssig<-function(m,n,val){
#
#    Compute significance level of the  Kolmogorov-Smirnov test statistic
#    m=sample size of first group
#    n=sample size of second group
#    val=observed value of test statistic
#
cmat<-matrix(0,m+1,n+1)
umat<-matrix(0,m+1,n+1)
for (i in 0:m){
for (j in 0:n)cmat[i+1,j+1]<-abs(i/m-j/n)
}
cmat<-ifelse(cmat<=val,1e0,0e0)
for (i in 0:m){
for (j in 0:n)if(i*j==0)umat[i+1,j+1]<-cmat[i+1,j+1]
else umat[i+1,j+1]<-cmat[i+1,j+1]*(umat[i+1,j]+umat[i,j+1])
}
term<-lgamma(m+n+1)-lgamma(m+1)-lgamma(n+1)
kssig<-1.-umat[m+1,n+1]/exp(term)
kssig
}

meemul<-function(x,alpha=.05){
#
#  Perform Mee's method for all pairs of J independent groups.
#  The familywise type I error probability is controlled by using
#  a critical value from the Studentized maximum modulus distribution.
#
#  The data are assumed to be stored in $x$ in list mode.
#  Length(x) is assumed to correspond to the total number of groups, J
#  It is assumed all groups are independent.
#
#  Missing values are automatically removed.
#
#  The default value for alpha is .05. Any other value results in using
#  alpha=.01.
#
if(!is.list(x))stop("Data must be stored in list mode.")
J<-length(x)
CC<-(J^2-J)/2
test<-matrix(NA,CC,5)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
}
dimnames(test)<-list(NULL,c("Group","Group","phat","ci.lower","ci.upper"))
jcom<-0
crit<-smmcrit(200,CC)
if(alpha!=.05)crit<-smmcrit01(200,CC)
alpha<-1-pnorm(crit)
for (j in 1:J){
for (k in 1:J){
if (j < k){
temp<-mee(x[[j]],x[[k]],alpha)
jcom<-jcom+1
test[jcom,1]<-j
test[jcom,2]<-k
test[jcom,3]<-temp$phat
test[jcom,4]<-temp$ci[1]
test[jcom,5]<-temp$ci[2]
}}}
list(test=test)
}

tsub<-function(isub,x,y,tr){
#
#  Compute test statistic for trimmed means
#  when comparing dependent groups.
#  By default, 20% trimmed means are used.
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  This function is used by ydbt and pairdepb
#
tsub<-yuend(x[isub],y[isub],tr=tr)$teststat
tsub
}

deciles<-function(x){
#
#  Estimate the deciles for the data in vector x
#  using the Harrell-Davis estimate of the qth quantile
#
xs<-sort(x)
n<-length(x)
vecx<-seq(along=x)
xq<-0
for (i in 1:9){
q<-i/10
m1<-(n+1)*q
m2<-(n+1)*(1-q)
wx<-pbeta(vecx/n,m1,m2)-pbeta((vecx-1)/n,m1,m2)  # W sub i values
xq[i]<-sum(wx*xs)
}
xq
}


kstiesig<-function(x,y,val){
#
#    Compute significance level of the  Kolmogorov-Smirnov test statistic
#    for the data in x and y.
#    This function allows ties among the  values.
#    val=observed value of test statistic
#
m<-length(x)
n<-length(y)
z<-c(x,y)
z<-sort(z)
cmat<-matrix(0,m+1,n+1)
umat<-matrix(0,m+1,n+1)
for (i in 0:m){
for (j in 0:n){
if(abs(i/m-j/n)<=val)cmat[i+1,j+1]<-1e0
k<-i+j
if(k > 0 && k<length(z) && z[k]==z[k+1])cmat[i+1,j+1]<-1
}
}
for (i in 0:m){
for (j in 0:n)if(i*j==0)umat[i+1,j+1]<-cmat[i+1,j+1]
else umat[i+1,j+1]<-cmat[i+1,j+1]*(umat[i+1,j]+umat[i,j+1])
}
term<-lgamma(m+n+1)-lgamma(m+1)-lgamma(n+1)
kstiesig<-1.-umat[m+1,n+1]/exp(term)
kstiesig
}

yuen<-function(x,y,tr=.2,alpha=.05){
#
#  Perform Yuen's test for trimmed means on the data in x and y.
#  The default amount of trimming is 20%
#  Missing values (values stored as NA) are automatically removed.
#
#  A confidence interval for the trimmed mean of x minus the
#  the trimmed mean of y is computed and returned in yuen$ci.
#  The p-value is returned in yuen$p.value
#
#  For an omnibus test with more than two independent groups,
#  use t1way.
#  This function uses winvar from chapter 2.
#
if(tr==.5)stop("Using tr=.5 is not allowed; use a method designed for medians")
if(tr>.25)print("Warning: with tr>.25 type I error control might be poor")
x<-x[!is.na(x)]  # Remove any missing values in x
y<-y[!is.na(y)]  # Remove any missing values in y
h1<-length(x)-2*floor(tr*length(x))
h2<-length(y)-2*floor(tr*length(y))
q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1))
q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1))
df<-(q1+q2)^2/((q1^2/(h1-1))+(q2^2/(h2-1)))
crit<-qt(1-alpha/2,df)
dif<-mean(x,tr)-mean(y,tr)
low<-dif-crit*sqrt(q1+q2)
up<-dif+crit*sqrt(q1+q2)
test<-abs(dif/sqrt(q1+q2))
yuen<-2*(1-pt(test,df))
list(n1=length(x),n2=length(y),est.1=mean(x,tr),est.2=mean(y,tr),ci=c(low,up),p.value=yuen,dif=dif,se=sqrt(q1+q2),teststat=test,crit=crit,df=df)
}
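#  A minimal usage sketch (illustrative, simulated data): Yuen's test with
#  the default 20% trimming and again with 10% trimming.
if(FALSE){
set.seed(27)
x<-rnorm(30)
y<-rnorm(35,mean=.8)
yuen(x,y)
yuen(x,y,tr=.1)
}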

shifthd<-function(x,y,nboot=200,plotit=TRUE,plotop=FALSE,SEED=TRUE){
#
#   Compute confidence intervals for the difference between deciles
#   of two independent groups. The simultaneous probability coverage is .95.
#   The Harrell-Davis estimate of the qth quantile is used.
#   The default number of bootstrap samples is nboot=200
#
#   The results are stored and returned in a 9 by 3 matrix,
#   the ith row corresponding to the i/10 quantile.
#   The first column is the lower end of the confidence interval.
#   The second column is the upper end.
#   The third column is the estimated difference between the deciles
#   (second group minus first).
#
plotit<-as.logical(plotit)
x<-x[!is.na(x)]
y<-y[!is.na(y)]
if(SEED)set.seed(2) # set seed of random number generator so that
#   results can be duplicated.
crit<-80.1/(min(length(x),length(y)))^2+2.73
m<-matrix(0,9,3)
for (i in 1:9){
q<-i/10
print("Working on quantile")
print(q)
data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,hd,q)
sex<-var(bvec)
data<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,hd,q)
sey<-var(bvec)
dif<-hd(y,q)-hd(x,q)
m[i,3]<-dif
m[i,1]<-dif-crit*sqrt(sex+sey)
m[i,2]<-dif+crit*sqrt(sex+sey)
}
dimnames(m)<-list(NULL,c("ci.lower","ci.upper","Delta.hat"))
if(plotit){
if(plotop){
xaxis<-c(1:9)/10
xaxis<-c(xaxis,xaxis)
}
if(!plotop)xaxis<-c(deciles(x),deciles(x))
par(pch="+")
yaxis<-c(m[,1],m[,2])
if(!plotop)plot(xaxis,yaxis,ylab="delta",xlab="x (first group)")
if(plotop)plot(xaxis,yaxis,ylab="delta",xlab="Deciles")
par(pch="*")
if(!plotop)points(deciles(x),m[,3])
if(plotop)points(c(1:9)/10,m[,3])
}
m
}

shiftdhd<-function(x,y,nboot=200,plotit=TRUE,plotop=FALSE,SEED=TRUE,pr=TRUE){
#
#   Compute confidence intervals for the difference between deciles
#   of two dependent groups. The simultaneous probability coverage is .95.
#   The Harrell-Davis estimate of the qth quantile is used.
#   The default number of bootstrap samples is nboot=200
#
#   The results are stored and returned in a 9 by 4 matrix,
#   the ith row corresponding to the i/10 quantile.
#   The first column is the lower end of the confidence interval.
#   The second column is the upper end.
#   The third column is the estimated difference between the deciles
#   (second group minus first).
#   The fourth column contains the estimated standard error.
#
#   No missing values are allowed.
#
if(pr){
print("NOTE: for higher power when sampling from a heavy-tailed dist.")
print("or if the goal is to use an alpha value different from .05")
print("use the function qdec2ci")
}
plotit<-as.logical(plotit)
if(SEED)set.seed(2) # set seed of random number generator so that
#   results can be duplicated.
crit<-37/length(x)^(1.4)+2.75
if(pr)print("The approximate .05 critical value is")
if(pr)print(crit)
m<-matrix(0,9,4)
if(pr)print("Taking Bootstrap Samples. Please wait.")
data<-matrix(sample(length(x),size=length(x)*nboot,replace=TRUE),nrow=nboot)
xmat<-matrix(x[data],nrow=nboot,ncol=length(x))
ymat<-matrix(y[data],nrow=nboot,ncol=length(x))
for (i in 1:9){
q<-i/10
bvec<-apply(xmat,1,hd,q)-apply(ymat,1,hd,q)
se<-sqrt(var(bvec))
dif<-hd(y,q)-hd(x,q)
m[i,3]<-dif
m[i,1]<-dif-crit*se
m[i,2]<-dif+crit*se
m[i,4]<-se
}
dimnames(m)<-list(NULL,c("lower","upper","Delta.hat","se"))
if(plotit){
if(plotop){
xaxis<-c(1:9)/10
xaxis<-c(xaxis,xaxis)
}
if(!plotop)xaxis<-c(deciles(x),deciles(x))
par(pch="+")
yaxis<-c(m[,1],m[,2])
if(!plotop)plot(xaxis,yaxis,ylab="delta",xlab="x (first group)")
if(plotop)plot(xaxis,yaxis,ylab="delta",xlab="Deciles")
par(pch="*")
if(!plotop)points(deciles(x),m[,3])
if(plotop)points(c(1:9)/10,m[,3])
}
m
}


smmcrit<-function(nuhat,C){
#
#  Determine the .95 quantile of the C-variate Studentized maximum
#  modulus distribution using linear interpolation on inverse
#  degrees of freedom
#  If C=1, this function returns the .975 quantile of Student's t
#  distribution.
#
if(C-round(C)!=0)stop("The number of contrasts, C, must be an integer")
if(C>=29)stop("C must be less than or equal to 28")
if(C<=0)stop("C must be greater than or equal to 1")
if(nuhat<2)stop("The degrees of freedom must be greater than or equal to 2")
if(C==1)smmcrit<-qt(.975,nuhat)
if(C>=2){
C<-C-1
m1<-matrix(0,20,27)
m1[1,]<-c(5.57,6.34,6.89,7.31,7.65,7.93,8.17,8.38,8.57,
8.74,8.89,9.03,9.16,9.28,9.39,9.49,9.59,9.68,
9.77,9.85,9.92,10.00,10.07,10.13,10.20,10.26,10.32)
m1[2,]<-c(3.96,4.43,4.76,5.02,5.23,5.41,5.56,5.69,5.81,
5.92,6.01,6.10,6.18,6.26,6.33,6.39,6.45,6.51,
6.57,6.62,6.67,6.71,6.76,6.80,6.84,6.88, 6.92)
m1[3,]<-c(3.38,3.74,4.01,4.20,4.37,4.50,4.62,4.72,4.82,
4.89,4.97,5.04,5.11,5.17,5.22,5.27,5.32, 5.37,
5.41,5.45,5.49,5.52,5.56,5.59,5.63,5.66,5.69)
m1[4,]<-c(3.09,3.39,3.62,3.79,3.93,4.04,4.14,4.23,4.31,
4.38,4.45,4.51,4.56,4.61,4.66,4.70,4.74,4.78,
4.82,4.85,4.89,4.92,4.95,4.98,5.00,5.03,5.06)
m1[5,]<-c(2.92,3.19,3.39,3.54,3.66,3.77,3.86,3.94,4.01,
4.07,4.13,4.18,4.23,4.28,4.32,4.36,4.39,4.43,
4.46,4.49,4.52,4.55,4.58,4.60,4.63,4.65,4.68)
m1[6,]<-c(2.80,3.06,3.24,3.38,3.49,3.59,3.67,3.74,3.80,
3.86,3.92,3.96,4.01,4.05,4.09,4.13,4.16,4.19,
4.22,4.25,4.28,4.31,4.33,4.35,4.38,4.39,4.42)
m1[7,]<-c(2.72,2.96,3.13,3.26,3.36,3.45,3.53,3.60,3.66,
3.71,3.76,3.81,3.85,3.89,3.93,3.96,3.99, 4.02,
4.05,4.08,4.10,4.13,4.15,4.18,4.19,4.22,4.24)
m1[8,]<-c(2.66,2.89,3.05,3.17,3.27,3.36,3.43,3.49,3.55,
3.60,3.65,3.69,3.73,3.77,3.80,3.84,3.87,3.89,
3.92,3.95,3.97,3.99,4.02,4.04,4.06,4.08,4.09)
m1[9,]<-c(2.61,2.83,2.98,3.10,3.19,3.28,3.35,3.41,3.47,
3.52,3.56,3.60,3.64,3.68,3.71,3.74,3.77,3.79,
3.82,3.85,3.87,3.89,3.91,3.94,3.95, 3.97,3.99)
m1[10,]<-c(2.57,2.78,2.93,3.05,3.14,3.22,3.29,3.35,3.40,
3.45,3.49,3.53,3.57,3.60,3.63,3.66,3.69,3.72,
3.74,3.77,3.79,3.81,3.83,3.85,3.87,3.89,3.91)
m1[11,]<-c(2.54,2.75,2.89,3.01,3.09,3.17,3.24,3.29,3.35,
3.39,3.43,3.47,3.51,3.54,3.57,3.60,3.63,3.65,
3.68,3.70,3.72,3.74,3.76,3.78,3.80,3.82,3.83)
m1[12,]<-c(2.49,2.69,2.83,2.94,3.02,3.09,3.16,3.21,3.26,
3.30,3.34,3.38,3.41,3.45,3.48,3.50,3.53,3.55,
3.58,3.59,3.62,3.64,3.66,3.68,3.69,3.71,3.73)
m1[13,]<-c(2.46,2.65,2.78,2.89,2.97,3.04,3.09,3.15,3.19,
3.24,3.28,3.31,3.35,3.38,3.40,3.43,3.46,3.48,
3.50,3.52,3.54,3.56,3.58,3.59,3.61,3.63,3.64)
m1[14,]<-c(2.43,2.62,2.75,2.85,2.93,2.99,3.05,3.11,3.15,
3.19,3.23,3.26,3.29,3.32,3.35,3.38,3.40,3.42,
3.44,3.46,3.48,3.50,3.52,3.54,3.55,3.57,3.58)
m1[15,]<-c(2.41,2.59,2.72,2.82,2.89,2.96,3.02,3.07,3.11,
3.15,3.19,3.22,3.25,3.28,3.31,3.33,3.36,3.38,
3.39,3.42,3.44,3.46,3.47,3.49,3.50,3.52,3.53)
m1[16,]<-c(2.38,2.56,2.68,2.77,2.85,2.91,2.97,3.02,3.06,
3.09,3.13,3.16,3.19,3.22,3.25,3.27,3.29,3.31,
3.33,3.35,3.37,3.39,3.40,3.42,3.43,3.45,3.46)
m1[17,]<-c(2.35,2.52,2.64,2.73,2.80,2.87,2.92,2.96,3.01,
3.04,3.07,3.11,3.13,3.16,3.18,3.21,3.23,3.25,
3.27,3.29,3.30,3.32,3.33,3.35,3.36,3.37,3.39)
m1[18,]<-c(2.32,2.49,2.60,2.69,2.76,2.82,2.87,2.91,2.95,
2.99,3.02,3.05,3.08,3.09,3.12,3.14,3.17, 3.18,
3.20,3.22,3.24,3.25,3.27,3.28,3.29,3.31,3.32)
m1[19,]<-c(2.29,2.45,2.56,2.65,2.72,2.77,2.82,2.86,2.90,
2.93,2.96,2.99,3.02,3.04,3.06,3.08,3.10, 3.12,
3.14,3.16,3.17,3.19,3.20,3.21,3.23,3.24,3.25)
m1[20,]<-c(2.24,2.39,2.49,2.57,2.63,2.68,2.73,2.77,2.79,
2.83,2.86,2.88,2.91,2.93,2.95,2.97,2.98, 3.01,
3.02,3.03,3.04,3.06,3.07,3.08,3.09,3.11,3.12)
if(nuhat>=200)smmcrit<-m1[20,C]
if(nuhat<200){
nu<-c(2,3,4,5,6,7,8,9,10,11,12,14,16,18,20,24,30,40,60,200)
temp<-abs(nu-nuhat)
find<-order(temp)
if(temp[find[1]]==0)smmcrit<-m1[find[1],C]
if(temp[find[1]]!=0){
if(nuhat>nu[find[1]]){
smmcrit<-m1[find[1],C]-
(1/nu[find[1]]-1/nuhat)*(m1[find[1],C]-m1[find[1]+1,C])/
(1/nu[find[1]]-1/nu[find[1]+1])
}
if(nuhat<nu[find[1]]){
smmcrit<-m1[find[1]-1,C]-
(1/nu[find[1]-1]-1/nuhat)*(m1[find[1]-1,C]-m1[find[1],C])/
(1/nu[find[1]-1]-1/nu[find[1]])
}
}}
}
smmcrit
}

trimparts<-function(x,tr=.2){
#
#  Compute the trimmed mean and its squared standard error.
#  The default amount of trimming is tr=.2.
#
#  This function is used by other functions described in chapter 6.
#
tm<-mean(x,tr)
h1<-length(x)-2*floor(tr*length(x))
sqse<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1))
trimparts<-c(tm,sqse)
trimparts
}

wsumsq<-function(x,nval){
#
#  Compute the weighted sum of squared differences from the mean.
#  This function is used by b1way
#
wsumsq<-sum(nval*(x-mean(x))^2)/sum(nval)
wsumsq
}

rfanova<-function(x,grp=0){
#
#  Perform Rust-Fligner anova using ranks.
#  x is assumed to have list mode. x[[j]] contains data for jth group.
#  or x is a matrix with columns corresponding to groups.
#
#  missing values are automatically removed.
#  Tied values are assumed to occur with probability zero.
#
library(MASS)  # Needed for the function ginv
if(!is.list(x))x<-listm(x)
chk=tlist(x)
if(chk!=0)print("Warning: tied values detected")
xall<-NA
if(sum(grp)==0){
J<-length(x)
grp<-c(1:J)
}
if(sum(grp)>0)J<-length(grp)
nval<-1
nrat<-1
nmax<-0
rbar<-1
mrbar<-0
for (j in grp){
temp<-x[[j]]
temp<-temp[!is.na(temp)] #Missing values are removed.
nrat[j]<-(length(temp)-1)/length(temp)
nval[j]<-length(temp)
if(j==grp[1])xall<-temp
if(j!=grp[1])xall<-c(xall,temp)
if(length(temp)>nmax)nmax<-length(temp)
}
pv<-array(NA,c(J,nmax,J))
tv<-matrix(NA,J,nmax)
rv<-matrix(0,J,nmax)
for (i in 1:J){
data<-x[[i]]
data<-data[!is.na(data)]
for (j in 1:length(data)){
tempr<-data[j]-xall
rv[i,j]<-length(tempr[tempr>=0])
for (l in 1:J){
templ<-x[[l]]
templ<-templ[!is.na(templ)]
temp<-data[j]-templ
pv[i,j,l]<-length(temp[temp>=0])
}
tv[i,j]<-sum(pv[i,j,])-pv[i,j,i]
}
rbar[i]<-sum(rv[i,])/nval[i]
mrbar<-mrbar+sum(rv[i,])
}
amat<-matrix(0,J,J)
for(i in 1:J){
temptv<-tv[i,]
temptv<-temptv[!is.na(temptv)]
amat[i,i]<-(length(temptv)-1)*var(temptv)
for (l in 1:J){
tempp<-pv[l,,i]
tempp<-tempp[!is.na(tempp)]
if(l!=i){
amat[i,i]<-amat[i,i]+(length(tempp)-1)*var(tempp)
}}
for (j in 1:J){
if(j>i){
for (l in 1:J){
temp1<-pv[l,,i]
temp2<-pv[l,,j]
temp1<-temp1[!is.na(temp1)]
temp2<-temp2[!is.na(temp2)]
#if(i!=l && l!=j)amat[i,j]<-(length(temp1)-1)*var(temp1,temp2)
if(i!=l && l!=j)amat[i,j]<-amat[i,j]+(length(temp1)-1)*var(temp1,temp2)
}
temp1<-pv[i,,j]
temp2<-tv[i,]
temp1<-temp1[!is.na(temp1)]
temp2<-temp2[!is.na(temp2)]
amat[i,j]<-amat[i,j]-(length(temp1)-1)*var(temp1,temp2)
temp1<-pv[j,,i]
temp2<-tv[j,]
temp1<-temp1[!is.na(temp1)]
temp2<-temp2[!is.na(temp2)]
amat[i,j]<-amat[i,j]-(length(temp1)-1)*var(temp1,temp2)
}
amat[j,i]<-amat[i,j]
}}
N<-sum(nval)
amat<-amat/N^3
amati<-ginv(amat)
uvec<-1
mrbar<-mrbar/N
for (i in 1:J)uvec[i]<-nval[i]*(rbar[i]-mrbar)/(N*(N+1))
testv<-N*prod(nrat)*uvec%*%amati%*%uvec
test<-testv[1,1]
df<-J-1
siglevel<-1-pchisq(test,df)
list(test=test,siglevel=siglevel,df=df)
}

apanova<-function(data,grp=0){
#
#  Perform Agresti-Pendergast rank test for J dependent groups
#  The data are assumed to be stored in an n by J matrix or
#  in list mode. In the latter case, length(data)=J.
#
if(is.list(data)){
x<-matrix(0,length(data[[1]]),length(data))
for (j in 1:length(data))x[,j]<-data[[j]]
}
if(is.matrix(data))x<-data
if(sum(grp==0))grp<-c(1:ncol(x))
x<-x[,grp]
J<-ncol(x)
n<-nrow(x)
if(n<=20)print("With n<=20, suggest using bprm")
rm<-matrix(rank(x),n,J)
rv<-apply(rm,2,mean)
sm<-(n-1)*winall(rm,tr=0)$cov/(n-J+1)
jm1<-J-1
cv<-diag(1,jm1,J)
for (i in 2:J){
k<-i-1
cv[k,i]<--1
}
cr<-cv%*%rv
ftest<-n*t(cr)%*%solve(cv%*%sm%*%t(cv))%*%cr/(J-1)
df1<-J-1
df2<-(J-1)*(n-1)
siglevel<-1-pf(ftest,df1,df2)
list(FTEST=ftest,df1=df1,df2=df2,siglevel=siglevel)
}
box1way<-function(x,tr=.2,grp=c(1:length(x))){
#
#  A heteroscedastic one-way ANOVA for trimmed means
#  using a generalization of Box's method.
#
#  The data are assumed to be stored in $x$ in list mode.
#  Length(x) is assumed to correspond to the total number of groups.
#  By default, the null hypothesis is that all groups have a common mean.
#  To compare a subset of the groups, use grp to indicate which
#  groups are to be compared. For example, if you type the
#  command grp<-c(1,3,4), and then execute this function, groups
#  1, 3, and 4 will be compared with the remaining groups ignored.
#
#  Missing values are automatically removed.
#
J<-length(grp)  # The number of groups to be compared
print("The number of groups to be compared is")
print(J)
h<-vector("numeric",J)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
svec<-vector("numeric",J)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]]))
   # h is the number of observations in the jth group after trimming.
svec[j]<-((length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr))/(h[j]-1)
xbar[j]<-mean(x[[grp[j]]],tr)
}
xtil<-sum(h*xbar)/sum(h)
fval<-h/sum(h)
TEST<-sum(h*(xbar-xtil)^2)/sum((1-fval)*svec)
nu1<-sum((1-fval)*svec)
nu1<-nu1^2/((sum(svec*fval))^2+sum(svec^2*(1-2*fval)))
nu2<-(sum((1-fval)*svec))^2/sum(svec^2*(1-fval)^2/(h-1))
sig<-1-pf(TEST,nu1,nu2)
list(TEST=TEST,nu1=nu1,nu2=nu2,siglevel=sig)
}



pairdepb<-function(x,tr=.2,alpha=.05,grp=0,nboot=599){
#
#   Using the percentile t bootstrap method,
#   compute a .95 confidence interval for all pairwise differences between
#   the trimmed means of dependent groups.
#   By default, 20% trimming is used with B=599 bootstrap samples.
#
#   x can be an n by J matrix or it can have list mode
#
if(is.data.frame(x)) x <- as.matrix(x)
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
if(sum(grp)==0)grp<-c(1:length(x))
# put the data in an n by J matrix
mat<-matrix(0,length(x[[1]]),length(grp))
for (j in 1:length(grp))mat[,j]<-x[[grp[j]]]
}
if(is.matrix(x)){
if(sum(grp)==0)grp<-c(1:ncol(x))
mat<-x[,grp]
}
if(sum(is.na(mat))>=1)stop("Missing values are not allowed.")
J<-ncol(mat)
connum<-(J^2-J)/2
bvec<-matrix(0,connum,nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(nrow(mat),size=nrow(mat)*nboot,replace=TRUE),nrow=nboot)
xcen<-matrix(0,nrow(mat),ncol(mat))
for (j in 1:J)xcen[,j]<-mat[,j]-mean(mat[,j],tr) #Center data
it<-0
for (j in 1:J){
for (k in 1:J){
if(j<k){
it<-it+1
bvec[it,]<-apply(data,1,tsub,xcen[,j],xcen[,k],tr)
# bvec is a connum by nboot matrix containing the bootstrap test statistics.
}}}
bvec<-abs(bvec)  #Doing two-sided confidence intervals
icrit<-round((1-alpha)*nboot)
critvec<-apply(bvec,2,max)
critvec<-sort(critvec)
crit<-critvec[icrit]
psihat<-matrix(0,connum,5)
dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper"))
test<-matrix(NA,connum,4)
dimnames(test)<-list(NULL,c("Group","Group","test","se"))
it<-0
for (j in 1:J){
for (k in 1:J){
if(j<k){
it<-it+1
estse<-yuend(mat[,j],mat[,k])$se
dif<-mean(mat[,j],tr)-mean(mat[,k],tr)
psihat[it,1]<-grp[j]
psihat[it,2]<-grp[k]
psihat[it,3]<-dif
psihat[it,4]<-dif-crit*estse
psihat[it,5]<-dif+crit*estse
test[it,1]<-grp[j]
test[it,2]<-grp[k]
test[it,3]<-yuend(mat[,j],mat[,k])$teststat
test[it,4]<-estse
}}}
list(test=test,psihat=psihat,crit=crit)
}
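#
#  Example (hypothetical data): for 25 subjects and 3 dependent groups,
#    set.seed(1)
#    dat<-matrix(rnorm(75),25,3)
#    pairdepb(dat)
#  returns confidence intervals (psihat) and test statistics for all
#  pairwise differences between the 20% trimmed means.
#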


johan<-function(cmat,vmean,vsqse,h,alpha=.05){
#
#  This function is used by other functions that come with this book,
#  and it can be used to test hypotheses not covered in the text.
#
#  The function performs Johansen's test of C mu = 0 for p independent groups,
#  where C is a k by p matrix of rank k and mu is a p by 1 matrix
#  of unknown trimmed means.
#  The argument cmat contains the matrix C.
#  vmean is a vector of length p containing the p trimmed means.
#  vsqse is a diagonal matrix containing the squared standard errors of
#  the trimmed means in vmean.
#  h is a vector containing the effective sample sizes.
#
yvec<-matrix(vmean,length(vmean),1)
if(!is.matrix(vsqse))vsqse<-diag(vsqse)
test<-cmat%*%vsqse%*%t(cmat)
invc<-solve(test)
test<-t(yvec)%*%t(cmat)%*%invc%*%cmat%*%yvec
R<-vsqse%*%t(cmat)%*%invc%*%cmat
A<-sum((diag(R))^2/(h-1))
df<-nrow(cmat)
crit<-qchisq(1-alpha,df)
crit<-crit+(crit/(2*df))*A*(1+3*crit/(df+2))
list(teststat=test[1],crit=crit[1])
}



t1waysub<-function(tm,sqse,hval){
#
#  Used by t1waybt to compute Welch test statistic based on trimmed means
#  and squared standard errors stored in tm and sqse
#
w<-1/sqse
uval<-sum(w)
xtil<-sum(w*tm)/uval
A<-sum(w*(tm-xtil)^2)/(length(tm)-1)
B<-sum((1-w/uval)^2/(hval-1))
t1waysub<-A/(B+1)
t1waysub
}

winall<-function(m,tr=.2){
#
#    Compute the Winsorized correlation and covariance matrix for the
#    data in the n by p matrix m.
#
#    This function also returns the two-sided significance level
#
if(is.data.frame(m))m=as.matrix(m)
if(!is.matrix(m))stop("The data must be stored in a n by p matrix")
wcor<-matrix(1,ncol(m),ncol(m))
wcov<-matrix(0,ncol(m),ncol(m))
siglevel<-matrix(NA,ncol(m),ncol(m))
for (i in 1:ncol(m)){
ip<-i
for (j in ip:ncol(m)){
val<-wincor(m[,i],m[,j],tr)
wcor[i,j]<-val$cor
wcor[j,i]<-wcor[i,j]
if(i==j)wcor[i,j]<-1
wcov[i,j]<-val$cov
wcov[j,i]<-wcov[i,j]
if(i!=j){
siglevel[i,j]<-val$siglevel
siglevel[j,i]<-siglevel[i,j]
}
}
}
list(cor=wcor,cov=wcov,p.values=siglevel)
}
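#
#  Example (hypothetical data):
#    set.seed(1)
#    m<-matrix(rnorm(100),25,4)
#    winall(m)
#  returns the 4 by 4 Winsorized correlation and covariance matrices
#  plus a p-value for each pair of variables.
#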



johansp<-function(cmat,vmean,vsqse,h,J,K){
#
#  This function is used by other functions that come with this book,
#  and it can be used to test hypotheses not covered in the text.
#
#  The function performs Johansen's test of C mu = 0 for
#  a split-plot design where the first factor has independent groups,
#  while the second factor is within subjects,
#  C is a k by p matrix of rank k and mu is a p by 1 matrix
#  of unknown trimmed means.
#  The argument cmat contains the matrix C.
#  vmean is a vector of length p containing the p trimmed means.
#  vsqse is a block diagonal matrix, the jth block being the
#  estimated covariances among the trimmed means
#  in the jth level of factor A.
#  h is a vector of length J containing the effective sample sizes for
#  the jth level of factor A.
#
p<-J*K
yvec<-matrix(vmean,length(vmean),1)
test<-cmat%*%vsqse%*%t(cmat)
invc<-solve(test)
test<-t(yvec)%*%t(cmat)%*%invc%*%cmat%*%yvec
temp<-0
klim<-1-K
kup<-0
for (j in 1:J){
klim<-klim+K
kup<-kup+K
Q<-matrix(0,p,p) #  create Q sub j
for (k in klim:kup)Q[k,k]<-1
mtem<-vsqse%*%t(cmat)%*%invc%*%cmat%*%Q
temp[j]<-(sum(diag(mtem%*%mtem))+(sum(diag(mtem)))^2)/(h[j]-1)
}
A<-.5*sum(temp)
df1<-nrow(cmat)
df2<-nrow(cmat)*(nrow(cmat)+2)/(3*A)
cval<-nrow(cmat)+2*A-6*A/(nrow(cmat)+2)
test<-test/cval
sig<-1-pf(test,df1,df2)
list(teststat=test[1],siglevel=sig)
}


kron<-function(m1,m2){
#  compute the Kronecker product of the two matrices m1 and m2.
#
m1<-as.matrix(m1) # Vectors of length p are converted to a p by 1 matrix
m2<-as.matrix(m2)
kron<-vector(mode="numeric",length=0)
for(i in 1:nrow(m1)){
m3<-m1[i,1]*m2
for(j in 2:ncol(m1))m3<-cbind(m3,m1[i,j]*m2)
if(i==1)kron<-m3
if(i>=2)kron<-rbind(kron,m3)
}
kron
}
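
#  Example: kron(diag(2),matrix(1:4,2,2)) returns the 4 by 4
#  block-diagonal Kronecker product of the 2 by 2 identity matrix
#  with matrix(1:4,2,2).
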

rmanova<-function(x,tr=.2,grp=c(1:length(x))){
#
#  A heteroscedastic one-way repeated measures ANOVA for trimmed means.
#
#  The data are assumed to be stored in $x$ which can
#  be either an n by J matrix, or an R variable having list mode.
#  If the data are stored in list mode,
#  length(x) is assumed to correspond to the total number of groups.
#  By default, the null hypothesis is that all groups have a common mean.
#  To compare a subset of the groups, use grp to indicate which
#  groups are to be compared. For example, if you type the
#  command grp<-c(1,3,4), and then execute this function, groups
#  1, 3, and 4 will be compared with the remaining groups ignored.
#
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
J<-length(grp)  # The number of groups to be compared
print("The number of groups to be compared is")
print(J)
m1<-matrix(x[[grp[1]]],length(x[[grp[1]]]),1)
for(i in 2:J){     # Put the data into an n by J matrix
m2<-matrix(x[[grp[i]]],length(x[[grp[i]]]),1)
m1<-cbind(m1,m2)
}
}
if(is.matrix(x)){
if(length(grp)<ncol(x))m1<-as.matrix(x[,grp])
if(length(grp)>=ncol(x))m1<-as.matrix(x)
J<-ncol(x)
print("The number of groups to be compared is")
print(J)
}
#
#  Raw data are now in the matrix m1
#
m2<-matrix(0,nrow(m1),ncol(m1))
xvec<-1
g<-floor(tr*nrow(m1))  #2g is the number of observations trimmed.
for(j in 1:ncol(m1)){  # Putting Winsorized values in m2
m2[,j]<-winval(m1[,j],tr)
xvec[j]<-mean(m1[,j],tr)
}
xbar<-mean(xvec)
qc<-(nrow(m1)-2*g)*sum((xvec-xbar)^2)
m3<-matrix(0,nrow(m1),ncol(m1))
m3<-sweep(m2,1,apply(m2,1,mean))  # Sweep out rows
m3<-sweep(m3,2,apply(m2,2,mean))  # Sweep out columns
m3<-m3+mean(m2)  # Grand Winsorized mean swept in
qe<-sum(m3^2)
test<-(qc/(qe/(nrow(m1)-2*g-1)))
#
#  Next, estimate the adjusted degrees of freedom
#
v<-winall(m1,tr=tr)$cov
vbar<-mean(v)
vbard<-mean(diag(v))
vbarj<-1
for(j in 1:J){
vbarj[j]<-mean(v[j,])
}
A<-J*J*(vbard-vbar)^2/(J-1)
B<-sum(v*v)-2*J*sum(vbarj^2)+J*J*vbar^2
ehat<-A/B
etil<-(nrow(m2)*(J-1)*ehat-2)/((J-1)*(nrow(m2)-1-(J-1)*ehat))
etil<-min(1.,etil)
df1<-(J-1)*etil
df2<-(J-1)*etil*(nrow(m2)-2*g-1)
siglevel<-1-pf(test,df1,df2)
list(test=test,df=c(df1,df2),siglevel=siglevel,tmeans=xvec,ehat=ehat,etil=etil)
}
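#
#  Example (hypothetical data): for 30 subjects and 4 dependent groups,
#    set.seed(1)
#    dat<-matrix(rnorm(120),30,4)
#    rmanova(dat)
#  returns the test statistic, the adjusted degrees of freedom and siglevel.
#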



trimpartt<-function(x,con){
#
#  This function is used by other functions described in chapter 6.
#
trimpartt<-sum(con*x)
trimpartt
}

bptdmean<-function(isub,x,tr){
#
#  Compute  trimmed means
#  when comparing dependent groups.
#  By default, 20% trimmed means are used.
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  This function is used by bptd.
#
bptdmean<-mean(x[isub],tr)
bptdmean
}


bptdpsi<-function(x,con){
# Used by bptd to compute bootstrap psihat values
#
bptdpsi<-sum(con*x)
bptdpsi
}
bptdsub<-function(isub,x,tr,con){
#
#  Compute test statistic for trimmed means
#  when comparing dependent groups.
#  By default, 20% trimmed means are used.
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#  con is a J by c matrix. The cth column contains
#  a vector of contrast coefficients.
#
#  This function is used by bptd.
#
h1 <- nrow(x) - 2 * floor(tr * nrow(x))
se<-0
for(j in 1:ncol(x)){
for(k in 1:ncol(x)){
djk<-(nrow(x) - 1) * wincor(x[isub,j],x[isub,k], tr)$cov
se<-se+con[j]*con[k]*djk
}
}
se/(h1*(h1-1))
}

selby2<-function(m,grpc,coln=NA){
# Create categories according to the grpc[1] and grpc[2] columns
# of the matrix m. The function puts the values in column coln into
# a vector having list mode.
#
if(is.na(coln))stop("The argument coln is not specified")
if(length(grpc)>4)stop("The argument grpc must have length less than or equal to 4")
x<-vector("list")
ic<-0
if(length(grpc)==2){
cat1<-selby(m,grpc[1],coln)$grpn
cat2<-selby(m,grpc[2],coln)$grpn
for (i1 in 1:length(cat1)){
for (i2 in 1:length(cat2)){
temp<-NA
it<-0
for (i in 1:nrow(m)){
if(sum(m[i,c(grpc[1],grpc[2])]==c(cat1[i1],cat2[i2]))==2){
it<-it+1
temp[it]<-m[i,coln]
}
}
if(!is.na(temp[1])){
ic<-ic+1
x[[ic]]<-temp
if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2]),1,2)
if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2]))
}
}}
}
if(length(grpc)==3){
cat1<-selby(m,grpc[1],coln)$grpn
cat2<-selby(m,grpc[2],coln)$grpn
cat3<-selby(m,grpc[3],coln)$grpn
x<-vector("list")
ic<-0
for (i1 in 1:length(cat1)){
for (i2 in 1:length(cat2)){
for (i3 in 1:length(cat3)){
temp<-NA
it<-0
for (i in 1:nrow(m)){
if(sum(m[i,c(grpc[1],grpc[2],grpc[3])]==c(cat1[i1],cat2[i2],cat3[i3]))==3){
it<-it+1
temp[it]<-m[i,coln]
}}
if(!is.na(temp[1])){
ic<-ic+1
x[[ic]]<-temp
if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2],cat3[i3]),1,3)
if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2],cat3[i3]))
}}}}
}
if(length(grpc)==4){
cat1<-selby(m,grpc[1],coln)$grpn
cat2<-selby(m,grpc[2],coln)$grpn
cat3<-selby(m,grpc[3],coln)$grpn
cat4<-selby(m,grpc[4],coln)$grpn
x<-vector("list")
ic<-0
for (i1 in 1:length(cat1)){
for (i2 in 1:length(cat2)){
for (i3 in 1:length(cat3)){
for (i4 in 1:length(cat4)){
temp<-NA
it<-0
for (i in 1:nrow(m)){
if(sum(m[i,c(grpc[1],grpc[2],grpc[3],grpc[4])]==c(cat1[i1],cat2[i2],cat3[i3],cat4[i4]))==4){
it<-it+1
temp[it]<-m[i,coln]
}}
if(!is.na(temp[1])){
ic<-ic+1
x[[ic]]<-temp
if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2],cat3[i3],cat4[i4]),1,4)
if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2],cat3[i3],cat4[i4]))
}}}}}
}
list(x=x,grpn=grpn)
}


lindmsub<-function(isub,x,est,...){
#
# isub is a vector of length n containing integers
# randomly sampled with replacement from 1,...,n.
#
# Used by lindm to convert an n by B matrix of bootstrap values,
# randomly sampled from 1, ..., n, with replacement, to a
# J by B matrix of measures of location.
#
#
lindmsub<-est(x[isub],...)
lindmsub
}
lindm<-function(x,con=0,est=onestep,grp=0,alpha=.05,nboot=399,...){
#
#   Compute a 1-alpha confidence interval for a set of d linear contrasts
#   involving M-estimators associated with the marginal distributions
#   using a bootstrap method.
#   Dependent groups are assumed.
#
#   The data are assumed to be stored in x in list mode.  Thus,
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J, say.
#
#   con is a J by d matrix containing the contrast coefficients of interest.
#   If unspecified, all pairwise comparisons are performed.
#   For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(1,-1,0,0,1,-1)
#   will test two contrasts: (1) the sum of the first two measures of
#   location is equal to the sum of the second two, and (2) the difference
#   between the first two is equal to the difference between the measures
#   of location of groups 5 and 6.
#
#   The default number of bootstrap samples is nboot=399
#
#   This function uses the function trimpartt written for this
#   book.
#
#
#
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
if(sum(grp)==0)grp<-c(1:length(x))
# put the data in an n by J matrix
mat<-matrix(0,length(x[[1]]),length(grp))
for (j in 1:length(grp))mat[,j]<-x[[grp[j]]]
}
if(is.matrix(x)){
if(sum(grp)==0)grp<-c(1:ncol(x))
mat<-x[,grp]
}
mat<-elimna(mat)
J<-ncol(mat)
Jm<-J-1
d<-(J^2-J)/2
if(sum(con^2)==0){
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
if(nrow(con)!=ncol(mat))stop("The number of groups does not match the number of contrast coefficients.")
m1<-matrix(0,J,nboot)
m2<-1 # Initialize m2
mval<-1
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(nrow(mat),size=nrow(mat)*nboot,replace=TRUE),nrow=nboot)
#    data is B by n matrix
xcen<-matrix(0,nrow(mat),ncol(mat)) #An n by J matrix
for (j in 1:J){xcen[,j]<-mat[,j]-est(mat[,j],...) #Center data
mval[j]<-est(mat[,j],...)
}
for (j in 1:J)m1[j,]<-apply(data,1,lindmsub,xcen[,j],est,...) # A J by nboot matrix.
m2<-var(t(m1)) # A J by J covariance matrix corresponding to the nboot values.
boot<-matrix(0,ncol(con),nboot)
bot<-1
for (d in 1:ncol(con)){
top<-apply(m1,2,trimpartt,con[,d])
#            A vector of length nboot containing psi hat values
consq<-con[,d]^2
bot[d]<-trimpartt(diag(m2),consq)
for (j1 in 1:J){
for (j2 in 1:J){
if(j1<j2)bot[d]<-bot[d]+2*con[j1,d]*con[j2,d]*m2[j1,j2]
}}
boot[d,]<-abs(top)/sqrt(bot[d])
}
testb<-apply(boot,2,max)
ic<-round((1-alpha)*nboot)
testb<-sort(testb)
psihat<-matrix(0,ncol(con),5)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper","se"))
for (d in 1:ncol(con)){
psihat[d,1]<-d
psihat[d,2]<-trimpartt(mval,con[,d])
psihat[d,3]<-psihat[d,2]-testb[ic]*sqrt(bot[d])
psihat[d,4]<-psihat[d,2]+testb[ic]*sqrt(bot[d])
psihat[d,5]<-sqrt(bot[d])
}
list(psihat=psihat,crit=testb[ic],con=con)
}


smmcrit01<-function(nuhat,C){
#
#  Determine the .99 quantile of the C-variate Studentized maximum
#  modulus distribution using linear interpolation on inverse
#  degrees of freedom
#  If C=1, this function returns the .995 quantile of Student's t
#  distribution.
#
if(C-round(C)!=0)stop("The number of contrasts, C, must be an  integer")
if(C>=29)stop("C must be less than or equal to 28")
if(C<=0)stop("C must be greater than or equal to 1")
if(nuhat<2)stop("The degrees of freedom must be greater than or equal to 2")
if(C==1)smmcrit01<-qt(.995,nuhat)
if(C>=2){
C<-C-1
m1<-matrix(0,20,27)
m1[1,]<-c(12.73,14.44,15.65,16.59,17.35,17.99,18.53,19.01,19.43,
19.81,20.15,20.46,20.75,20.99,20.99,20.99,20.99,20.99,
22.11,22.29,22.46,22.63,22.78,22.93,23.08,23.21,23.35)
m1[2,]<-c(7.13,7.91,8.48,8.92,9.28,9.58,9.84,10.06,10.27,
10.45,10.61,10.76,10.90,11.03,11.15,11.26,11.37,11.47,
11.56,11.65,11.74,11.82,11.89,11.97,12.07,12.11,12.17)
m1[3,]<-c(5.46,5.99,6.36,6.66,6.89,7.09,7.27,7.43,7.57,
7.69,7.80,7.91,8.01,8.09,8.17,8.25,8.32,8.39,
8.45,8.51,8.57,8.63,8.68,8.73,8.78,8.83,8.87)
m1[4,]<-c(4.70,5.11,5.39,5.63,5.81,5.97,6.11,6.23,6.33,
6.43,6.52,6.59,6.67,6.74,6.81,6.87,6.93,6.98,
7.03,7.08,7.13,7.17,7.21,7.25,7.29,7.33,7.36)
m1[5,]<-c(4.27,4.61,4.85,5.05,5.20,5.33,5.45,5.55,5.64,
5.72,5.79,5.86,5.93,5.99,6.04,6.09,6.14,6.18,
6.23,6.27,6.31,6.34,6.38,6.41,6.45,6.48,6.51)
m1[6,]<-c(3.99,4.29,4.51,4.68,4.81,4.93,5.03,5.12,5.19,
5.27,5.33,5.39,5.45,5.50,5.55,5.59,5.64,5.68,
5.72,5.75,5.79,5.82,5.85,5.88,5.91,5.94,5.96)
m1[7,]<-c(3.81,4.08,4.27,4.42,4.55,4.65,4.74,4.82,4.89,
4.96,5.02,5.07,5.12,5.17,5.21,5.25,5.29, 5.33,
5.36,5.39,5.43,5.45,5.48,5.51,5.54,5.56,5.59)
m1[8,]<-c(3.67,3.92,4.10,4.24,4.35,4.45,4.53,4.61,4.67,
4.73,4.79,4.84,4.88,4.92,4.96,5.01,5.04,5.07,
5.10,5.13,5.16,5.19,5.21,5.24,5.26,5.29,5.31)
m1[9,]<-c(3.57,3.80,3.97,4.09,4.20,4.29,4.37,4.44,4.50,
4.56,4.61,4.66,4.69,4.74,4.78,4.81,4.84,4.88,
4.91,4.93,4.96,4.99,5.01,5.03,5.06,5.08,5.09)
m1[10,]<-c(3.48,3.71,3.87,3.99,4.09,4.17,4.25,4.31,4.37,
4.42,4.47,4.51,4.55,4.59,4.63,4.66,4.69,4.72,
4.75,4.78,4.80,4.83,4.85,4.87,4.89,4.91,4.93)
m1[11,]<-c(3.42,3.63,3.78,3.89,3.99,4.08,4.15,4.21,4.26,
4.31,4.36,4.40,4.44,4.48,4.51,4.54,4.57,4.59,
4.62,4.65,4.67,4.69,4.72,4.74,4.76,4.78,4.79)
m1[12,]<-c(3.32,3.52,3.66,3.77,3.85,3.93,3.99,4.05,4.10,
4.15,4.19,4.23,4.26,4.29,4.33,4.36,4.39,4.41,
4.44,4.46,4.48,4.50,4.52,4.54,4.56,4.58,4.59)
m1[13,]<-c(3.25,3.43,3.57,3.67,3.75,3.82,3.88,3.94,3.99,
4.03,4.07,4.11,4.14,4.17,4.19,4.23,4.25,4.28,
4.29,4.32,4.34,4.36,4.38,4.39,4.42,4.43,4.45)
m1[14,]<-c(3.19,3.37,3.49,3.59,3.68,3.74,3.80,3.85,3.89,
3.94,3.98,4.01,4.04,4.07,4.10,4.13,4.15,4.18,
4.19,4.22,4.24,4.26,4.28,4.29,4.31,4.33,4.34)
m1[15,]<-c(3.15,3.32,3.45,3.54,3.62,3.68,3.74,3.79,3.83,
3.87,3.91,3.94,3.97,3.99,4.03,4.05,4.07,4.09,
4.12,4.14,4.16,4.17,4.19,4.21,4.22,4.24,4.25)
m1[16,]<-c(3.09,3.25,3.37,3.46,3.53,3.59,3.64,3.69,3.73,
3.77,3.80,3.83,3.86,3.89,3.91,3.94,3.96,3.98,
4.00,4.02,4.04,4.05,4.07,4.09,4.10,4.12,4.13)
m1[17,]<-c(3.03,3.18,3.29,3.38,3.45,3.50,3.55,3.59,3.64,
3.67,3.70,3.73,3.76,3.78,3.81,3.83,3.85,3.87,
3.89,3.91,3.92,3.94,3.95,3.97,3.98,4.00,4.01)
m1[18,]<-c(2.97,3.12,3.22,3.30,3.37,3.42,3.47,3.51,3.55,
3.58,3.61,3.64,3.66,3.68,3.71,3.73,3.75,3.76,
3.78,3.80,3.81,3.83,3.84,3.85,3.87,3.88,3.89)
m1[19,]<-c(2.91,3.06,3.15,3.23,3.29,3.34,3.38,3.42,3.46,
3.49,3.51,3.54,3.56,3.59,3.61,3.63,3.64,3.66,
3.68,3.69,3.71,3.72,3.73,3.75,3.76,3.77,3.78)
m1[20,]<-c(2.81,2.93,3.02,3.09,3.14,3.19,3.23,3.26,3.29,
3.32,3.34,3.36,3.38,3.40,3.42,3.44,3.45,3.47,
3.48,3.49,3.50,3.52,3.53,3.54,3.55,3.56,3.57)
if(nuhat>=200)smmcrit01<-m1[20,C]
if(nuhat<200){
nu<-c(2,3,4,5,6,7,8,9,10,11,12,14,16,18,20,24,30,40,60,200)
temp<-abs(nu-nuhat)
find<-order(temp)
if(temp[find[1]]==0)smmcrit01<-m1[find[1],C]
if(temp[find[1]]!=0){
if(nuhat>nu[find[1]]){
smmcrit01<-m1[find[1],C]-
(1/nu[find[1]]-1/nuhat)*(m1[find[1],C]-m1[find[1]+1,C])/
(1/nu[find[1]]-1/nu[find[1]+1])
}
if(nuhat<nu[find[1]]){
smmcrit01<-m1[find[1]-1,C]-
(1/nu[find[1]-1]-1/nuhat)*(m1[find[1]-1,C]-m1[find[1],C])/
(1/nu[find[1]-1]-1/nu[find[1]])
}
}}
}
smmcrit01
}

pbtest<-function(m,beta=.1){
#
#    Test H0: R(pb)=I, the hypothesis that the percentage
#    bend correlation matrix equal the identity matrix
#    for the data stored in the n by p matrix m
#
#
n<-nrow(m)
nu<-n-2
a<-nu-.5
b<-48*a^2
nmm<-ncol(m)-1
c<-matrix(0,ncol(m),ncol(m))
for (i in 1:nmm){
ip1<-i+1
for (j in ip1:ncol(m)){
tjk<-pbcor(m[,i],m[,j],beta)$cor  # percentage bend correlation
tjk<-tjk*sqrt(nu/(1.-tjk^2))      # convert to a t statistic
c[i,j]<-sqrt(a*log(1+tjk^2/nu))
c[i,j]<-c[i,j]+(c[i,j]^3+3*c[i,j])/b
c[i,j]<-c[i,j]-(4*c[i,j]^7+33*c[i,j]^5+240*c[i,j]^3+855*c[i,j])/(10*b^2+8*b*c[i,j]^4+1000*b)
}
}
h<-sum(c^2)
sig<-1-pchisq(h,ncol(m)*(ncol(m)-1)/2)
list(teststat=h,siglevel=sig)
}

tau<-function(x,y=NULL,alpha=.05){
#
#   Compute Kendall's tau plus a 1-alpha confidence interval
#   using the method recommended by Long and Cliff (1997).
#
#   If y=NULL, x is assumed to be a matrix with two columns.
#
if(is.null(y))m=elimna(x) 
if(!is.null(y))m=elimna(cbind(x,y)) # casewise deletion of missing values.
x=m[,1]
y=m[,2]
xdif<-outer(x,x,FUN="-")
ydif<-outer(y,y,FUN="-")
tv<-sign(xdif)*sign(ydif)
dbar<-apply(tv,1,mean)
n<-length(x)
tau<-sum(tv)/(n*(n-1))
A<-sum((dbar-tau)^2)/(n-1)
B<-(n*(n-1)*(-1)*tau^2+sum(tv^2))/(n^2-n-1)
C<-(4*(n-2)*A+2*B)/(n*(n-1))
crit<-qnorm(alpha/2)
cilow<-tau+crit*sqrt(C)
cihi<-tau-crit*sqrt(C)
test<-tau/sqrt((2*(2*n+5))/(9*n*(n-1)))
siglevel<-2*(1-pnorm(abs(test)))
list(cor=tau,ci=c(cilow,cihi),siglevel=siglevel)
}
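
#  Example (hypothetical data):
#    set.seed(1)
#    x<-rnorm(30)
#    y<-x+rnorm(30)
#    tau(x,y)
#  returns Kendall's tau, a .95 confidence interval and siglevel.
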

elimna<-function(m){
#
# remove any rows of data having missing values
#
if(is.null(dim(m)))m<-as.matrix(m)
ikeep<-c(1:nrow(m))
for(i in 1:nrow(m))if(sum(is.na(m[i,])>=1))ikeep[i]<-0
elimna<-m[ikeep[ikeep>=1],]
elimna
}
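
#  Example: elimna(matrix(c(1,NA,3,4,5,6),ncol=2)) drops the second row,
#  the only row containing a missing value.
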

pball<-function(m,beta=.2){
#
#    Compute the percentage bend correlation matrix for the
#    data in the n by p matrix m.
#
#    This function also returns the two-sided significance level
#    for all pairs of variables, plus a test of zero correlations
#    among all pairs. (See chapter 6 for details.)
#
if(!is.matrix(m))stop("Data must be stored in an n by p matrix")
pbcorm<-matrix(0,ncol(m),ncol(m))
temp<-matrix(1,ncol(m),ncol(m))
siglevel<-matrix(NA,ncol(m),ncol(m))
cmat<-matrix(0,ncol(m),ncol(m))
for (i in 1:ncol(m)){
ip1<-i
for (j in ip1:ncol(m)){
if(i<j){
pbc<-pbcor(m[,i],m[,j],beta)
pbcorm[i,j]<-pbc$cor
temp[i,j]<-pbcorm[i,j]
temp[j,i]<-pbcorm[i,j]
siglevel[i,j]<-pbc$siglevel
siglevel[j,i]<-siglevel[i,j]
}
}
}
tstat<-pbcorm*sqrt((nrow(m)-2)/(1-pbcorm^2))
cmat<-sqrt((nrow(m)-2.5)*log(1+tstat^2/(nrow(m)-2)))
bv<-48*(nrow(m)-2.5)^2
cmat<-cmat+(cmat^3+3*cmat)/bv-(4*cmat^7+33*cmat^5+240*cmat^3+855*cmat)/(10*bv^2+8*bv*cmat^4+1000*bv)
H<-sum(cmat^2)
df<-ncol(m)*(ncol(m)-1)/2
h.siglevel<-1-pchisq(H,df)
list(pbcorm=temp,siglevel=siglevel,H=H,H.siglevel=h.siglevel)
}
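#
#  Example (hypothetical data):
#    set.seed(1)
#    m<-matrix(rnorm(100),25,4)
#    pball(m)
#  returns the percentage bend correlations, their p-values, and the
#  statistic H for testing that all correlations are zero.
#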






pbos<-function(x,beta=.2){
#
#    Compute the one-step percentage bend measure of location
#
#
temp<-sort(abs(x-median(x)))
omhatx<-temp[floor((1-beta)*length(x))]
psi<-(x-median(x))/omhatx
i1<-length(psi[psi<(-1)])
i2<-length(psi[psi>1])
sx<-ifelse(psi<(-1),0,x)
sx<-ifelse(psi>1,0,sx)
pbos<-(sum(sx)+omhatx*(i2-i1))/(length(x)-i1-i2)
pbos
}


tauall<-function(m){
#
#    Compute Kendall's tau for the
#    data in the n by p matrix m.
#
#    This function also returns the two-sided significance level
#    for all pairs of variables. (See chapter 6 for details.)
#
if(!is.matrix(m))stop("Data must be stored in an n by p matrix")
taum<-matrix(0,ncol(m),ncol(m))
siglevel<-matrix(NA,ncol(m),ncol(m))
for (i in 1:ncol(m)){
ip1<-i
for (j in ip1:ncol(m)){
if(i<j){
pbc<-tau(m[,i],m[,j])
taum[i,j]<-pbc$cor
taum[j,i]<-pbc$cor
siglevel[i,j]<-pbc$siglevel
siglevel[j,i]<-siglevel[i,j]
}
}
}
list(taum=taum,siglevel=siglevel)
}

biloc<-function(x){
#
# compute biweight measure of location
# This function is used by relplot
#
m<-median(x)
u<-abs((x-m)/(9*mad(x)*qnorm(.75)))
top<-sum((x[u<=1]-m)*(1-u[u<=1]^2)^2)
bot<-sum((1-u[u<=1]^2)^2)
bi<-m+top/bot
bi
}

relfun<-function(xv,yv,C=36,epsilon=.0001,plotit=TRUE,xlab="X",ylab="Y"){
#   Compute the measures of location, scale and correlation used in the
#   bivariate boxplot of Goldberg and Iglewicz,
#   Technometrics, 1992, 34, 307-320.
#
#   The code in relplot plots the boxplot.
#
#   This code assumes the data are in xv and yv
#
#   This code uses the function biloc and
#   bivar
#
plotit<-as.logical(plotit)
tx<-biloc(xv)
ty<-biloc(yv)
sx<-sqrt(bivar(xv))
sy<-sqrt(bivar(yv))
z1<-(xv-tx)/sx+(yv-ty)/sy
z2<-(xv-tx)/sx-(yv-ty)/sy
ee<-((z1-biloc(z1))/sqrt(bivar(z1)))^2+
((z2-biloc(z2))/sqrt(bivar(z2)))^2
w<-(1-ee/C)^2
if(length(w[w==0])>=length(xv)/2)warning("More than half of the w values equal zero")
sumw<-sum(w[ee<C])
tempx<-w*xv
txb<-sum(tempx[ee<C])/sumw
tempy<-w*yv
tyb<-sum(tempy[ee<C])/sumw
tempxy<-w*(xv-txb)*(yv-tyb)
tempx<-w*(xv-txb)^2
tempy<-w*(yv-tyb)^2
sxb<-sum((tempx[ee<C]))/sumw
syb<-sum((tempy[ee<C]))/sumw
rb<-sum(tempxy[ee<C])/(sqrt(sxb*syb)*sumw)
z1<-((xv-txb)/sqrt(sxb)+(yv-tyb)/sqrt(syb))/sqrt(2*(1+rb))
z2<-((xv-txb)/sqrt(sxb)-(yv-tyb)/sqrt(syb))/sqrt(2*(1-rb))
wo<-w
ee<-z1^2+z2^2
w<-(1-ee/C)^2
sumw<-sum(w[ee<C])
tempx<-w*xv
txb<-sum(tempx[ee<C])/sumw
tempy<-w*yv
tyb<-sum(tempy[ee<C])/sumw
tempxy<-w*(xv-txb)*(yv-tyb)
tempx<-w*(xv-txb)^2
tempy<-w*(yv-tyb)^2
sxb<-sum((tempx[ee<C]))/sumw
syb<-sum((tempy[ee<C]))/sumw
rb<-sum(tempxy[ee<C])/(sqrt(sxb*syb)*sumw)
z1<-((xv-txb)/sqrt(sxb)+(yv-tyb)/sqrt(syb))/sqrt(2*(1+rb))
z2<-((xv-txb)/sqrt(sxb)-(yv-tyb)/sqrt(syb))/sqrt(2*(1-rb))
iter<-0
while(iter<=10){
iter<-iter+1
ee<-z1^2+z2^2
w<-(1-ee/C)^2
sumw<-sum(w[ee<C])
tempx<-w*xv
txb<-sum(tempx[ee<C])/sumw
tempy<-w*yv
tyb<-sum(tempy[ee<C])/sumw
tempxy<-w*(xv-txb)*(yv-tyb)
tempx<-w*(xv-txb)^2
tempy<-w*(yv-tyb)^2
sxb<-sum((tempx[ee<C]))/sumw
syb<-sum((tempy[ee<C]))/sumw
rb<-sum(tempxy[ee<C])/(sqrt(sxb*syb)*sumw)
z1<-((xv-txb)/sqrt(sxb)+(yv-tyb)/sqrt(syb))/sqrt(2*(1+rb))
z2<-((xv-txb)/sqrt(sxb)-(yv-tyb)/sqrt(syb))/sqrt(2*(1-rb))
wo<-w
ee<-z1^2+z2^2
w<-(1-ee/C)^2
dif<-w-wo
crit<-sum(dif^2)/(mean(w))^2
if(crit <=epsilon)break
}
if(plotit){
em<-median(sqrt(ee))
r1<-em*sqrt((1+rb)/2)
r2<-em*sqrt((1-rb)/2)
temp<-c(0:179)
thet<-2*3.141593*temp/180
theta1<-r1*cos(thet)
theta2<-r2*sin(thet)
xplot1<-txb+(theta1+theta2)*sqrt(sxb)
yplot1<-tyb+(theta1-theta2)*sqrt(syb)
emax<-max(sqrt(ee[ee<7*em^2]))
r1<-emax*sqrt((1+rb)/2)
r2<-emax*sqrt((1-rb)/2)
theta1<-r1*cos(thet)
theta2<-r2*sin(thet)
xplot<-txb+(theta1+theta2)*sqrt(sxb)
yplot<-tyb+(theta1-theta2)*sqrt(syb)
totx<-c(xv,xplot,xplot1)
toty<-c(yv,yplot,yplot1)
plot(totx,toty,type="n",xlab=xlab,ylab=ylab)
points(xv,yv)
points(xplot,yplot,pch=".")
points(xplot1,yplot1,pch=".")
}
list(mest=c(txb,tyb),mvar=c(sxb,syb),mrho=rb)
}

bicov<-function(x,y){
#
# compute biweight midcovariance of x and y
#
mx<-median(x)
my<-median(y)
ux<-abs((x-mx)/(9*qnorm(.75)*mad(x)))
uy<-abs((y-my)/(9*qnorm(.75)*mad(y)))
aval<-ifelse(ux<=1,1,0)
bval<-ifelse(uy<=1,1,0)
top<-sum(aval*(x-mx)*(1-ux^2)^2*bval*(y-my)*(1-uy^2)^2)
top<-length(x)*top
botx<-sum(aval*(1-ux^2)*(1-5*ux^2))
boty<-sum(bval*(1-uy^2)*(1-5*uy^2))
bi<-top/(botx*boty)
bi
}

bireg<-function(x,y,iter=20,bend=1.28){
#
# Compute a biweight midregression equation
# The predictors are assumed to be stored in the n by p matrix x.
#
x<-as.matrix(x)
ma<-matrix(0,ncol(x),1)
m<-matrix(0,ncol(x),ncol(x))
mvals<-apply(x,2,mest,bend)
for (i in 1:ncol(x)){
ma[i,1]<-bicov(x[,i],y)
for (j in 1:ncol(x))m[i,j]<-bicov(x[,i],x[,j])
}
slope<-solve(m,ma)
b0<-mest(y,bend)-sum(slope%*%mvals)
for(it in 1:iter){
res<-y-x%*%slope-b0
for (i in 1:ncol(x))ma[i,1]<-bicov(x[,i],res)
slopeadd<-solve(m,ma)
b0add<-mest(res,bend)-sum(slopeadd%*%mvals)
if(max(abs(slopeadd),abs(b0add)) <.0001)break
slope<-slope+slopeadd
b0<-b0+b0add
}
if(max(abs(slopeadd),abs(b0add)) >=.0001)
warning(paste("failed to converge in",iter,"iterations"))
list(coef=c(b0,slope),residuals=res)
}

chreg<-function(x,y,bend=1.345,SEED=TRUE,xout=FALSE,outfun=out,pr=TRUE,...){
#
# Compute Coakley Hettmansperger robust regression estimators
# JASA, 1993, 88, 872-880
#
# x is a n by p matrix containing the predictor values.
#
# No missing values are allowed
#
#  Comments in this function follow the notation used
#  by Coakley and Hettmansperger
#
library(MASS)
# with old version of R, need library(lqs) when using ltsreg
# as the initial estimate.
#
if(pr)print('If using chreg with a bootstrap method, use chregF instead')
if(SEED)set.seed(12) # Set seed so that results are always duplicated.
x<-as.matrix(x)
p<-ncol(x)
m<-elimna(cbind(x,y))
x<-m[,1:p]
p1<-p+1
y<-m[,p1]
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=FALSE,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
x<-as.matrix(x)
cutoff<-bend
mve<-vector("list")
if(ncol(x)==1){
mve$center<-median(x)
mve$cov<-mad(x)^2
}
if(ncol(x)>=2)mve<-cov.mve(x)  # compute minimum volume ellipsoid measures of
                 # location and scale and store in mve.
reg0<-ltsgreg(x,y) # compute initial regression est using least trimmed
                 # squares.
# Next, compute the rob-md2(i) values and store in rob
rob<-1  # Initialize vector rob
mx<-mve$center
rob<-mahalanobis(x,mx,mve$cov)
k21<-qchisq(.95,p)
c62<-k21/rob
vecone<-c(rep(1,length(y))) # Initialize vector vecone to 1
c30<-pmin(vecone,c62)  # mallows weights put in c30
k81<-median(abs(reg0$residuals)) # median of absolute residuals
k72<-1.4826*(1+(5/(length(y)-p-1)))*k81 # lms scale
c60<-reg0$residuals/(k72*c30) # standardized residuals
#  compute psi and store in c27
cvec<-c(rep(cutoff,length(y))) # Initialize vector cvec to cutoff
c27<-pmin(cvec,c60)
c27<-pmax(-1*cutoff,c27)  #c27 contains psi values
#
# compute B matrix and put in c66.
#  Also, transform B so that i th diag elem = 0 if c27[i] is
#  between -cutoff and cutoff, 1 otherwise.
#
c66<-ifelse(abs(c27)<=bend,1,0) # Have derivative of psi in c66
m1<-cbind(1,x)  # X matrix with col of 1's added
m2<-t(m1)   #X transpose
m5<-diag(c30) # matrix W, diagonal contains weights
m4<-diag(c66) # B matrix
m6<-m4%*%m1   # BX
m7<-m2%*%m6   # X'BX (nD=X'BX)
m8<-solve(m7)  #m8 = (X'-B-X)inverse
m9<-m8%*%m2 #m9=X prime-B-X inverse X'
m9<-m9%*%m5 # m9=X prime-B-X inverse X'W
m10<-m9%*%c27
c20<-m10*k72
c21<-reg0$coef+c20 #update initial estimate of parameters.
res<-y-m1%*%c21
list(coef=t(c21),residuals=res)
}

regboot<-function(isub,x,y,regfun,...){
#
#  Perform regression using x[isub] to predict y[isub]
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  This function is used by other functions when computing
#  bootstrap estimates.
#
#  regfun is some regression method already stored in R
#  It is assumed that regfun$coef contains the  intercept and slope
#  estimates produced by regfun.  The regression methods written for
#  this  book, plus regression functions in R, have this property.
#
#  x is assumed to be a matrix containing values of the predictors.
#
xmat<-matrix(x[isub,],nrow(x),ncol(x))
vals<-regfun(xmat,y[isub],...)$coef
vals
}


bmreg<-function(x,y,iter=20,bend=2*sqrt((ncol(x)+1)/nrow(x))){
# compute a bounded M regression using Huber Psi and Schweppe weights.
# The predictors are assumed to be stored in the n by p matrix x.
#
x<-as.matrix(x)
init<-lsfit(x,y)
resid<-init$residuals
x1<-cbind(1,x)
nu<-sqrt(1-hat(x1))
low<-ncol(x)+1
for(it in 1:iter){
ev<-sort(abs(resid))
scale<-median(ev[c(low:length(y))])/qnorm(.75)
rov<-(resid/scale)/nu
psi<-ifelse(abs(rov)<=bend,rov,bend*sign(rov))  # Huber Psi
wt<-nu*psi/(resid/scale)
new<-lsfit(x,y,wt)
if(max(abs(new$coef-init$coef))<.0001)break
init$coef<-new$coef
resid<-new$residuals
}
resid<-y-x1%*%new$coef
if(max(abs(new$coef-init$coef))>=.0001)
warning(paste("failed to converge in",iter,"steps"))
list(coef=new$coef,residuals=resid,w=wt)
}
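
#  Example (hypothetical data): with a single predictor,
#    set.seed(1)
#    x<-matrix(rnorm(50),ncol=1)
#    y<-1+2*x[,1]+rnorm(50)
#    bmreg(x,y)$coef
#  returns the estimated intercept and slope.
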


reglev<-function(x,y,plotit=TRUE,SEED=TRUE){
#
#  Search for good and bad leverage points using the
#  Rousseuw and van Zomeren method.
#
#  x is an n by p matrix
#
#  The function returns the number of the rows in x that are identified
#  as outliers. (The row numbers are stored in outliers.)
#  It also returns the distance of the points identified as outliers
#  in the variable dis.
#
library(MASS)
plotit<-as.logical(plotit)
if(SEED)set.seed(12)
x<-as.matrix(x)
res<-lmsreg(x,y)$resid
sighat<-sqrt(median(res^2))
sighat<-1.4826*(1+(5/(length(y)-ncol(x)-1)))*sighat
stanres<-res/sighat
set.seed(12)
if(ncol(x)>=2)mve<-cov.mve(x)
if(ncol(x)==1){
mve<-vector("list")
mve$center<-median(x)
mve$cov<-mad(x)^2
}
dis<-mahalanobis(x,mve$center,mve$cov)
dis<-sqrt(dis)
crit<-sqrt(qchisq(.975,ncol(x)))
chk<-ifelse(dis>crit,1,0)
vec<-c(1:nrow(x))
id<-vec[chk==1]
chkreg<-ifelse(abs(stanres)>2.5,1,0)
idreg<-vec[chkreg==1]
if(plotit){
plot(dis,stanres,xlab="Robust distances",ylab="standardized residuals")
abline(-2.5,0)
abline(2.5,0)
abline(v=crit)
}
list(levpoints=id,regout=idreg,dis=dis,stanres=stanres,crit=crit)
}

winreg<-function(x,y,iter=20,tr=.2){
#
# Compute a Winsorized regression estimator
# The predictors are assumed to be stored in the n by p matrix x.
#
x<-as.matrix(x)
ma<-matrix(0,ncol(x),1)
m<-matrix(0,ncol(x),ncol(x))
mvals<-apply(x,2,win,tr)
for (i in 1:ncol(x)){
ma[i,1]<-wincor(x[,i],y,tr=tr)$cov
for (j in 1:ncol(x))m[i,j]<-wincor(x[,i],x[,j],tr=tr)$cov
}
slope<-solve(m,ma)
b0<-win(y,tr)-sum(slope%*%mvals)
for(it in 1:iter){
res<-y-x%*%slope-b0
for (i in 1:ncol(x))ma[i,1]<-wincor(x[,i],res,tr=tr)$cov
slopeadd<-solve(m,ma)
b0add<-win(res,tr)-sum(slopeadd%*%mvals)
if(max(abs(slopeadd),abs(b0add)) <.0001)break
slope<-slope+slopeadd
b0<-b0+b0add
}
if(max(abs(slopeadd),abs(b0add)) >=.0001)
warning(paste("failed to converge in",iter,"iterations"))
list(coef=c(b0,slope),resid=res)
}


anctgen<-function(x1,y1,x2,y2,pts,fr1=1,fr2=1,tr=.2){
#
# Compare two independent  groups using the ancova method
# in chapter 9. No assumption is made about the form of the regression
# lines--a running interval smoother is used.
#
#  Assume data are in x1 y1 x2 and y2
#  Comparisons are made at the design points contained in the vector
#  pts
#
#  Comparisons can be made using at most 28 design points, otherwise
#  a critical value for controlling the experimentwise type I error cannot
#  be computed.
#
if(length(pts)>=29)stop("At most 28 points can be compared")
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
mat<-matrix(NA,length(pts),8)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi"))
for (i in 1:length(pts)){
g1<-y1[near(x1,pts[i],fr1)]
g2<-y2[near(x2,pts[i],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
test<-yuen(g1,g2,tr=tr)
mat[i,1]<-pts[i]
mat[i,2]<-length(g1)
mat[i,3]<-length(g2)
mat[i,4]<-test$dif
mat[i,5]<-test$teststat
mat[i,6]<-test$se
if(length(pts)>=2)critv<-smmcrit(test$df,length(pts))
if(length(pts)==1)critv<-qt(.975,test$df)
cilow<-test$dif-critv*test$se
cihi<-test$dif+critv*test$se
mat[i,7]<-cilow
mat[i,8]<-cihi
}
list(output=mat,crit=critv)
}

near<-function(x,pt,fr=1){
# determine which values in x are near pt
# based on fr * mad
m<-mad(x)
if(m==0){
temp<-idealf(x)
m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25))
}
if(m==0)m<-sqrt(winvar(x)/.4129)
if(m==0)stop("All measures of dispersion are equal to 0")
dis<-abs(x-pt)
dflag<-dis <= fr*m
dflag
}
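
#  Example: near(c(1,2,3,10),2) returns TRUE for the values within
#  one (fr=1) robust measure of scale of the point 2, and FALSE for
#  the outlying value 10.
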

regpres1<-function(isub,x,y,regfun,mval){
#
#  Perform regression using x[isub] to predict y[isub]
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  This function is used by other functions when computing
#  bootstrap estimates.
#
#  regfun is some regression method already stored in R
#  It is assumed that regfun$coef contains the  intercept and slope
#  estimates produced by regfun.  The regression methods written for
#  this  book, plus regression functions in R, have this property.
#
#  x is assumed to be a matrix containing values of the predictors.
#
xmat<-matrix(x[isub,],mval,ncol(x))
regboot<-regfun(xmat,y[isub])
regboot<-regboot$coef
regboot
}

runhat<-function(x,y,pts=x,est=onestep,fr=1,nmin=1,...){
#
# Running interval smoother that can be used with any measure
# of location or scale. By default, a one-step M-estimator is used.
# This function computes an estimate of y for each x value stored in pts
#
# fr controls amount of smoothing
rmd<-rep(NA,length(pts))
for(i in 1:length(pts)){
val<-y[near(x,pts[i],fr)]
if(length(val)>=nmin)rmd[i]<-est(val,...)
}
rmd
}
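
#  Example (hypothetical data):
#    set.seed(1)
#    x<-runif(50)
#    y<-x^2+rnorm(50,sd=.1)
#    runhat(x,y,pts=c(.25,.5,.75))
#  returns the estimated value of y at each of the three design points.
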

sqfun<-function(y){
#
# Compute the sum of squared values.
# Used as an error criterion by errfun.
#
sqfun<-sum(y^2)
sqfun
}

absfun<-function(y){
#
# Compute the sum of absolute values.
# Used as an error criterion by errfun.
#
absfun<-sum(abs(y))
absfun
}

ancbootg<-function(x1,y1,x2,y2,pts,fr1=1,fr2=1,tr=.2,nboot=599){
#
# Compare two independent  groups using the ancova method
# in chapter 9. No assumption is made about the form of the regression
# lines--a running interval smoother is used.
#
#  Assume data are in x1 y1 x2 and y2
#  Comparisons are made at the design points contained in the vector
#  pts
#
m1=elimna(cbind(x1,y1))
x1=m1[,1]
y1=m1[,2]
m1=elimna(cbind(x2,y2))
x2=m1[,1]
y2=m1[,2]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
mat<-matrix(NA,length(pts),8)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi"))
gv<-vector("list",2*length(pts))
for (i in 1:length(pts)){
g1<-y1[near(x1,pts[i],fr1)]
g2<-y2[near(x2,pts[i],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
j<-i+length(pts)
gv[[i]]<-g1
gv[[j]]<-g2
}
I1<-diag(length(pts))
I2<-0-I1
con<-rbind(I1,I2)
test<-linconb(gv,con=con,tr=tr,nboot=nboot)
mat[,1]<-pts
mat[,2]<-n1
mat[,3]<-n2
mat[,4]<-test$psihat[,2]
mat[,5]<-test$test[,2]
mat[,6]<-test$test[,3]
mat[,7]<-test$psihat[,3]
mat[,8]<-test$psihat[,4]
list(output=mat,crit=test$crit)
}

errfun<-function(yhat,y,error=sqfun){
#
#   Compute error terms for regpre
#
#    yhat is an n by nboot matrix
#    y is n by 1.
#
ymat<-matrix(y,nrow(yhat),ncol(yhat))
blob<-yhat-ymat
errfun<-error(blob)
errfun
}

near3d<-function(x,pt,fr=.8,m){
# determine which values in x are near pt
# based on fr * cov.mve
#
# x is assumed to be an n by p matrix
# pt is a vector of length p (a point in p-space).
# m is cov.mve(x) computed by runm3d
#
library(MASS)
if(!is.matrix(x))stop("Data are not stored in a matrix.")
dis<-sqrt(mahalanobis(x,pt,m$cov))
dflag<-dis < fr
dflag
}

run3hat<-function(x,y,pts,fr=.8,tr=.2){
#
# Compute y hat for each row of data in the matrix pts
# using a running  interval method
#
# fr controls amount of smoothing
# tr is the amount of trimming
# x is an n by p matrix of predictors.
# pts is an m by p matrix, m>=1.
#
library(MASS)
set.seed(12)
if(!is.matrix(x))stop("Predictors are not stored in a matrix.")
if(!is.matrix(pts))stop("The third argument, pts, must be a matrix.")
m<-cov.mcd(x)
rmd<-1 # Initialize rmd
nval<-1
for(i in 1:nrow(pts)){
rmd[i]<-mean(y[near3d(x,pts[i,],fr,m)],tr)
nval[i]<-length(y[near3d(x,pts[i,],fr,m)])
}
list(rmd=rmd,nval=nval)
}


idb<-function(x,n){
#
#  Determine which of the integers 1, 2, ..., n appear in the vector x.
#  Return idb[i]=1 if the value i does not appear in x; 0 otherwise.
#  This function is used by regpre
#
m1<-matrix(0,n,n)
m1<-outer(c(1:n),x,"-")
m1<-ifelse(m1==0,1,0)
idb<-apply(m1,1,sum)
idb<-ifelse(idb>=1,0,1)
idb
}

hratio<-function(x,y,regfun=bmreg){
#
#   Compute a p by p matrix of half-slope ratios
#
#   regfun can be any R function that returns the coefficients in
#   the vector regfun$coef, the first element of which contains the
#   estimated intercept, the second element contains the estimate of
#   the first predictor, etc.
#
#  OUTPUT:
#The first row reports the half-slope
#ratios when the data are divided into two groups using the first predictor.
#The first column is the half-slope ratio for the first predictor, the
#second column is the half-slope ratio for the second predictor, and so forth.
#The second row contains the half-slope ratios when the data are divided
#into two groups using the second predictor, and so on.
#
x<-as.matrix(x)
xmat<-matrix(0,nrow(x),ncol(x))
mval<-floor(length(y)/2)
mr<-length(y)-mval
xmatl<-matrix(0,mval,ncol(x))
xmatr<-matrix(0,mr,ncol(x))
hmat<-matrix(NA,ncol(x),ncol(x))
isub<-c(1:length(y))
ksub<-c(1:ncol(x))+1
for (k in 1:ncol(x)){
xord<-order(x[,k])
yord<-y[xord]
yl<-yord[isub<=mval]
yr<-yord[isub>mval]
for (j in 1:ncol(x)){
xmat[,j]<-x[xord,j]
xmatl[,j]<-xmat[isub<=mval,j]
xmatr[,j]<-xmat[isub>mval,j]
}
coefl<-regfun(xmatl,yl)$coef
coefr<-regfun(xmatr,yr)$coef
hmat[k,]<-coefr[ksub[ksub>=2]]/coefl[ksub[ksub>=2]]
}
hmat
}



rung3d<-function(x,y,est=onestep,fr=1,plotit=TRUE,theta=50,phi=25,pyhat=FALSE,LP=FALSE,
expand=.5,scale=FALSE,zscale=TRUE,nmin=0,xout=FALSE,eout=FALSE,outfun=out,SEED=TRUE,STAND=FALSE,
xlab="X",ylab="Y",zlab="",pr=TRUE,duplicate="error",ticktype="simple",...){
#
# running mean using interval method
#

# fr (the span) controls amount of smoothing
# est is the measure of location.
# (Goal is to determine est(y) given x.)
# x is an n by p matrix of predictors.
#
# pyhat=T, predicted values are returned.
#
library(MASS)
library(akima)
if(SEED)set.seed(12) # set seed for cov.mve
if(eout && xout)stop("Not allowed to have eout=xout=T")
if(!is.matrix(x))stop("Data are not stored in a matrix.")
if(nrow(x) != length(y))stop("Number of rows in x does not match length of y")
temp<-cbind(x,y)
p<-ncol(x)
p1<-p+1
temp<-elimna(temp) # Eliminate any rows with missing values.
x<-as.matrix(temp[,1:p])
y<-temp[,p1]
if(eout){
keepit<-outfun(temp,plotit=FALSE)$keep
x<-x[keepit,]
y<-y[keepit]
}
if(xout){
keepit<-outfun(x,plotit=FALSE,STAND=STAND,...)$keep
x<-x[keepit,]
y<-y[keepit]
}
temp<-cbind(x,y) # Rebuild temp so that outlier removal is not undone below
if(zscale){
for(j in 1:p1){
temp[,j]<-(temp[,j]-median(temp[,j]))/mad(temp[,j])
}}
x<-temp[,1:p]
y<-temp[,p1]
m<-cov.mve(x)
iout<-c(1:nrow(x))
rmd<-1 # Initialize rmd
nval<-1
for(i in 1:nrow(x))rmd[i]<-est(y[near3d(x,x[i,],fr,m)],...)
for(i in 1:nrow(x))nval[i]<-length(y[near3d(x,x[i,],fr,m)])
if(ncol(x)==2){
if(plotit){
if(pr){
if(!scale)print("With dependence, suggest using scale=T")
}
fitr<-rmd[nval>nmin]
y<-y[nval>nmin]
x<-x[nval>nmin,]
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
if(LP)fitr=lplot(x[iout>=1,],fitr,pyhat=TRUE,pr=FALSE)$yhat
mkeep<-x[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate)
persp(fit,theta=theta,phi=phi,expand=expand,
scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype)
}}
if(pyhat)last<-rmd
if(!pyhat)last <- "Done"
        last
}

mbmreg<-function(x,y,iter=20,bend=2*sqrt((ncol(x)+1)/nrow(x))){
#
# Compute a bounded M regression estimator using
# Huber Psi and Schweppe weights with
# regression outliers getting a weight of zero.
#
# This is the modified M-regression estimator in Chapter 8
#
# The predictors are assumed to be stored in the n by p matrix x.
#
x<-as.matrix(x)
if(is.matrix(y)){
if(ncol(y)==1)y=as.vector(y)
}
x1<-cbind(1,x)
library(MASS)
reslms<-lmsreg(x,y)$resid
sighat<-sqrt(median(reslms^2))
sighat<-1.4826*(1+(5/(length(y)-ncol(x)-1)))*sighat
if(sighat==0)warning("The estimated measure of scale, based on the residuals using lms regression, is zero")
temp<-ifelse(sighat>0,abs(reslms)/sighat,0*reslms)
wt<-ifelse(temp<=2.5,1,0)
init<-lsfit(x,y,wt)
resid<-init$residuals
nu<-sqrt(1-hat(x1))
low<-ncol(x)+1
for(it in 1:iter){
ev<-sort(abs(resid))
scale<-median(ev[c(low:length(y))])/qnorm(.75)
rov<-(resid/scale)/nu
psi<-ifelse(abs(rov)<=bend,rov,bend*sign(rov))  # Huber Psi
wt<-nu*psi/(resid/scale)
wt<-ifelse(temp<=2.5,wt,0)
new<-lsfit(x,y,wt)
if(max(abs(new$coef-init$coef))<.0001)break
init$coef<-new$coef
resid<-new$residuals
}
resid<-y-x1%*%new$coef
if(max(abs(new$coef-init$coef))>=.0001)
warning(paste("failed to converge in",iter,"steps"))
list(coef=new$coef,residuals=resid,w=wt)
}

rankisub<-function(x,y){
#
#  compute phat and an estimate of its variance
#
x<-x[!is.na(x)]  # Remove missing values from x
y<-y[!is.na(y)]  # Remove missing values from y
u<-outer(x,y,FUN="<")
p1<-0
p2<-0
for (j in 1:length(y)){
temp<-outer(u[,j],u[,j])
p1<-p1+sum(temp)-sum(u[,j]*u[,j])
}
for (i in 1: length(x)){
temp<-outer(u[i,],u[i,])
p2<-p2+sum(temp)-sum(u[i,]*u[i,])
}
p<-sum(u)/(length(x)*length(y))
pad<-p
if(p==0)pad<-.5/(length(x)*length(y))
if(p==1)pad<-1-.5/(length(x)*length(y))
p1<-p1/(length(x)*length(y)*(length(x)-1))
p2<-p2/(length(x)*length(y)*(length(y)-1))
var<-pad*(1.-pad)*(((length(x)-1)*(p1-p^2)/(pad*(1-pad))+1)/(1-1/length(y))+
((length(y)-1)*(p2-p^2)/(pad*(1-pad))+1)/(1-1/length(x)))
var<-var/(length(x)*length(y))
list(phat=p,sqse=var)
}

pbcor<-function(x,y,beta=.2){
#   Compute the percentage bend correlation between x and y.
#
#   beta is the bending constant for omega sub N.
#
if(length(x)!=length(y))stop("The vectors do not have equal lengths")
m1=cbind(x,y)
m1<-elimna(m1)
nval=nrow(m1)
x<-m1[,1]
y<-m1[,2]
#  Have eliminated missing values
temp<-sort(abs(x-median(x)))
omhatx<-temp[floor((1-beta)*length(x))]
temp<-sort(abs(y-median(y)))
omhaty<-temp[floor((1-beta)*length(y))]
a<-(x-pbos(x,beta))/omhatx
b<-(y-pbos(y,beta))/omhaty
a<-ifelse(a<=-1,-1,a)
a<-ifelse(a>=1,1,a)
b<-ifelse(b<=-1,-1,b)
b<-ifelse(b>=1,1,b)
pbcor<-sum(a*b)/sqrt(sum(a^2)*sum(b^2))
test<-pbcor*sqrt((length(x) - 2)/(1 - pbcor^2))
sig<-2*(1 - pt(abs(test),length(x)-2))
list(cor=pbcor,test=test,siglevel=sig,n=nval)
}
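
#  Example (hypothetical data):
#    set.seed(1)
#    x<-rnorm(40)
#    y<-x+rnorm(40)
#    pbcor(x,y)
#  returns the percentage bend correlation, the test statistic and siglevel.
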

rmanovab<-function(x,tr=.2,alpha=.05,grp=0,nboot=599){
#
#   A bootstrap-t for comparing the trimmed means of dependent groups.
#   By default, 20% trimming is used with B=599 bootstrap samples.
#
#   The optional argument grp is used to select a subset of the groups
#   and exclude the rest.
#
#   x can be an n by J matrix or it can have list mode
#
if(is.data.frame(x))x=as.matrix(x)
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x))mat=matl(x)
#{
#if(sum(grp)==0)grp<-c(1:length(x))
# put the data in an n by J matrix
#mat<-matrix(0,length(x[[1]]),length(grp))
#for (j in 1:length(grp))mat[,j]<-x[[grp[j]]]
#}
if(is.matrix(x)){
if(sum(grp)==0)grp<-c(1:ncol(x))
mat<-x[,grp]
}
mat=elimna(mat)
#if(sum(is.na(mat)>=1))stop("Missing values are not allowed.")
J<-ncol(mat)
connum<-(J^2-J)/2
bvec<-matrix(0,connum,nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(nrow(mat),size=nrow(mat)*nboot,replace=TRUE),nrow=nboot)
xcen<-matrix(0,nrow(mat),ncol(mat))
for (j in 1:J)xcen[,j]<-mat[,j]-mean(mat[,j],tr) #Center data
bvec<-apply(data,1,tsubrmanovab,xcen,tr)
# bvec is vector of nboot  bootstrap test statistics.
icrit<-round((1-alpha)*nboot)
bvec<-sort(bvec)
crit<-bvec[icrit]
test<-rmanova(mat,tr,grp)$test
list(teststat=test,crit=crit)
}


tsubrmanovab<-function(isub,x,tr){
#
#  Compute test statistic for trimmed means
#  when comparing dependent groups.
#  By default, 20% trimmed means are used.
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  This function is used by rmanovab
#
tsub<-rmanovab1(x[isub,],tr=tr)$test
tsub
}





rmanovab1<-function(x,tr=.2,grp=c(1:length(x))){
#
#  A heteroscedastic one-way repeated measures ANOVA for trimmed means.
#
#  The data are assumed to be stored in $x$ which can
#  be either an n by J matrix, or an R variable having list mode.
#  If the data are stored in list mode,
#  length(x) is assumed to correspond to the total number of groups.
#  By default, the null hypothesis is that all groups have a common mean.
#  To compare a subset of the groups, use grp to indicate which
#  groups are to be compared. For example, if you type the
#  command grp<-c(1,3,4), and then execute this function, groups
#  1, 3, and 4 will be compared with the remaining groups ignored.
#
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
J<-length(grp)  # The number of groups to be compared
m1<-matrix(x[[grp[1]]],length(x[[grp[1]]]),1)
for(i in 2:J){     # Put the data into an n by J matrix
m2<-matrix(x[[grp[i]]],length(x[[grp[i]]]),1)
m1<-cbind(m1,m2)
}
}
if(is.matrix(x)){
if(length(grp)<ncol(x))m1<-as.matrix(x[,grp])
if(length(grp)>=ncol(x))m1<-as.matrix(x)
J<-ncol(x)
}
#
#  Raw data are now in the matrix m1
#
m2<-matrix(0,nrow(m1),ncol(m1))
xvec<-1
g<-floor(tr*nrow(m1))  #2g is the number of observations trimmed.
for(j in 1:ncol(m1)){  # Putting Winsorized values in m2
m2[,j]<-winval(m1[,j],tr)
xvec[j]<-mean(m1[,j],tr)
}
xbar<-mean(xvec)
qc<-(nrow(m1)-2*g)*sum((xvec-xbar)^2)
m3<-matrix(0,nrow(m1),ncol(m1))
m3<-sweep(m2,1,apply(m2,1,mean))  # Sweep out rows
m3<-sweep(m3,2,apply(m2,2,mean))  # Sweep out columns
m3<-m3+mean(m2)  # Grand Winsorized mean swept in
qe<-sum(m3^2)
test<-(qc/(qe/(nrow(m1)-2*g-1)))
#
#  Next, estimate the adjusted degrees of freedom
#
v<-winall(m1)$cov
vbar<-mean(v)
vbard<-mean(diag(v))
vbarj<-1
for(j in 1:J){
vbarj[j]<-mean(v[j,])
}
A<-J*J*(vbard-vbar)^2/(J-1)
B<-sum(v*v)-2*J*sum(vbarj^2)+J*J*vbar^2
ehat<-A/B
etil<-(nrow(m2)*(J-1)*ehat-2)/((J-1)*(nrow(m2)-1-(J-1)*ehat))
etil<-min(1.,etil)
df1<-(J-1)*etil
df2<-(J-1)*etil*(nrow(m2)-2*g-1)
siglevel<-1-pf(test,df1,df2)
list(test=test,df=c(df1,df2),siglevel=siglevel,tmeans=xvec,ehat=ehat,etil=etil)
}




mee<-function(x,y,alpha=.05){
#
#  For two independent groups, compute a 1-\alpha confidence interval
#  for p=P(X<Y) using Mee's method, which assumes there are no ties.
#  If ties are detected among the pooled observations, a warning message is
#  printed. The type I error probability might exceed the nominal level by
#  an unacceptable amount. Also, mee(x,y) might give different results than
#  mee(y,x). (One might reject and the other might not.)
#
x<-x[!is.na(x)]  # Remove missing values from x
y<-y[!is.na(y)]  # Remove missing values from y
xy<-c(x,y)
tiexy<-sum(abs(c(1:length(xy))-sort(rank(xy))))
if(tiexy > 0){print("Warning: Tied values detected")
print("so even if distributions are identical,")
print("P(X<Y) is not necessarily equal to .5")
}
u<-outer(x,y,FUN="<")
p1<-0
p2<-0
for (j in 1:length(y)){
temp<-outer(u[,j],u[,j])
p1<-p1+sum(temp)-sum(u[,j]*u[,j])
}
for (i in 1: length(x)){
temp<-outer(u[i,],u[i,])
p2<-p2+sum(temp)-sum(u[i,]*u[i,])
}
p<-sum(u)/(length(x)*length(y))
p1<-p1/(length(x)*length(y)*(length(x)-1))
p2<-p2/(length(x)*length(y)*(length(y)-1))
b1<-(p1-p^2)/(p-p^2)
b2<-(p2-p^2)/(p-p^2)
A<-((length(x)-1)*b1+1)/(1-1/length(y))+((length(y)-1)*b2+1)/(1-1/length(x))
nhat<-length(x)*length(y)/A
crit<-(qnorm(1-alpha/2))^2/nhat
D<-sqrt(crit*(p*(1-p)+.25*crit))
low<-(p+.5*crit-D)/(1+crit)
hi<-(p+.5*crit+D)/(1+crit)
list(phat=p,ci=c(low,hi))
}
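
#  Example (hypothetical data, no ties):
#    set.seed(1)
#    x<-rnorm(30)
#    y<-rnorm(30,.5)
#    mee(x,y)
#  returns the estimate of P(X<Y) and a .95 confidence interval.
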

ranki<-function(J,K,x,grp=c(1:p),alpha=.05,p=J*K){
#
#  Compute a confidence interval for all interaction terms
#  in J by K (two-way) anova using the modified Patel-Hoel method.
#
#  This method is not recommended if there are tied observations among the
#  pooled data.
#
#  All JK groups are independent.
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode.
#  If grp is unspecified, it is assumed x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second factor: level 1,2
#  x[[j+1]] is the data for level 2,1, etc.
#  If the data are in the wrong order, grp can be used to rearrange the
#  groups. For example, for a two by two design, grp<-c(2,4,3,1)
#  indicates that the second group corresponds to level 1,1;
#  group 4 corresponds to level 1,2; group 3 is level 2,1;
#  and group 1 is level 2,2.
#
#  It is assumed that the input variable x has length JK, the total number of
#  groups being tested. If not, a warning message is printed.
#
#  The estimated standard error is based on Sen's Jackknife as used by
#  Mee (1990).
#
if(!is.list(x))stop("Data are not stored in list mode")
if(p!=length(x)){
print("Warning: The number of groups in your data is not equal to JK")
}
jtk<-J*K
tl<-0
com<-x[[1]]
for(i in 1:jtk)tl<-tl+length(x[[i]])
for(i in 2:jtk)com<-c(com,x[[i]])
tiex<-sum(abs(c(1:tl)-sort(rank(com))))
if(tiex > 0)
print("Tied values detected. Interchanging columns might give different results. That is, comparing rows based on P(X<Y) is not necessarily the same as comparing rows based on P(X>Y)")
ck<-(K^2-K)/2
cj<-(J^2-J)/2
tc<-ck*cj
if(tc>28){
print("Warning: The number of contrasts exceeds 28.")
print("The critical value being used is based on 28 contrasts")
tc<-28
}
idmat<-matrix(NA,nrow=tc,ncol=8)
dimnames(idmat)<-list(NULL,c("row","row","col","col","ci.lower","ci.upper","estimate","test.stat"))
crit<-smmcrit(300,tc)
if(alpha != .05){
crit<-smmcrit01(300,tc)
if(alpha != .01){print("Warning: Only alpha = .05 and .01 are allowed,")
print("alpha = .01 is being assumed.")
}
}
phatsqse<-0
phat<-0
allit<-0
jcount<-0-K
it<-0
for(j in 1:J){
for(jj in 1:J){
if(j < jj){
for(k in 1:K){
for(kk in 1:K){
if(k < kk){
it<-it+1
idmat[it,1:4]<-c(j,jj,k,kk)
}}}}}
jcount<-jcount+K
for(k in 1:K){
for(kk in 1:K){
if(k < kk){
allit<-allit+1
xx<-x[[grp[k+jcount]]]
yy<-x[[grp[kk+jcount]]]
temp<-rankisub(xx,yy)
phat[allit]<-temp$phat
phatsqse[allit]<-temp$sqse
}}}}
#
# Compute the contrast matrix. Each row contains a 1, -1 and the rest 0
#  That is, all pairwise comparisons among K groups.
#
con<-matrix(0,cj,J)
id<-0
Jm<-J-1
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[id,j]<-1
con[id,k]<-0-1
}}
IK<-diag(ck)
B<-kron(con,IK)
ntest<-ck*(J^2-J)/2
test<-0
civecl<-0
civecu<-0
for (itest in 1:ntest){
temp1<-sum(B[itest,]*phat)
idmat[itest,7]<-temp1
idmat[itest,8]<-temp1/sqrt(sum(B[itest,]^2*phatsqse))
idmat[itest,5]<-temp1-crit*sqrt(sum(B[itest,]^2*phatsqse))
idmat[itest,6]<-temp1+crit*sqrt(sum(B[itest,]^2*phatsqse))
}
nsig<-sum((abs(idmat[,8])>crit))
list(phat=phat,ci=idmat,crit=crit,nsig=nsig)
}


regts1<-function(vstar,yhat,res,mflag,x,tr){
#
#  Used by other functions in this file when computing wild bootstrap
#  values: ystar is the fitted values plus the residuals multiplied by
#  the bootstrap weights in vstar, and rval[i] sums the centered ystar
#  values flagged by column i of mflag.
#
ystar<-yhat+res*vstar
bres<-ystar-mean(ystar,tr)
rval<-0
for (i in 1:nrow(x)){
rval[i]<-sum(bres[mflag[,i]])
}
rval
}

bptd<-function(x,tr=.2,alpha=.05,con=0,nboot=599){
#
#   Using the percentile t bootstrap method,
#   compute a .95 confidence interval for all linear contrasts
#   specified by con, a J by C matrix, where  C is the number of
#   contrasts to be tested, and the columns of con are the
#   contrast coefficients.
#
#   If con is not specified, all pairwise comparisons are performed.
#
#   The trimmed means of dependent groups are being compared.
#   By default, 20% trimming is used with B=599 bootstrap samples.
#
#   x can be an n by J matrix or it can have list mode
#
if(is.data.frame(x))x=as.matrix(x)
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
if(is.matrix(con)){
if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.")
               }}
if(is.list(x)){
# put the data in an n by J matrix
mat<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))mat[,j]<-x[[j]]
}
if(is.matrix(x))mat=x
J<-ncol(mat)
Jm<-J-1
if(sum(con^2)==0){
d<-(J^2-J)/2
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
if(is.matrix(x)){
if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.")
mat<-x
}
if(sum(is.na(mat)>=1))stop("Missing values are not allowed.")
J<-ncol(mat)
connum<-ncol(con)
bvec<-matrix(0,connum,nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
# data is an nboot by n matrix
xcen<-matrix(0,nrow(mat),ncol(mat)) #An n by J matrix
xbars<-matrix(0,nboot,ncol(mat))
psihat<-matrix(0,connum,nboot)
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(nrow(xcen),size=nrow(mat)*nboot,replace=TRUE),nrow=nboot)
for (j in 1:J){
xcen[,j]<-mat[,j]-mean(mat[,j],tr) #Center data
xbars[,j]<-apply(data,1,bptdmean,xcen[,j],tr)
}
for (ic in 1:connum){
paste("Working on contrast number",ic)
bvec[ic,]<-apply(data,1,bptdsub,xcen,tr,con[,ic])
# bvec is a connum by nboot matrix containing the bootstrap sq standard error
psihat[ic,]<-apply(xbars,1,bptdpsi,con[,ic])
}
bvec<-psihat/sqrt(bvec)  #bvec now contains bootstrap test statistics
bvec<-abs(bvec)  #Doing two-sided confidence intervals
icrit<-round((1-alpha)*nboot)
critvec<-apply(bvec,2,max)
critvec<-sort(critvec)
crit<-critvec[icrit]
psihat<-matrix(0,connum,4)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
test<-matrix(NA,connum,3)
dimnames(test)<-list(NULL,c("con.num","test","se"))
isub<-c(1:nrow(mat))
tmeans<-apply(mat,2,mean,trim=tr)
sqse<-1
psi<-1
for (ic in 1:ncol(con)){
sqse[ic]<-bptdsub(isub,mat,tr,con[,ic])
psi[ic]<-sum(con[,ic]*tmeans)
psihat[ic,1]<-ic
psihat[ic,2]<-psi[ic]
psihat[ic,3]<-psi[ic]-crit*sqrt(sqse[ic])
psihat[ic,4]<-psi[ic]+crit*sqrt(sqse[ic])
test[ic,1]<-ic
test[ic,2]<-psi[ic]/sqrt(sqse[ic])
test[ic,3]<-sqrt(sqse[ic])
}
list(test=test,psihat=psihat,crit=crit,con=con)
}

twomanbt<-function(x,y,tr=.2,alpha=.05,nboot=599){
#
#   Two-sample Behrens-Fisher problem.
#
#   For each of two independent groups,
#   have p measures for each subject. The goal is to compare the
#   trimmed means of the first measure, the trimmed means for the second
#   and so on.   So there are a total of p comparisons between the two
#   groups, one for each measure.
#
#   The percentile t bootstrap method is used to
#   compute a .95 confidence interval.
#
#   By default, 20% trimming is used with B=599 bootstrap samples.
#
#   x contains the data for the first group; it
#    can be an n by J matrix or it can have list mode.
#   y contains the data for the second group.
#
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(!is.list(y) && !is.matrix(y))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
# put the data in an n by p matrix
matx<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))matx[,j]<-x[[j]]
}
if(is.list(y)){
# put the data in an n by p matrix
maty<-matrix(0,length(y[[1]]),length(y))
for (j in 1:length(y))maty[,j]<-y[[j]]
}
if(is.matrix(x)){
matx<-x
}
if(is.matrix(y)){
maty<-y
}
if(ncol(matx)!=ncol(maty))stop("The number of variables for group one is not equal to the number for group 2")
if(sum(is.na(matx))>=1 || sum(is.na(maty))>=1)stop("Missing values are not allowed.")
J<-ncol(matx)
connum<-ncol(matx)
bvec<-matrix(0,connum,nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
xcen<-matrix(0,nrow(matx),ncol(matx))
ycen<-matrix(0,nrow(maty),ncol(maty))
for (j in 1:connum)xcen[,j]<-matx[,j]-mean(matx[,j],tr) #Center data
for (j in 1:connum)ycen[,j]<-maty[,j]-mean(maty[,j],tr) #Center data
print("Taking bootstrap samples. Please wait.")
bootx<-sample(nrow(matx),size=nrow(matx)*nboot,replace=TRUE)
booty<-sample(nrow(maty),size=nrow(maty)*nboot,replace=TRUE)
matval<-matrix(0,nrow=nboot,ncol=connum)
for (j in 1:connum){
datax<-matrix(xcen[bootx,j],ncol=nrow(matx))
datay<-matrix(ycen[booty,j],ncol=nrow(maty))
paste("Working on variable", j)
top<- apply(datax, 1., mean, tr) - apply(datay, 1., mean, tr)
botx <- apply(datax, 1., trimse, tr)
boty <- apply(datay, 1., trimse, tr)
matval[,j]<-abs(top)/sqrt(botx^2. + boty^2.)
}
bvec<-apply(matval,1,max)
icrit<-round((1-alpha)*nboot)
bvec<-sort(bvec)
crit<-bvec[icrit]
psihat<-matrix(0,ncol=4,nrow=connum)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
test<-matrix(0,ncol=3,nrow=connum)
dimnames(test)<-list(NULL,c("con.num","test","se"))
for(j in 1:ncol(matx)){
temp<-yuen(matx[,j],maty[,j],tr=tr)
test[j,1]<-j
test[j,2]<-abs(temp$test)
test[j,3]<-temp$se
psihat[j,1]<-j
psihat[j,2]<-mean(matx[,j],tr)-mean(maty[,j],tr)
psihat[j,3]<-mean(matx[,j],tr)-mean(maty[,j],tr)-crit*temp$se
psihat[j,4]<-mean(matx[,j],tr)-mean(maty[,j],tr)+crit*temp$se
}
list(psihat=psihat,teststat=test,critical.value=crit)
}
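
#  Example usage (a minimal sketch with simulated data, not part of the
#  original library; wrapped in if(FALSE) so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(60),ncol=3)  # 20 subjects, 3 measures, group 1
y<-matrix(rnorm(75),ncol=3)  # 25 subjects, 3 measures, group 2
twomanbt(x,y)  # compare the 20% trimmed means of each measure
}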



bootdep<-function(x,tr=.2,nboot=500){
#
# x is a matrix (n by p) or has list mode
# Goal: Obtain bootstrap samples and compute
# the trimmed mean for each of the p variables.
# Return the bootstrap trimmed means in a matrix
#
# tr is the amount of trimming
# nboot is the number of bootstrap samples
#
if(is.matrix(x))m1<-x
if(is.list(x)){
# put the data into a matrix
m1<-matrix(NA,nrow=length(x[[1]]),ncol=length(x))
for(j in 1:length(x))m1[,j]<-x[[j]]
}
data<-matrix(sample(nrow(m1),size=nrow(m1)*nboot,replace=TRUE),nrow=nboot)
bvec<-matrix(NA,ncol=ncol(m1),nrow=nboot)
for(j in 1:ncol(m1)){
temp<-m1[,j]
bvec[,j]<-apply(data, 1., bootdepsub,temp,tr)
}
# return a nboot by p matrix of bootstrap trimmed means.
bvec
}

bootdepsub<-function(isub,x,tr){
tsub<-mean(x[isub],tr)
tsub
}
corb<-function(x,y,corfun=pbcor,nboot=599,SEED=TRUE,...){
#
#   Compute a .95 confidence interval for a correlation.
#   The default correlation is the percentage bend.
#
#   The function corfun is any R function that returns a
#   correlation coefficient in corfun$cor. The functions pbcor and
#   wincor follow this convention.
#
#   When using Pearson's correlation, and when n<250, use
#   lsfitci instead.
#
#   The default number of bootstrap samples is nboot=599
#
m1=cbind(x,y)
m1<-elimna(m1)  # Eliminate rows with missing values
nval=nrow(m1)
x<-m1[,1]
y<-m1[,2]
est<-corfun(x,y,...)$cor
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,corbsub,x,y,corfun,...) # A 1 by nboot matrix.
ihi<-floor(.975*nboot+.5)
ilow<-floor(.025*nboot+.5)
bsort<-sort(bvec)
corci<-1
corci[1]<-bsort[ilow]
corci[2]<-bsort[ihi]
phat <- sum(bvec < 0)/nboot
sig <- 2 * min(phat, 1 - phat)
list(cor.ci=corci,p.value=sig,cor.est=est)
}
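
#  Example usage (a minimal sketch with simulated data; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-rnorm(40)
y<-x+rnorm(40)
corb(x,y)               # CI for the percentage bend correlation
corb(x,y,corfun=wincor) # or for the Winsorized correlation
}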

corbsub<-function(isub,x,y,corfun,...){
#
#  Compute correlation for x[isub] and y[isub]
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  This function is used by other functions when computing
#  bootstrap estimates.
#
#  corfun is some correlation function already stored in R
#
corbsub<-corfun(x[isub],y[isub],...)$cor
corbsub
}



depreg<-function(x,y,xout=FALSE,outfun=out,...){
#
# Compute the depth regression estimator.
# Only a single predictor is allowed in this version
#
if(is.matrix(x)){
if(ncol(x)>=2)stop("Only a single predicor is allowed")
x<-as.vector(x)
}
xy=cbind(x,y)
xy=elimna(xy)
if(xout){
flag<-outfun(xy[,1],plotit=FALSE,...)$keep
xy<-xy[flag,]
}
x=xy[,1]
y=xy[,2]
ord<-order(x)
xs<-x[ord]
ys<-y[ord]
vec1<-outer(ys,ys,"-")
vec2<-outer(xs,xs,"-")
v1<-vec1[vec2>0]
v2<-vec2[vec2>0]
slope<-v1/v2
vec3<-outer(ys,ys,"+")
vec4<-outer(xs,xs,"+")
v3<-vec3[vec2>0]
v4<-vec4[vec2>0]
deep<-NA
inter<-v3/2-slope*v4/2
temp<-matrix(c(inter,slope),ncol=2)
deep<-apply(temp,1,rdepth,x,y)
best<-max(deep)
coef<-NA
coef[2]<-mean(slope[deep==best])
coef[1]<-mean(inter[deep==best])
res<-y-coef[2]*x-coef[1]
list(coef=coef,residuals=res)
}
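
#  Example usage (a minimal sketch with simulated data; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-rnorm(30)
y<-2*x+rnorm(30)
depreg(x,y)$coef  # intercept and slope maximizing regression depth
}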

tsgreg<-function(x,y,tries=(length(y)^2-length(y))/2){
#
# Compute a regression estimate by repeatedly sampling p+1 points,
# fitting least squares to each subsample, and taking the median of the
# resulting nonzero slopes; the intercept is chosen so that the fit
# passes through the marginal medians.
#
x<-as.matrix(x)
if(nrow(x)!=length(y))stop("Length of y must match the number of rows of x")
# eliminate any rows with missing values.
m1<-cbind(x,y)
m1<-elimna(m1)
np<-ncol(x)
x<-m1[,1:np,drop=FALSE]  # drop=FALSE keeps x a matrix when there is one predictor
y<-m1[,np+1]
set.seed(2)
data<-matrix(NA,ncol=ncol(x)+1,nrow=tries)
for(i in 1:tries){
data[i,]<-sample(length(y),size=ncol(x)+1,replace=FALSE)
}
bvec <- apply(data, 1,tsgregs1,x,y)
coef<-0
numzero<-0
loc<-0
for (i in 1:ncol(x)){
ip<-i+1
temp<-bvec[ip,]
loc[i]<-median(x[,i])
coef[i+1]<-median(temp[temp!=0])
numzero[i]<-length(temp[temp==0])
}
ip<-ncol(x)+1
coef[1]<-median(y)-sum(coef[2:ip]*loc)
res<-y-x %*% coef[2:ip] - coef[1]
list(coef=coef,residuals=res,numzero=numzero)
}
tsgregs1<-function(isub,x,y){
#
#  This function is used by tsgreg
#
#  Perform regression using x[isub,] to predict y[isub]
#  isub is a vector of length nsub, determined by tsgreg
#
tsgregs1<-lsfit(x[isub,],y[isub])$coef
}

lts1reg<-function(x,y,tr=.2,h=NA){
#
# Compute the least trimmed squares regression estimator.
# Only a single predictor is allowed in this version
#
if(is.na(h))h<-length(x)-floor(tr * length(x))
ord<-order(x)
xs<-x[ord]
ys<-y[ord]
vec1<-outer(ys,ys,"-")
vec2<-outer(xs,xs,"-")
v1<-vec1[vec2>0]
v2<-vec2[vec2>0]
slope<-v1/v2
vec3<-outer(ys,ys,"+")
vec4<-outer(xs,xs,"+")
v3<-vec3[vec2>0]
v4<-vec4[vec2>0]
val<-NA
inter<-v3/2-slope*v4/2
for(i in 1:length(slope)){
#risk<-(y[vec2>0]-slope[i]*x[vec2>0]-inter[i])^2
risk<-(y-slope[i]*x-inter[i])^2
risk<-sort(risk)
val[i]<-sum(risk[1:h])
}
best<-min(val)
coef<-NA
coef[2]<-mean(slope[val==best])
coef[1]<-mean(inter[val==best])
res<-y-coef[2]*x-coef[1]
list(coef=coef,residuals=res)
}
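
#  Example usage (a minimal sketch with simulated data; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-rnorm(30)
y<-x+rnorm(30)
lts1reg(x,y)$coef  # LTS fit retaining the h smallest squared residuals
}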

man2pb<-function(x,y,alpha=.05,nboot=NA,crit=NA){
#
#   Two-sample Behrens-Fisher problem.
#
#   For each of two independent groups,
#   have P measures for each subject. The goal is to compare the 20%
#   trimmed means of the first group to the trimmed means for the second;
#   this is done for each of the  P measures.
#
#   The percentile bootstrap method is used to
#   compute a .95, .975, or .99 confidence interval.
#
#   Only 20% trimming is allowed.
#
#   x contains the data for the first group; it
#    can be an n by J matrix or it can have list mode.
#   y contains the data for the second group.
#
#   Vectors with missing values are eliminated from the analysis.
#
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(!is.list(y) && !is.matrix(y))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
# put the data in an n by p matrix
matx<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))matx[,j]<-x[[j]]
}
if(is.list(y)){
# put the data in an n by p matrix
maty<-matrix(0,length(y[[1]]),length(y))
for (j in 1:length(y))maty[,j]<-y[[j]]
}
if(is.matrix(x)){
matx<-x
}
if(is.matrix(y)){
maty<-y
}
if(ncol(matx)!=ncol(maty))stop("The number of variables for group 1 is not equal to the number for group 2")
if(sum(is.na(matx))>0)matx<-elimna(matx)
if(sum(is.na(maty))>0)maty<-elimna(maty)
J<-ncol(matx)
connum<-ncol(matx)
if(is.na(nboot)){
if(ncol(matx)<=4)nboot<-2000
if(ncol(matx)>4)nboot<-5000
}
#
#  Determine critical value
#
if(ncol(matx)==2){
if(alpha==.05)crit<-.0125
if(alpha==.025)crit<-.0060
if(alpha==.01)crit<-.0015
}
if(ncol(matx)==3){
if(alpha==.05)crit<-.007
if(alpha==.025)crit<-.003
if(alpha==.01)crit<-.001
}
if(ncol(matx)==4){
if(alpha==.05)crit<-.0055
if(alpha==.025)crit<-.0020
if(alpha==.01)crit<-.0005
}
if(ncol(matx)==5){
if(alpha==.05)crit<-.0044
if(alpha==.025)crit<-.0016
if(alpha==.01)crit<-.0005
}
if(ncol(matx)==6){
if(alpha==.05)crit<-.0038
if(alpha==.025)crit<-.0018
if(alpha==.01)crit<-.0004
}
if(ncol(matx)==7){
if(alpha==.05)crit<-.0028
if(alpha==.025)crit<-.0010
if(alpha==.01)crit<-.0002
}
if(ncol(matx)==8){
if(alpha==.05)crit<-.0026
if(alpha==.025)crit<-.001
if(alpha==.01)crit<-.0002
}
if(ncol(matx)>8){
# Use an approximation of the critical value
if(alpha==.025)warning("Can't determine a critical value when alpha=.025 and the number of groups exceeds 8.")
nmin<-min(nrow(matx),nrow(maty))
if(alpha==.05){
if(nmin<100)wval<-smmcrit(60,ncol(matx))
if(nmin>=100)wval<-smmcrit(300,ncol(matx))
wval<-0-wval
crit<-pnorm(wval)
}
if(alpha==.01){
if(nmin<100)wval<-smmcrit01(60,ncol(matx))
if(nmin>=100)wval<-smmcrit01(300,ncol(matx))
wval<-0-wval
crit<-pnorm(wval)
}
}
if(is.na(crit))warning("Critical values can be determined for alpha=.05, .025 and .01 only")
icl<-ceiling(crit*nboot)
icu<-ceiling((1-crit)*nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
bootx<-bootdep(matx,tr=.2,nboot)
booty<-bootdep(maty,tr=.2,nboot)
        #
        # Now have an nboot by J matrix of bootstrap values.
        #
test<-1
for (j in 1:connum){
test[j]<-sum(bootx[,j]<booty[,j])/nboot
if(test[j]>.5)test[j]<-1-test[j]
}
output <- matrix(0, connum, 5)
        dimnames(output) <- list(NULL, c("variable #", "psihat", "test",
                "ci.lower", "ci.upper"))
        tmeanx <- apply(matx, 2, mean, trim = .2)
        tmeany <- apply(maty, 2, mean, trim = .2)
        psi <- 1
        for(ic in 1:connum) {
                output[ic, 2] <- tmeanx[ic]-tmeany[ic]
                output[ic, 1] <- ic
                output[ic, 3] <- test[ic]
                temp <- sort(bootx[,ic]-booty[,ic])
                output[ic, 4] <- temp[icl]
                output[ic, 5] <- temp[icu]
        }
        list(output = output, crit.value = crit)
}


qhatds1<-function(isubx,x,y){
#
#  function used by qhat  when working on bootstrap estimates.
#
xx<-x[isubx]
yy<-y[isubx]
group<-disker(xx,yy,x,op=2)$zhat
group
}
qhatd<-function(x,y,nboot=50){
#
#   Estimate Q, a nonparametric measure of effect size, using
#   the .632 method of estimating prediction error.
#   (See Efron and Tibshirani, 1993, pp. 252--254)
#
#   The default number of bootstrap samples is nboot=50
#
#   This function is for dependent groups. For independent groups, use
#   qhati
#
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(x),size=length(x)*nboot,replace=TRUE),nrow=nboot)
#    data is an nboot by n matrix containing subscripts for bootstrap sample
bid<-apply(data,1,idb,length(x))
#  bid is an n by nboot matrix. If the jth bootstrap sample from
#  1, ..., n contains the value i, bid[i,j]=0; otherwise bid[i,j]=1
yhat<-apply(data,1,qhatds1,x,y)
bi<-apply(bid,1,sum) # B sub i in notation of Efron and Tibshirani, p. 253
temp<-(bid*yhat)
diff<-apply(temp,1,sum)
temp<-diff/bi
ep0<-sum(temp[!is.na(temp)])/length(y)
aperror<-disker(x,y)$phat  # apparent error
regpre<-.368*aperror+.632*ep0
list(app.error=aperror,qhat.632=regpre)
}


winmean<-function(x,tr=.2,na.rm=TRUE){
if(na.rm)x<-elimna(x)
winmean<-mean(winval(x,tr))
winmean
}


kerden<-function(x,q=.5,xval=0){
#   Compute the kernel density estimator of the
#   probability density function evaluated at the qth quantile.
#
#   x contains vector of observations
#   q is the quantile of interest, the default is the median.
#   If you want to evaluate f hat at xval rather than at the
#   q th quantile, set q=0 and xval to desired value.
#
y<-sort(x)
n<-length(x)
temp<-idealf(x)
h<-1.2*(temp$qu-temp$ql)/n^(.2)
iq<-floor(q*n+.5)
qhat<-y[iq]
if (q==0) qhat<-xval
xph<-qhat+h
A<-length(y[y<=xph])
xmh<-qhat-h
B<-length(y[y<xmh])
fhat<-(A-B)/(2*n*h)
fhat
}
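
#  Example usage (a minimal sketch with simulated data; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-rnorm(100)
kerden(x)            # density estimate at the sample median
kerden(x,q=0,xval=1) # density estimate at the value 1
}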

kdplot<-function(x,rval=15,xlab="X",ylab="Y"){
#
#   Compute the kernel density estimator for a range of values
#   and plot results.
#
#   x contains vector of observations
#
x<-x[!is.na(x)]  #  Remove any missing values
y<-sort(x)
z<-1
temp<-floor(.01*length(x))
if(temp==0)temp<-5
ibot<-y[temp]
itop<-y[floor(.99*length(x))]
xaxis<-seq(ibot,itop,length=rval)
for(i in 1:rval)z[i]<-kerden(x,0,xaxis[i])
plot(xaxis,z,xlab=xlab,ylab=ylab)
lines(xaxis,z)
}
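
#  Example usage (a minimal sketch; wrapped in if(FALSE) so sourcing this
#  file does not run it):
if(FALSE){
set.seed(1)
kdplot(rnorm(200),rval=25)
}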

wband<-function(x,y,
crit=(max(length(x),length(y))-5)*.48/95+2.58+abs(length(x)-length(y))*.44/95,
flag=FALSE,plotit=FALSE)
{
#  Compute a confidence band for the shift function
#  Assuming two independent groups are being compared
#
#  The default critical value is the approximate .05 critical value
#
#  If flag=F (false), the exact probability coverage is not computed
#
x<-x[!is.na(x)]  # Remove missing values from x.
y<-y[!is.na(y)]  # Remove missing values from y.
plotit<-as.logical(plotit)
flag<-as.logical(flag)
pc<-NA
if(flag){
print("Computing the exact value of the probability coverage")
pc<-1-kswsig(length(x),length(y),crit)
}
xsort<-sort(x)
ysort<-sort(y)
l<-0
u<-0
ysort[length(y)+1]<-NA  # pad so that an out-of-range subscript returns NA
m<-length(x)*length(y)/(length(x)+length(y))
lambda<-length(x)/(length(x)+length(y))
cc<-crit^2/m
temp1<-1+cc*(1-lambda)^2
for(ivec in 1:length(x))
{
uu<-ivec/length(x)
temp<-.5*sqrt(cc^2*(1-lambda)^2+4*cc*uu*(1-uu))
hminus<-(uu+.5*cc*(1-lambda)*(1-2*lambda*uu)-temp)/temp1
hplus<-(uu+.5*cc*(1-lambda)*(1-2*lambda*uu)+temp)/temp1
isub<-max(0,floor(length(y)*hminus)+1)
l[ivec]<-ysort[isub+1]-xsort[ivec]
if(hminus<0)l[ivec]=NA
isub<-max(0,floor(length(y)*hplus)+1)
u[ivec]<-ysort[isub+1]-xsort[ivec]
}
num<-length(l[l>0 & !is.na(l)])+length(u[u<0 & !is.na(u)])
qhat<-c(1:length(x))/length(x)
m<-matrix(c(qhat,l,u),length(x),3)
dimnames(m)<-list(NULL,c("qhat","lower","upper"))
if(plotit){
temp2 <- m[, 2]
temp2 <- temp2[!is.na(temp2)]
xsort<-sort(x)
ysort<-sort(y)
del<-0
for (i in 1:length(x))del[i]<-ysort[round(length(y)*i/length(x))]-xsort[i]
xaxis<-c(xsort,xsort,xsort)
yaxis<-c(del,m[,2],m[,3])
plot(xaxis,yaxis,type="n",ylab="delta",xlab="x (first group)")
lines(xsort,del)
lines(xsort,m[,2],lty=2)
lines(xsort,m[,3],lty=2)
temp <- summary(x)
                text(temp[3], min(temp2), "+")
                text(temp[2], min(temp2), "o")
                text(temp[5], min(temp2), "o")
}
list(m=m,crit=crit,numsig=num,pc=pc)
}

runcor<-function(x,y,z,fr=1,corflag=FALSE,corfun=pbcor,plotit=TRUE,rhat=FALSE){
#
# Estimate how the correlation between  x and y varies with  z
#
# running correlation using interval method
#
# fr controls amount of smoothing
#
# corfun is the  correlation to be used. It is assumed that
# corfun is an R function that returns a correlation coefficient
# in corfun$cor
#
# To use Pearson's correlation, set corflag=T
#
temp<-cbind(x,y,z) # Eliminate any rows with missing values
temp<-elimna(temp)
x<-temp[,1]
y<-temp[,2]
z<-temp[,3]
plotit<-as.logical(plotit)
rmd<-NA
if(!corflag){
for(i in 1:length(x)){
flag<-near(z,z[i],fr)
if(sum(flag)>2)rmd[i]<-corfun(x[flag],y[flag])$cor
}}
if(corflag){
for(i in 1:length(x)){
flag<-near(z,z[i],fr)
if(sum(flag)>2)rmd[i]<-cor(x[flag],y[flag])
}}
if(plotit){
plot(c(max(z),min(z),z),c(1,-1,rmd),xlab="Modifier",ylab="Correlation",type="n")
sz<-sort(z)
zorder<-order(z)
sysm<-rmd[zorder]
lines(sz,sysm)
}
if(!rhat)rmd<-"Done"
rmd
}


pcorb<-function(x,y,SEED=TRUE){
#   Compute a .95 confidence interval for Pearson's correlation coefficient.
#
#   This function uses an adjusted percentile bootstrap method that
#   gives good results when the error term is heteroscedastic.
#
nboot<-599  #Number of bootstrap samples
xy<-elimna(cbind(x,y))
x<-xy[,1]
y<-xy[,2]
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples; please wait")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,pcorbsub,x,y) # A 1 by nboot matrix.
ilow<-15
ihi<-584
if(length(y) < 250){
ilow<-14
ihi<-585
}
if(length(y) < 180){
ilow<-11
ihi<-588
}
if(length(y) < 80){
ilow<-8
ihi<-592
}
if(length(y) < 40){
ilow<-7
ihi<-593
}
bsort<-sort(bvec)
r<-cor(x,y)
ci<-c(bsort[ilow],bsort[ihi])
list(r=r,ci=ci)
}
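
#  Example usage (a minimal sketch with simulated heteroscedastic data;
#  wrapped in if(FALSE) so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-rnorm(50)
y<-x+rnorm(50)*(1+abs(x))  # error variance depends on x
pcorb(x,y)
}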


twobici<-function(r1=sum(x),n1=length(x),r2=sum(y),n2=length(y),x=NA,y=NA,alpha=.05){
#
# Compute a confidence interval for p1-p2,
# the difference between the probabilities of
# success for two binomials using Beal's method.
#
# r is number of successes
# n is sample size
# if x contains data, r1 is taken to be the
# number of 1s in x and n1 is length(x)
#
if(length(r1)>1)stop("r1 must be a single number, not a vector")
if(length(n1)>1)stop("n1 must be a single number, not a vector")
if(length(r2)>1)stop("r2 must be a single number, not a vector")
if(length(n2)>1)stop("n2 must be a single number, not a vector")
if(!is.na(sum(r1)) || !is.na(sum(n1)) || !is.na(sum(r2)) || !is.na(sum(n2))){
if(r1<0 || n1<0)stop("Both r1 and n1 must be nonnegative")
if(r1 > n1)stop("r1 can't be greater than n1")
if(r2<0 || n2<0)stop("Both r2 and n2 must be nonnegative")
if(r2 > n2)stop("r2 can't be greater than n2")
}
}
if(!is.na(sum(x))){
r1<-sum(x)
n1<-length(x)
}
if(!is.na(sum(y))){
r2<-sum(y)
n2<-length(y)
}
a<-(r1/n1)+(r2/n2)
b<-(r1/n1)-(r2/n2)
u<-.25*((1/n1)+(1/n2))
v<-.25*((1/n1)-(1/n2))
V<-u*((2-a)*a-b^2)+2*v*(1-a)*b
crit<-qchisq(1-alpha/2,1)
A<-sqrt(crit*(V+crit*u^2*(2-a)*a+crit*v^2*(1-a)^2))
B<-(b+crit*v*(1-a))/(1+crit*u)
ci<-NA
ci[1]<-B-A/(1+crit*u)
ci[2]<-B+A/(1+crit*u)
p1<-r1/n1
p2<-r2/n2
list(ci=ci,p1=p1,p2=p2)
}
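
#  Example usage (a minimal sketch with made-up counts; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
twobici(r1=15,n1=40,r2=10,n2=35)  # CI for p1-p2 based on 15/40 vs 10/35 successes
}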

runmean<-function(x,y,fr=1,tr=.2,pyhat=FALSE,eout=FALSE,outfun=out,plotit=TRUE,xout=FALSE,
xlab="x",ylab="y"){
#
# running mean using interval method
#
# fr controls amount of smoothing
# tr is the amount of trimming
#
# Missing values are automatically removed.
#
if(eout && xout)xout<-F
temp<-cbind(x,y)
temp<-elimna(temp) # Eliminate any rows with missing values
if(eout){
flag<-outfun(temp,plotit=FALSE)$keep
temp<-temp[flag,]
}
if(xout){
flag<-outfun(temp[,1],plotit=FALSE)$keep  # use the x values after missing rows are removed
temp<-temp[flag,]
}
x<-temp[,1]
y<-temp[,2]
pyhat<-as.logical(pyhat)
rmd<-c(1:length(x))
for(i in 1:length(x))rmd[i]<-mean(y[near(x,x[i],fr)],tr)
if(pyhat)return(rmd)
if(plotit){
plot(x,y,xlab=xlab,ylab=ylab)
sx<-sort(x)
xorder<-order(x)
sysm<-rmd[xorder]
tempx<-(!duplicated(sx))
lines(sx[tempx], sysm[tempx])
}}
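
#  Example usage (a minimal sketch with simulated data; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-rnorm(60)
y<-x^2+rnorm(60)
runmean(x,y)                               # plot the running 20% trimmed mean
fit<-runmean(x,y,pyhat=TRUE,plotit=FALSE)  # return the fitted values instead
}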

pcorbsub<-function(isub, x, y)
{
        #
        #  Compute Pearson's correlation using x[isub] and y[isub]
        #  isub is a vector of length n,
        #  a bootstrap sample from the sequence of integers
        #  1, 2, 3, ..., n
        #
        pcorbsub<-cor(x[isub],y[isub])
        pcorbsub
}

pow1<-function(n,Del,alpha){
#
#  Determine power of Student's T in the
#  one-sided, one-sample case where
#
#  n=sample size
#  Del=(mu0-mu1)/sigma
#  alpha=Type I error probability
#  mu0 is hypothesized value
#  mu1 is some non-null value for the mean.
#
Del<-abs(Del)
if(alpha<=0 || alpha>=1)stop("alpha must be between 0 and 1")
K11<-1-alpha
K5<-sqrt(n)*Del
#  Next, use the Kraemer-Paik (1979, Technometrics, 21, 357-360)
#  approximation of the noncentral T.
K6<-n-1
K14<-qt(K11,K6)
K7<-K14*sqrt(1+K5*K5/K6)
K8<-K5*sqrt(1+K14*K14/K6)
K9<-K7-K8
pow1<-1-pt(K9,K6)
pow1
}
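
#  Example usage (a minimal sketch; wrapped in if(FALSE) so sourcing this
#  file does not run it):
if(FALSE){
pow1(n=25,Del=.5,alpha=.05)  # power when the effect is half a standard deviation
}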

stein1<-function(x,del,alpha=.05,pow=.8,oneside=FALSE,n=NULL,VAR=NULL){
#
# Performs Stein's method on the data in x.
# In the event additional observations are required
# and can be obtained, use the R function stein2.
#
del<-abs(del)
if(is.null(n))n<-length(x)
if(is.null(VAR))VAR=var(x)
df<-n-1
if(!oneside)alpha<-alpha/2
d<-(del/(qt(pow,df)-qt(alpha,df)))^2
N<-max(c(n,floor(VAR/d)+1))
N
}

stein2<-function(x1,x2,mu0=0,alpha=.05){
#
# Do second stage of Stein's method
# x1 contains first stage data
# x2 contains second stage data
# mu0 is the hypothesized value
#
n<-length(x1)
df<-n-1
N<-n+length(x2)
test<-sqrt(N)*(mean(c(x1,x2))-mu0)/sqrt(var(x1))
crit <- qt(1 - alpha/2, df)
low<- mean(c(x1,x2))-crit*sqrt(var(x1)/N)
up<- mean(c(x1,x2))+crit*sqrt(var(x1)/N)
sig<-2*(1-pt(abs(test),df))
list(ci = c(low, up), siglevel =sig,mean=mean(c(x1,x2)),
teststat = test, crit =  crit, df = df)
}
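
#  Example of the two-stage workflow (a minimal sketch with simulated data;
#  wrapped in if(FALSE) so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x1<-rnorm(15)            # first stage data
N<-stein1(x1,del=.8)     # total sample size needed for power .8
x2<-rnorm(N-length(x1))  # second stage data (simulated here)
stein2(x1,x2,mu0=0)
}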


ci2bin<-function(r1=sum(x),n1=length(x),r2=sum(y),n2=length(y),x=NA,y=NA,alpha=0.05){
#
# Compute a confidence interval for the
# difference between probability of success
# for two independent binomials
#
# r1=number of successes in group 1
# n1=number of observations in group 1
#
cr<-qchisq(1-alpha,1)
p1<-r1/n1
p2<-r2/n2
a<-p1+p2
b<-p1-p2
u<-.25*(1/n1+1/n2)
v<-.25*(1/n1-1/n2)
V<-u*((2-a)*a-b^2)+2*v*(1-a)*b
A<-sqrt(cr*(V+cr*u^2*(2-a)*a+cr*v^2*(1-a)^2))
B<-(b+cr*v*(1-a))/(1+cr*u)
ci<-NA
ci[1]<-B-A/(1+cr*u)
ci[2]<-B+A/(1+cr*u)
list(ci=ci)
}
powt1est<-function(x,delta=0,ci=FALSE,nboot=800){
#
# Estimate power for a given value of delta
#
# Only 20% trimming is allowed.
#
temp1<-powest(x,rep(0,5),delta,se=trimse(x))
if(ci){
set.seed(2)
pboot<-NA
datay<-rep(0,5)
print("Taking bootstrap samples. Please wait.")
datax <- matrix(sample(x, size = length(x) * nboot, replace = TRUE
                        ), nrow = nboot)
for(i in 1:nboot) {
se <- trimse(datax[i,  ])
pboot[i] <- powest(x, rep(0,5), delta, se)
}
temp <- sort(pboot)
ll<-floor(0.05 * nboot + 0.5)
ci.low<-temp[ll]
}
if(!ci)ci.low<-NA
list(est.power=temp1,ci=ci.low)
}

powt1an<-function(x,ci=FALSE,plotit=TRUE,nboot=800){
#
# Do a power analysis for the one-sample case with a 20% trimmed
# mean when the percentile bootstrap is to be used to test
# hypotheses.
#
x<-x[!is.na(x)]
lp<-NA
se<-trimse(x)
gval<-NA
dv<-seq(0,3.5*se,length=15)
for(i in 1:length(dv)){
gval[i]<-powest(x,rep(0,5),dv[i],se)
}
if(!ci){
if(plotit){
plot(dv,gval,type="n",xlab="delta",ylab="power")
lines(dv,gval)
}}
if(ci){
set.seed(2)
print("Taking bootstrap samples. Please wait.")
datax <- matrix(sample(x, size = length(x) * nboot, replace = TRUE),
                nrow = nboot)
pboot<-matrix(NA,nrow=nboot,ncol=length(dv))
for(i in 1:nboot){
se<-trimse(datax[i,])
for(j in 1:length(dv)){
pboot[i,j]<-powest(x,rep(0,5),dv[j],se)
}}
ll<-floor(.05*nboot+.5)
for(i in 1:15){
temp<-sort(pboot[,i])
lp[i]<-temp[ll]
}
plot(c(dv,dv),c(gval,lp),type="n",xlab="delta",ylab="power")
lines(dv,gval)
lines(dv,lp,lty=2)
}
list(delta=dv,power=gval,lowp=lp)
}

trimpb2<-function(x,y,tr=.2,alpha=.05,nboot=2000,WIN=FALSE,win=.1,plotit=FALSE,op=4,
SEED=TRUE){
#
#   Compute a 1-alpha confidence interval for
#   the difference between two 20% trimmed means.
#   Independent groups are assumed.
#
#   The default number of bootstrap samples is nboot=2000
#
#   tr is the amount of trimming
#
#   win is the amount of Winsorizing before bootstrapping
#   when WIN=T.
#
#   Missing values are automatically removed.
#
x<-x[!is.na(x)]
y<-y[!is.na(y)]
if(WIN){
if(win>tr)stop("Cannot Winsorize more than you trim")
if(tr < .2){print("When Winsorizing, the amount of trimming")
print("should be at least .2")
}
if(min(c(length(x),length(y))) < 15){
print ("Warning: Winsorizing with sample sizes less than 15")
print("can result in poor control over the probability of a Type I error")
}
x<-winval(x,win)
y<-winval(y,win)
}
xx<-list()
xx[[1]]<-x
xx[[2]]<-y
est.dif<-tmean(xx[[1]],tr=tr)-tmean(xx[[2]],tr=tr)
crit<-alpha/2
temp<-round(crit*nboot)
icl<-temp+1
icu<-nboot-temp
bvec<-matrix(NA,nrow=2,ncol=nboot)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
for(j in 1:2){
data<-matrix(sample(xx[[j]],size=length(xx[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,mean,tr) # Bootstrapped trimmed means for jth group
}
top<-bvec[1,]-bvec[2,]
test<-sum(top<0)/nboot+.5*sum(top==0)/nboot
if(test > .5)test<-1-test
top<-sort(top)
ci<-NA
ci[1]<-top[icl]
ci[2]<-top[icu]
if(plotit)g2plot(bvec[1,],bvec[2,],op=op)
list(p.value=2*test,ci=ci,est.dif=est.dif)
}
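
#  Example usage (a minimal sketch with simulated data; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-rnorm(30)
y<-rnorm(40)+.5
trimpb2(x,y)  # percentile bootstrap CI for the difference between 20% trimmed means
}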

twolsreg<-function(x1,y1,x2,y2){
#
#   Compute a .95 confidence interval for
#   the difference between two regression slopes,
#   estimated via least squares and
#    corresponding to two independent groups.
#
#   This function uses an adjusted percentile bootstrap method that
#   gives good results when the error term is heteroscedastic.
#
#   WARNING: If the number of bootstrap samples is altered, it is
#   unknown how to adjust the confidence interval when n1+n2 < 250.
#
nboot<-599  #Number of bootstrap samples
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples; please wait")
xy=elimna(cbind(x1,y1))
if(ncol(xy)>2)stop("This function only allows one covariate")
x1=xy[,1]
y1=xy[,2]
xy=elimna(cbind(x2,y2))
x2=xy[,1]
y2=xy[,2]
data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
bvec1<-apply(data1,1,twolsregsub,x1,y1) # A 1 by nboot matrix.
data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot)
bvec2<-apply(data2,1,twolsregsub,x2,y2) # A 1 by nboot matrix.
bvec<-bvec1-bvec2
ilow<-15
ihi<-584
if(length(y1)+length(y2) < 250){
ilow<-14
ihi<-585
}
if(length(y1)+length(y2) < 180){
ilow<-11
ihi<-588
}
if(length(y1)+length(y2) < 80){
ilow<-8
ihi<-592
}
if(length(y1)+length(y2) < 40){
ilow<-7
ihi<-593
}
bsort<-sort(bvec)
b1<-lsfit(x1,y1)$coef[2]
b2<-lsfit(x2,y2)$coef[2]
ci<-c(bsort[ilow],bsort[ihi])
list(b1=b1,b2=b2,ci=ci)
}

twolsregsub<-function(isub, x, y)
{
        #
        #  Compute least squares estimate of the
        #  slope using x[isub] and y[isub]
        #  isub is a vector of length n,
        #  a bootstrap sample from the sequence of integers
        #  1, 2, 3, ..., n
        #
        twolsregsub<-lsfit(x[isub],y[isub])$coef[2]
        twolsregsub
}
bdanova1<-function(x,alpha=.05,power=.9,delta=NA){
#
#  Do the first stage of a Bishop-Dudewicz ANOVA method.
#  That is, based on the data in x
#  determine N_j, the number of observations needed
#  in the jth group to achieve power 1-beta.
#
#  The argument x is assumed to have list mode, or the
#  data are assumed to be stored in an n by J matrix
#
if(is.na(delta))stop("A value for delta was not specified")
if(!is.list(x)){
if(!is.matrix(x))stop("Data must be stored in matrix or in list mode")
}
y<-x
if(is.list(y))y=matl(y)
x<-list()
for(j in 1:ncol(y))x[[j]]<-elimna(y[,j])
nvec<-NA
svec<-NA
J<-length(x)
for(j in 1:length(x)){
nvec[j]<-length(x[[j]])
svec[j]<-var(x[[j]])
}
nu<-nvec-1
nu1<-sum(1/(nu-2))
nu1<-J/nu1+2
A<-(J-1)*nu1/(nu1-2)
B<-(nu1^2/J)*(J-1)/(nu1-2)
C<-3*(J-1)/(nu1-4)
D<-(J^2-2*J+3)/(nu1-2)
E<-B*(C+D)
M<-(4*E-2*A^2)/(E-A^2-2*A)
L<-A*(M-2)/M
f<-qf(1-alpha,L,M)
crit<-L*f
b<-(nu1-2)*crit/nu1
zz<-qnorm(power)
A<-.5*(sqrt(2)*zz+sqrt(2*zz^2+4*(2*b-J+2)))
B<-A^2-b
d<-((nu1-2)/nu1)*delta/B
N<-NA
for(j in 1:length(x)){
N[j]<-max(c(nvec[j]+1,floor(svec[j]/d)+1))
}
list(N=N,d=d,crit=crit)
}


comvar2<-function(x,y,nboot=1000,SEED=TRUE){
#
#  Compare the variances of two independent groups.
#
x<-x[!is.na(x)]  # Remove missing values in x
y<-y[!is.na(y)]  # Remove missing values in y
# set seed of random number generator so that
# results can be duplicated.
sig<-var(x)-var(y)
if(SEED)set.seed(2)
nmin<-min(length(x),length(y))
print("Taking bootstrap samples. Please wait.")
datax<-matrix(sample(x,size=nmin*nboot,replace=TRUE),nrow=nboot)
datay<-matrix(sample(y,size=nmin*nboot,replace=TRUE),nrow=nboot)
v1<-apply(datax,1,FUN=var)
v2<-apply(datay,1,FUN=var)
boot<-v1-v2
boot<-sort(boot)
        ilow <- 15
        ihi <- 584
        if(nmin < 250) {
                ilow <- 13
                ihi <- 586
        }
        if(nmin < 180) {
                ilow <- 10
                ihi <- 589
        }
        if(nmin < 80) {
                ilow <- 7
                ihi <- 592
        }
        if(nmin < 40) {
                ilow <- 6
                ihi <- 593
        }
ilow<-round((ilow/599)*nboot)
ihi<-round((ihi/599)*nboot)
ci<-c(boot[ilow+1],boot[ihi])
list(ci=ci,vardif=sig)
}
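
#  Example usage (a minimal sketch with simulated data; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-rnorm(50)
y<-rnorm(50,sd=2)
comvar2(x,y)  # bootstrap CI for var(x)-var(y)
}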


regi<-function(x,y,z,pt=median(z),fr=.8,est=onestep,regfun=tsreg,testit=FALSE,...){
#
# split the data according to whether z is < or > pt, then
# use runmean2g to plot a smooth of the regression
# lines corresponding to these two groups.
#
m<-cbind(x,y,z)
m<-elimna(m)
x<-m[,1]
y<-m[,2]
z<-m[,3]
flag<-(z<pt)
runmean2g(x[flag],y[flag],x[!flag],y[!flag],fr=fr,est=est,...)
output<-"Done"
if(testit){
abline(regfun(x[flag],y[flag])$coef)
abline(regfun(x[!flag],y[!flag])$coef,lty=2)
output<-reg2ci(x[flag],y[flag],x[!flag],y[!flag],regfun=regfun,plotit=FALSE)
}
output
}

ancpb<-function(x1,y1,x2,y2,est=onestep,pts=NA,fr1=1,fr2=1,nboot=NA,alpha=.05,xout=FALSE,outfun=outpro,plotit=TRUE,...){
#
# Compare two independent  groups using an ancova method
# with a percentile bootstrap combined with a running interval
# smooth.
#
#  Assume data are in x1 y1 x2 and y2
#  Comparisons are made at the design points contained in the vector
#  pts
#
xy1=elimna(cbind(x1,y1))
x1=xy1[,1]
y1=xy1[,2]
xy2=elimna(cbind(x2,y2))
x2=xy2[,1]
y2=xy2[,2]
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
npt<-5
gv1<-vector("list")
if(is.na(pts[1])){
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,3)
dimnames(mat)<-list(NULL,c("X","n1","n2"))
for (i in 1:5){
j<-i+5
temp1<-y1[near(x1,x1[isub[i]],fr1)]
temp2<-y2[near(x2,x1[isub[i]],fr2)]
temp1<-temp1[!is.na(temp1)]
temp2<-temp2[!is.na(temp2)]
mat[i,1]<-x1[isub[i]]
mat[i,2]<-length(temp1)
mat[i,3]<-length(temp2)
gv1[[i]]<-temp1
gv1[[j]]<-temp2
}
I1<-diag(npt)
I2<-0-I1
con<-rbind(I1,I2)
test<-pbmcp(gv1,alpha=alpha,nboot=nboot,est=est,con=con,...)
}
#
if(!is.na(pts[1])){
npt<-length(pts)
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
mat<-matrix(NA,length(pts),3)
dimnames(mat)<-list(NULL,c("X","n1","n2"))
gv<-vector("list",2*length(pts))
for (i in 1:length(pts)){
j<-i+npt
temp1<-y1[near(x1,pts[i],fr1)]
temp2<-y2[near(x2,pts[i],fr2)]
temp1<-temp1[!is.na(temp1)]
temp2<-temp2[!is.na(temp2)]
mat[i,1]<-pts[i]
if(length(temp1)<=5)print(paste("Warning, there are",length(temp1)," points corresponding to the design point X=",pts[i]))
if(length(temp2)<=5)print(paste("Warning, there are",length(temp2)," points corresponding to the design point X=",pts[i]))
mat[i,2]<-length(temp1)
mat[i,3]<-length(temp2)
gv1[[i]]<-temp1
gv1[[j]]<-temp2
}
I1<-diag(npt)
I2<-0-I1
con<-rbind(I1,I2)
test<-pbmcp(gv1,alpha=alpha,nboot=nboot,est=est,con=con,...)
}
if(plotit){
runmean2g(x1,y1,x2,y2,fr=fr1,est=est,...)
}
list(mat=mat,output=test$output,con=test$con,num.sig=test$num.sig)
}

ancboot<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,nboot=599,pts=NA,plotit=TRUE,xout=FALSE,outfun=outpro,...){
#
# Compare two independent  groups using the ancova method
# in chapter 11 of Wilcox, 2013, Intro to Robust Estimation and Hypothesis Testing. 
# No assumption is made about the form of the regression
# lines--a running interval smoother is used.
# Confidence intervals are computed using a percentile t bootstrap
# method. Comparisons are made at five empirically chosen design points.
#
#  Assume data are in x1 y1 x2 and y2
#
if(is.na(pts[1])){
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
m1=elimna(cbind(x1,y1))
x1=m1[,1]
y1=m1[,2]
m1=elimna(cbind(x2,y2))
x2=m1[,1]
y2=m1[,2]
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,8)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","ci.low","ci.hi",
"p.value"))
gv1<-vector("list")
for (i in 1:5){
j<-i+5
temp1<-y1[near(x1,x1[isub[i]],fr1)]
temp2<-y2[near(x2,x1[isub[i]],fr2)]
temp1<-temp1[!is.na(temp1)]
temp2<-temp2[!is.na(temp2)]
mat[i,2]<-length(temp1)
mat[i,3]<-length(temp2)
gv1[[i]]<-temp1
gv1[[j]]<-temp2
}
I1<-diag(5)
I2<-0-I1
con<-rbind(I1,I2)
test<-linconb(gv1,con=con,tr=tr,nboot=nboot)
for(i in 1:5){
mat[i,1]<-x1[isub[i]]
}
mat[,4]<-test$psihat[,2]
mat[,5]<-test$test[,2]
mat[,6]<-test$psihat[,3]
mat[,7]<-test$psihat[,4]
mat[,8]<-test$test[,4]
}
if(!is.na(pts[1])){
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
if(n1[i]<=5)print(paste("Warning, there are",n1[i]," points corresponding to the design point X=",pts[i]))
if(n2[i]<=5)print(paste("Warning, there are",n2[i]," points corresponding to the design point X=",pts[i]))
}
mat<-matrix(NA,length(pts),9)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi",
"p.value"))
gv<-vector("list",2*length(pts))
for (i in 1:length(pts)){
g1<-y1[near(x1,pts[i],fr1)]
g2<-y2[near(x2,pts[i],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
j<-i+length(pts)
gv[[i]]<-g1
gv[[j]]<-g2
}
I1<-diag(length(pts))
I2<-0-I1
con<-rbind(I1,I2)
test<-linconb(gv,con=con,tr=tr,nboot=nboot)
mat[,1]<-pts
mat[,2]<-n1
mat[,3]<-n2
mat[,4]<-test$psihat[,2]
mat[,5]<-test$test[,2]
mat[,6]<-test$test[,3]
mat[,7]<-test$psihat[,3]
mat[,8]<-test$psihat[,4]
mat[,9]<-test$test[,4]
}
if(plotit){
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
runmean2g(x1,y1,x2,y2,fr=fr1,est=mean,tr=tr)
}
list(output=mat,crit=test$crit)
}

spear<-function(x,y=NULL){
# Compute Spearman's rho
#
if(!is.null(y[1])){
m=elimna(cbind(x,y))
x=m[,1]
y=m[,2]
corv<-cor(rank(x),rank(y))
}
if(is.null(y[1])){
x=elimna(x)
m<-apply(x,2,rank)
corv<-cor(m)
}
test <-corv * sqrt((length(x) - 2)/(1. - corv^2))
sig <- 2 * (1 - pt(abs(test), length(x) - 2))
if(is.null(y[1]))sig<-matrix(sig,ncol=sqrt(length(sig)))
list(cor=corv,siglevel = sig)
}
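
#  Example usage (a minimal sketch with simulated data; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-rnorm(30)
y<-x+rnorm(30)
spear(x,y)  # Spearman's rho and its significance level
}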


linchk<-function(x,y,sp,pv=1,regfun=tsreg,plotit=TRUE,nboot=599,alpha=.05,pr=T){
#
# Split the data into two groups according to whether
# predictor variable pv has a value less than sp.
# Then test the hypothesis that slope coefficients,
# based on the regression method regfun, are equal.
#
x<-as.matrix(x)
if(pr)print(paste("Splitting data using predictor", pv))
xx<-x[,pv]
flag<-(xx<=sp)
temp<-reg2ci(x[flag,],y[flag],x[!flag,],y[!flag],regfun=regfun,plotit=plotit,nboot=nboot,alpha=alpha)
temp
}

trimci<-function(x,tr=.2,alpha=.05,null.value=0,pr=TRUE){
#
#  Compute a 1-alpha confidence interval for the trimmed mean
#
#  The default amount of trimming is tr=.2
#
if(pr){
print("The p-value returned by the this function is based on the")
print("null value specified by the argument null.value, which defaults to 0")
}
x<-elimna(x)
se<-sqrt(winvar(x,tr))/((1-2*tr)*sqrt(length(x)))
trimci<-vector(mode="numeric",length=2)
df<-length(x)-2*floor(tr*length(x))-1
trimci[1]<-mean(x,tr)-qt(1-alpha/2,df)*se
trimci[2]<-mean(x,tr)+qt(1-alpha/2,df)*se
test<-(mean(x,tr)-null.value)/se
sig<-2*(1-pt(abs(test),df))
list(ci=trimci,estimate=mean(x,tr),test.stat=test,se=se,p.value=sig,n=length(x))
}
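
#  Example usage (a minimal sketch with simulated data; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-rnorm(25)
trimci(x)                      # .95 CI for the 20% trimmed mean
trimci(x,tr=.1,null.value=.5)  # 10% trimming, testing a nonzero null value
}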

msmed<-function(x,y=NA,con=0,alpha=.05){
#
# Test a set of linear contrasts using Medians
#
#  The data are assumed to be stored in $x$ in a matrix or in list mode.
#  Length(x) is assumed to correspond to the total number of groups, J
#  It is assumed all groups are independent.
#
#  con is a J by d matrix containing the contrast coefficients that are used.
#  If con is not specified, all pairwise comparisons are made.
#
#  Missing values are automatically removed.
#
if(!is.na(y[1])){
xx<-list()
xx[[1]]<-x
xx[[2]]<-y
if(is.matrix(x) || is.list(x))stop("When y is speficied, x should not have list mode or be a matrix")
x<-xx
}
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
con<-as.matrix(con)
J<-length(x)
h<-vector("numeric",J)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
if(sum(duplicated(val))>0){
print(paste("Warning: Group",j, "has tied values. Might want to use medpb"))
}
x[[j]]<-val[xx]  # Remove missing values
xbar[j]<-median(x[[j]])
w[j]<-msmedse(x[[j]])^2 # Squared standard error.
}
if(sum(con^2)>0)CC<-ncol(con)
if(sum(con^2)==0){
CC<-(J^2-J)/2
psihat<-matrix(0,CC,5)
dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper"))
test<-matrix(NA,CC,6)
dimnames(test)<-list(NULL,c("Group","Group","test","crit","se","p.value"))
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
jcom<-jcom+1
test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k])
test[jcom,6]<-2*(1-pt(test[jcom,3],999))
sejk<-sqrt(w[j]+w[k])
test[jcom,5]<-sejk
psihat[jcom,1]<-j
psihat[jcom,2]<-k
test[jcom,1]<-j
test[jcom,2]<-k
psihat[jcom,3]<-(xbar[j]-xbar[k])
crit<-NA
if(CC==1)crit<-qnorm(1-alpha/2)
if(CC>1){
if(alpha==.05)crit<-smmcrit(500,CC)
if(alpha==.01)crit<-smmcrit01(500,CC)
if(is.na(crit))warning("Can only be used with alpha=.05 or .01")
}
test[jcom,4]<-crit
psihat[jcom,4]<-psihat[jcom,3]-crit*test[jcom,5]
psihat[jcom,5]<-psihat[jcom,3]+crit*test[jcom,5]
}}}}
if(sum(con^2)>0){
if(nrow(con)!=length(x))warning("The number of groups does not match the number of contrast coefficients.")
psihat<-matrix(0,ncol(con),4)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
test<-matrix(0,ncol(con),5)
dimnames(test)<-list(NULL,c("con.num","test","crit","se","p.value"))
for (d in 1:ncol(con)){
psihat[d,1]<-d
psihat[d,2]<-sum(con[,d]*xbar)
sejk<-sqrt(sum(con[,d]^2*w))
test[d,1]<-d
test[d,2]<-sum(con[,d]*xbar)/sejk
test[d,5]<-2*(1-pt(abs(test[d,2]),999))
crit<-NA
if(CC==1)crit<-qnorm(1-alpha/2)
if(alpha==.05)crit<-smmcrit(500,ncol(con))
if(alpha==.01)crit<-smmcrit01(500,ncol(con))
test[d,3]<-crit
test[d,4]<-sejk
psihat[d,3]<-psihat[d,2]-crit*sejk
psihat[d,4]<-psihat[d,2]+crit*sejk
}}
list(test=test,psihat=psihat)
}
selby<-function(m,grpc,coln){
#
#
#  A common situation is to have data stored in an n by p matrix where
#  one or more of the columns are group identification numbers.
#  This function groups all values in column coln according to the
#  group numbers in column grpc and stores the results in list mode.
#
#  More than one column of data can be sorted
#
# grpc indicates the column of the matrix containing the group id numbers
#
if(is.null(dim(m)))stop("Data must be stored in a matrix or data frame")
if(is.na(grpc[1]))stop("The argument grpc is not specified")
if(is.na(coln[1]))stop("The argument coln is not specified")
if(length(grpc)!=1)stop("The argument grpc must have length 1")
x<-vector("list")
grpn<-sort(unique(m[,grpc]))
it<-0
for (ig in 1:length(grpn)){
for (ic in 1:length(coln)){
it<-it+1
flag<-(m[,grpc]==grpn[ig])
x[[it]]<-m[flag,coln[ic]]
}}
list(x=x,grpn=grpn)
}


med2way<-function(J,K,x,grp=c(1:p),alpha=.05,p=J*K){
#
#  Perform a J by K (two-way) anova on  medians where
#  all jk groups are independent.
#
#  The argument x is assumed to contain the raw
#  data stored in list mode.
#  If grp is unspecified, it is assumed x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second factor: level 1,2
#  x[[j+1]] is the data for level 2,1, etc.
#  If the data are in wrong order, grp can be used to rearrange the
#  groups. For example, for a two by two design, grp<-c(2,4,3,1)
#  indicates that the second group corresponds to level 1,1;
#  group 4 corresponds to level 1,2; group 3 is level 2,1;
#  and group 1 is level 2,2.
#
#  It is assumed that the input variable x has length JK, the total number of
#  groups being tested. If not, a warning message is printed.
#
print("Suggestion: Use the function med2way or m2way instead, especially with tied values")
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data are not stored in a matrix or in list mode")
if(p!=length(x)){
print("Warning: The number of groups in your data is not equal to JK")
}
xbar<-0
h<-0
d<-0
R<-0
W<-0
r<-0
w<-0
nuhat<-0
omegahat<-0
DROW<-0
DCOL<-0
xtil<-matrix(0,J,K)
aval<-matrix(0,J,K)
for (j in 1:p){
#if(sum(duplicated(x[[grp[j]]]))>0)print("WARNING: TIED VALUES")
xbar[j]<-median(x[[grp[j]]])
h[j]<-length(x[[grp[j]]])
d[j]<-msmedse(x[[grp[j]]])^2
}
d<-matrix(d,J,K,byrow=T)
xbar<-matrix(xbar,J,K,byrow=T)
h<-matrix(h,J,K,byrow=T)
for(j in 1:J){
R[j]<-sum(xbar[j,])
nuhat[j]<-(sum(d[j,]))^2/sum(d[j,]^2/(h[j,]-1))
r[j]<-1/sum(d[j,])
DROW[j]<-sum(1/d[j,])
}
for(k in 1:K){
W[k]<-sum(xbar[,k])
omegahat[k]<-(sum(d[,k]))^2/sum(d[,k]^2/(h[,k]-1))
w[k]<-1/sum(d[,k])
DCOL[k]<-sum(1/d[,k])
}
D<-1/d
for(j in 1:J){
for(k in 1:K){
xtil[j,k]<-sum(D[,k]*xbar[,k]/DCOL[k])+sum(D[j,]*xbar[j,]/DROW[j])-
sum(D*xbar/sum(D))
aval[j,k]<-(1-D[j,k]*(1/sum(D[j,])+1/sum(D[,k])-1/sum(D)))^2/(h[j,k]-3)
}
}
Rhat<-sum(r*R)/sum(r)
What<-sum(w*W)/sum(w)
Ba<-sum((1-r/sum(r))^2/nuhat)
Bb<-sum((1-w/sum(w))^2/omegahat)
Va<-sum(r*(R-Rhat)^2)/((J-1)*(1+2*(J-2)*Ba/(J^2-1)))
Vb<-sum(w*(W-What)^2)/((K-1)*(1+2*(K-2)*Bb/(K^2-1)))
sig.A<-1-pf(Va,J-1,9999999)
sig.B<-1-pf(Vb,K-1,9999999)
# Next, do test for interactions
Vab<-sum(D*(xbar-xtil)^2)
dfinter<-(J-1)*(K-1)
sig.AB<-1-pchisq(Vab,dfinter)
list(test.A=Va,p.val.A=sig.A,test.B=Vb,p.val.B=sig.B,test.AB=Vab,p.val.AB=sig.AB)
}


idealf<-function(x,na.rm=FALSE){
#
# Compute the ideal fourths for data in x
#
if(na.rm)x<-x[!is.na(x)]
j<-floor(length(x)/4 + 5/12)
y<-sort(x)
g<-(length(x)/4)-j+(5/12)
ql<-(1-g)*y[j]+g*y[j+1]
k<-length(x)-j+1
qu<-(1-g)*y[k]+g*y[k-1]
list(ql=ql,qu=qu)
}
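
#  Example usage (a minimal sketch; wrapped in if(FALSE) so sourcing this
#  file does not run it):
if(FALSE){
idealf(c(2,4,4,5,7,8,9,12,15,21))  # ideal fourths (lower and upper quartile estimates)
}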

lintests1<-function(vstar,yhat,res,mflag,x,regfun,...){
#
# Used by lintest when computing bootstrap test statistics:
# form ystar=yhat+res*vstar, refit the regression with regfun,
# and for each design point sum the bootstrap residuals over the
# points flagged in mflag.
#
ystar<-yhat+res*vstar
bres<-regfun(x,ystar,...)$residuals
rval<-0
for (i in 1:nrow(x)){
rval[i]<-sum(bres[mflag[,i]])
}
rval
}





rdepth<-function(d, x, y, sortx = T)
{
##########################################################################
# This function computes the regression depth of a line with coordinates d
# relative to the bivariate data set (x,y).
# The first component of the vector d indicates the intercept of the line,
# the second component is the slope.
#
# Input : d          : vector with two components
#         x,y        : vectors of equal length (data set)
#         sortx      : logical, to set to F if the data set (x,y) is
#                      already sorted by its x-coordinates
#
# Reference:
#           Rousseeuw, P.J. and Hubert, M. (1996),
#           Regression Depth, Technical report, University of Antwerp
#           submitted for publication.
##########################################################################
        if(!is.vector(x) || !is.vector(y)) stop("x and y should be vectors")
        n <- length(x)
        if(n < 2)
                stop("you need at least two observations")
        xy <- cbind(x, y)
        b <- d[1]
        a <- d[2]
        if(sortx)
                xy <- xy[order(xy[, 1], xy[, 2]),  ]
        res <- xy[, 2] - a * xy[, 1] - b
        res[abs(res) < 1e-07] <- 0
        posres <- res >= 0
        negres <- res <= 0
        lplus <- cumsum(posres)
        rplus <- lplus[n] - lplus
        lmin <- cumsum(negres)
        rmin <- lmin[n] - lmin
        depth <- pmin(lplus + rmin, rplus + lmin)
        min(depth)
}

permg<-function(x,y,alpha=.05,est=mean,nboot=1000){
#
# Do a two-sample permutation test based on means or any
# other measure of location or scale indicated by the
# argument est.
#
# The default number of permutations is nboot=1000
#
x<-x[!is.na(x)]
y<-y[!is.na(y)]
xx<-c(x,y)
dif<-est(x)-est(y)
vec<-c(1:length(xx))
v1<-length(x)+1
difb<-NA
temp2<-NA
for(i in 1:nboot){
data <- sample(xx, size = length(xx), replace = FALSE)
temp1<-est(data[c(1:length(x))])
temp2<-est(data[c(v1:length(xx))])
difb[i]<-temp1-temp2
}
difb<-sort(difb)
icl<-floor((alpha/2)*nboot+.5)
icu<-floor((1-alpha/2)*nboot+.5)
reject<-"no"
if(dif>=difb[icu] || dif <=difb[icl])reject<-"yes"
list(dif=dif,lower=difb[icl],upper=difb[icu],reject=reject)
}
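
#  Example usage (a minimal sketch with simulated data; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-rnorm(20)
y<-rnorm(25)+1
permg(x,y)             # permutation test based on means
permg(x,y,est=median)  # or based on medians
}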


pb2gen<-function(x,y,alpha=.05,nboot=2000,est=onestep,SEED=TRUE,pr=TRUE,...){
#
#   Compute a bootstrap confidence interval for the
#   the difference between any two parameters corresponding to
#   independent groups.
#   By default, M-estimators are compared.
#   Setting est=mean, for example, will result in a percentile
#   bootstrap confidence interval for the difference between means.
#   Setting est=onestep will compare M-estimators of location.
#   The default number of bootstrap samples is nboot=2000
#
x<-x[!is.na(x)] # Remove any missing values in x
y<-y[!is.na(y)] # Remove any missing values in y
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvecx<-apply(datax,1,est,...)
bvecy<-apply(datay,1,est,...)
bvec<-sort(bvecx-bvecy)
low<-round((alpha/2)*nboot)+1
up<-nboot-low
temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot)
sig.level<-2*(min(temp,1-temp))
se<-var(bvec)
list(est.1=est(x,...),est.2=est(y,...),est.dif=est(x,...)-est(y,...),ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se,n1=length(x),n2=length(y))
}
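
#  Example usage (a minimal sketch with simulated data; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
set.seed(1)
x<-rnorm(30)
y<-rnorm(30)+.5
pb2gen(x,y,est=median)  # percentile bootstrap CI for the difference between medians
}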




tmean<-function(x,tr=.2,na.rm=FALSE,STAND=NULL){
if(na.rm)x<-x[!is.na(x)]
val<-mean(x,tr)
val
}

depth<-function(U,V,m){
#
#  Compute the halfspace depth of the point (u,v) for the pairs of points
#  in the n by 2 matrix m.
#
X<-m[,1]
Y<-m[,2]
FV<-NA
NUMS<-0
NUMH<-0
SDEP<-0.0
HDEP<-0.0
N<-length(X)
P<-acos(-1)
P2<-P*2.0
EPS<-0.000001
ALPHA<-NA
NT<-0
for(i in 1:nrow(m)){
               DV<-sqrt(((X[i]-U)*(X[i]-U)+(Y[i]-V)*(Y[i]-V)))
              if (DV <= EPS){
              NT<-NT+1
                              }
          else{
              XU<-(X[i]-U)/DV
              YU<-(Y[i]-V)/DV
              if (abs(XU) > abs(YU)){
                  if (X[i] >= U){
                      ALPHA[i-NT]<-asin(YU)
                      if(ALPHA[i-NT] < 0.0)
                          ALPHA[i-NT]<-P2+ALPHA[i-NT]
                                  }
                  else{
                      ALPHA[i-NT]<-P-asin(YU)
                      }
                                    }
              else{
                  if (Y[i] >= V)
                      ALPHA[i-NT]<-acos(XU)
                  else
                      ALPHA[i-NT]<-P2-acos(XU)
                  }
              if (ALPHA[i-NT] >= P2-EPS) ALPHA[i-NT]<-0.0
                }
}
NN<-N-NT
if(NN<=1){
NUMS<-NUMS+depths1(NT,1)*depths1(NN,2)+depths1(NT,2)*depths1(NN,1)+
depths1(NT,3)
      if(N >= 3)SDEP<-(NUMS+0.0)/(depths1(N,3)+0.0)
      NUMH<-NUMH+NT
      HDEP<-(NUMH+0.0)/(N+0.0)
      return(HDEP)
}
ALPHA<-sort(ALPHA[1:NN])
ANGLE<-ALPHA[1]-ALPHA[NN]+P2
for(i in 2:NN){
ANGLE<-max(c(ANGLE,ALPHA[i]-ALPHA[i-1]))
               }
if(ANGLE > (P+EPS)){
NUMS<-NUMS+depths1(NT,1)*depths1(NN,2)+depths1(NT,2)*depths1(NN,1)+
depths1(NT,3)
      if(N >= 3)SDEP<-(NUMS+0.0)/(depths1(N,3)+0.0)
      NUMH<-NUMH+NT
      HDEP<-(NUMH+0.0)/(N+0.0)
      return(HDEP)
                  }
ANGLE<-ALPHA[1]
NU<-0
for (i in 1:NN){
ALPHA[i]<-ALPHA[i]-ANGLE
if(ALPHA[i]<(P-EPS))NU<-NU+1
               }
if(NU >= NN){
NUMS<-NUMS+depths1(NT,1)*depths1(NN,2)+depths1(NT,2)*depths1(NN,1)+
depths1(NT,3)
      if(N >= 3)SDEP<-(NUMS+0.0)/(depths1(N,3)+0.0)
      NUMH<-NUMH+NT
      HDEP<-(NUMH+0.0)/(N+0.0)
      return(HDEP)
}
#
#  Mergesort the alpha with their antipodal angles beta,
#  and at the same time update I, F(I), and NBAD.
#
JA<-1
JB<-1
      ALPHK<-ALPHA[1]
      BETAK<-ALPHA[NU+1]-P
      NN2<-NN*2
      NBAD<-0
      I<-NU
      NF<-NN
for(J in 1:NN2){
           ADD<-ALPHK+EPS
          if (ADD < BETAK){
              NF<-NF+1
              if(JA < NN){
                  JA<-JA+1
                  ALPHK<-ALPHA[JA]
              }
              else
                  ALPHK<-P2+1.0
              }
          else{
              I<-I+1
              NN1<-NN+1
              if(I==NN1){
                  I<-1
                  NF<-NF-NN
              }
              FV[I]<-NF
              NFI<-NF-I
              NBAD<-NBAD+depths1(NFI,2)
              if(JB < NN){
                  JB<-JB+1
                  if(JB+NU <= NN)
                      BETAK<-ALPHA[JB+NU]-P
                  else
                      BETAK<-ALPHA[JB+NU-NN]+P
              }
              else
                  BETAK<-P2+1.0
          }
}
NUMS<-depths1(NN,3)-NBAD
#
#  Computation of NUMH for halfspace depth.
#
      GI<-0
      JA<-1
      ANGLE<-ALPHA[1]
      dif<-NN-FV[1]
      NUMH<-min(FV[1],dif)
for(I in 2:NN){
          AEPS<-ANGLE+EPS
          if(ALPHA[I] <= AEPS){
              JA<-JA+1
                              }
          else{
              GI<-GI+JA
              JA<-1
              ANGLE<-ALPHA[I]
              }
          KI<-FV[I]-GI
          NNKI<-NN-KI
          NUMH<-min(c(NUMH,min(c(KI,NNKI))))
   }
NUMS<-NUMS+depths1(NT,1)*depths1(NN,2)+depths1(NT,2)*depths1(NN,1)+
depths1(NT,3)
      if(N >= 3)SDEP<-(NUMS+0.0)/(depths1(N,3)+0.0)
      NUMH<-NUMH+NT
      HDEP<-(NUMH+0.0)/(N+0.0)
      HDEP
}
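
#  Example usage (a minimal sketch with simulated data; wrapped in if(FALSE)
#  so sourcing this file does not run it):
if(FALSE){
set.seed(1)
m<-matrix(rnorm(100),ncol=2)
depth(0,0,m)  # halfspace depth of the point (0,0) relative to m
}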

rtdep<-function(pts,m,nsamp=100,SEED=NA){
#
#  Determine Tukey depth by randomly sampling
#  p-1 points from m (which has p columns),
#  combine this with pt, fit a plane, check
#  the residuals, and repeat many times.
#  Count how many positive residuals
#  there are, say pr, how many negative residuals, nr.
#  The approximate depth is min (pr,nr) over all samples.
#
set.seed(2)
if(!is.na(SEED))set.seed(SEED)
if(!is.matrix(m))stop("Second argument is not a matrix")
if(ncol(m)==2)tdep<-depth(pts[1],pts[2],m)
if(ncol(m)>2){
n<-nrow(m)
pts<-matrix(pts,ncol=ncol(m))
mold<-m
p<-ncol(m)
pm1<-p-1
mdup<-matrix(rep(pts,nrow(m)),ncol=ncol(m),byrow=T)
dif<-abs(m-mdup)
chk<-apply(dif,1,sum)
flag<-(chk!=0)
m<-m[flag,]
m<-as.matrix(m)
dmin<-sum(chk==0)
m3<-rbind(m,pts)
tdep<-nrow(m)+1
for(i in 1:nsamp){
mat<-sample(nrow(m),pm1,T)
#if(p==2)x<-c(m[mat,2:p],pts[,2:p])
if(p>2)x<-rbind(m[mat,2:p],pts[,2:p])
y<-c(m[mat,1],pts[1])
if(prod(eigen(var(x))$values) >10^{-8}){
#print(prod(eigen(var(x))$values))
temp<-qr(x)
#print(temp)
#print(ncol(x))
if(temp$rank[1]==ncol(x)){
temp<-lsfit(x,y)$coef
m2<-cbind(rep(1,nrow(m3)),m3[,2:p])
res<-m3[,1]-temp%*%t(m2)
p1<-sum((res>0))
p2<-sum((res<0))
tdep<-min(c(tdep,p1,p2))
if(tdep<dmin)tdep<-dmin
}}}
tdep<-tdep/n
}
tdep
}
hdep <- function(PNT, X, NDIR=100, EPS=10E-7, SEED=NA, PRINT=F )
{
#========================
 #
 # X  - A numeric matrix with N rows and NP columns
 # PNT  - A numeric vector representing a point in the same space as
 # defined by X, so the length of PNT must equal NP.
 # NDIR - The number of samples to draw
 # EPS  - Precision.
 # SEED - If specified, sets the seed of the random
 # number generator
 # PRINT - Default=F. If T, prints warning messages, such as when
 # eigenvectors are 0.
 #
 #=============================
  # SUBROUTINES
 #  DEP
 #  Reduce
 #

   DEP  <- function( X, PNT, NDIR, EPS=10E-8, PRINT=F )
   {
    #=================================================

    #
    # X  - A numeric matrix with N rows and NP columns
    # PNT  - A numeric vector representing a point in the same space as
    # defined by X, so the length of PNT must equal NP.
    # NDIR - The number of samples to draw
    # EPS  - Precision.
    #
    #==================================
    # ---------------------------------------
    # Initialize Number of singular samples
    # ---------------------------------------
     NSIN <- 0
     N  <- nrow( X )
     NP  <- ncol( X )

    # ---------------------------------------
    # Initialize the halfspace depth
    # ---------------------------------------
     NDEP <- N
     for( NRAN in 1:NDIR )
     {
       foundSingular <- F
      # --------------------------------------- ---
      # Draw a random sample of size NP without
      #    replacement
      # ------------------------------------------
       JSAMP <- sample( 1:N, size=NP, replace=FALSE )

      # ------------------------------------------
      #   Compute covariance matrix of the sample
      # ------------------------------------------
       sX  <- matrix( X[JSAMP,], nrow=NP, ncol=NP )
       COV  <- var( sX )

      # ------------------------------------------
      # Computing Eigen Values And Eigen Vector
      # for COV matrix
      # ------------------------------------------
       resEigen <- eigen( COV )
       Eval  <- resEigen[[1]]
       Evec  <- resEigen[[2]]

       if (Eval[NP] > EPS)
       {
        NSIN <- NSIN + 1
        foundSingular <- T
        if (PRINT)
         paste( "ERROR: No Eigenvalue = 0 for sample", NRAN)
        next
       }

      # ------------------------------------------
      # Need to test for singularity
      # ------------------------------------------
       if (Eval[NP-1] <= EPS)
       {
        NSIN <- NSIN + 1
       }

      # ------------------------------------------
      # Projecting all points on the line through
      # theta with direction given by the eigen
      # vector of the smallest eigenvalue, i.e.,
      # the direction orthogonal on the hyperplane
      # given by the NP-subset.
      # Compute the one-dimensional halfspace depth
      # of theta on this line.
      # ------------------------------------------
      # in Splus the smallest eigenvalue is the
      # last one and corresponding vector is the
      # last one, hence Eval[NP] is the smallest
      # and Evec[,NP] is the corresponding vector
      # ------------------------------------------
       eigenVec <- Evec[,NP]
       NT   <- sum( ifelse( eigenVec <= EPS, 1, 0 ) )
       KT   <- sum( ifelse( eigenVec > EPS, PNT * eigenVec, 0 ) )
       if (NT == NP)
       {
        NSIN <- NSIN + 1
        foundSingular <- T
        if (PRINT)
         paste( "      ERROR: Eigenvector = 0 for sample", NRAN )
        if (foundSingular) next             # Do next Sample
       }
       K  <- X %*% eigenVec
       K  <- K - KT
       NUMH <- sum( ifelse( K > EPS, 1, 0 ) )
       NT  <- sum( ifelse( abs(K) <= EPS, 1, 0 ) )
      # -------------------------------------------
      # If all projections collapse with theta,
      # return to reduce the dimension
      # -------------------------------------------
       if (NT == N)
       {
        NSIN <- -1
        return( list( NDEP=NDEP, NSIN=NSIN, EVEC=Evec ) ) # Will need
#Eigen Vector matrix to reduce dimension
       }

      # -------------------------------------------
      # Update halfspace depth
      # -------------------------------------------
       NDEP <- min( NDEP, min( NUMH+NT,N-NUMH ) )
     }

     return( list( NDEP=NDEP, NSIN=NSIN, EVEC=Evec ) )
   }

   #================================================
   Reduce <- function( X, PNT, Evec )
   {
    Det <- det(Evec)
    if (Det==0)
    {
     return( list( X=X, PNT=PNT, DET=Det ) )
    }
    NP <- ncol(X)

    # ---------------------------------------
    # Compute (NP-1)-dimensional coordinates
    # for all points and theta
    # ---------------------------------------
    RedEvec <- matrix(Evec[,1:(NP-1)],nrow=NP,ncol=(NP-1)) # Reducing
    #  dimension by removing the last dimension with 0 variance.
    PNT   <- PNT %*% RedEvec
    X   <- X %*% RedEvec
    if (!is.matrix(X)) X <- matrix(X,ncol=(NP-1))
    return( list( X=X, PNT=PNT, DET=Det ) )
   }

#
# PROGRAM BEGINS
#
  if (!is.na(SEED)) set.seed( SEED )
 # ---------------------------------------
 # Initialize Number of singular samples
 # ---------------------------------------
  Nsin <- 0

  X  <- as.matrix( X )
  N  <- nrow( X )
  NP  <- ncol( X )

if (length(PNT) != NP)stop("Length of 'PNT' must equal the number of columns in X")

 # ---------------------------------------
 # Handle special case where N=1
 # ---------------------------------------
  if (N==1)
  {
   NDEP <- ifelse( abs(X[1,]-PNT) > EPS, 0, 1 )  # if any dimension
#  different from point PNT, NDEP=0, else = 1
   NDEP <- min( NDEP )
   DEPTH <- NDEP/ N
   return( DEPTH )
  }

 # ---------------------------------------
 # Handle special case where NP=1
 # ---------------------------------------
 repeat #+++++++++++++++++++++++++++++++++
 {
 # In this case depth is equal to number of points <= to T
  if (NP==1)
  {
   MORE <- sum( ifelse( X[,1] >= (PNT-EPS), 1, 0 ) )
   LESS <- sum( ifelse( X[,1] <= (PNT+EPS), 1, 0 ) )
   NDEP <- min( LESS, MORE )
   DEPTH <- NDEP / N
   return( DEPTH )
  }

 # ---------------------------------------
 # General Case, call function DEP
 # ---------------------------------------
  if (N > NP)
  {
   RES  <- DEP( X=X, PNT=PNT, NDIR=NDIR, EPS=EPS, PRINT=PRINT )
   NDEP <- RES$NDEP
   NSIN <- RES$NSIN
   EVEC <- RES$EVEC
  }
  else
  {
   NSIN <- -1  # Needs to reduce dimensions
   EVEC <- eigen( var( X ) )[[2]]  # Getting eigenvector
  }

 # ---------------------------------------
 # If all points and theta are identified
 # as lying on the same hyperplane, reduce
 # the dimension of the data set by projection
 # on that hyperplane, and compute the depth
 # on the reduced data set
 # ---------------------------------------
  if (NSIN == -1)
  {
   NSIN <- 0
   if (PRINT) print( "      Direction with zero variance detected" )
   RED  <- Reduce( X=X, PNT=PNT, Evec=EVEC )
   X  <- RED$X
   PNT  <- RED$PNT
   Det  <- RED$DET
   if (Det==0)
   {
print("\n\n\t DIMENSION REDUCTION TERMINATED\n\t EIGENVECTORS ARE NOT")
stop("INDEPENDENT\n\n" )
   }
   NP  <- ncol(X)
   if (PRINT) paste("     Dimension reduced to", NP )
  }
  else
  {
   break   # No need to reduce dimension of X and hence no need to
#return, breaks 'repeat' loop
  }
 } # End repeat+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


 DEPTH <- NDEP / N
 return( DEPTH )
}



depths1<-function(m,j){
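#
# Compute choose(m,j), the number of subsets of size j (j = 1, 2 or 3)
# that can be formed from m points; returns 0 when m < j.
#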
if(m < j)depths1<-0
else{
if(j==1)depths1<-m
if(j==2)depths1<-(m*(m-1))/2
if(j==3)depths1<-(m*(m-1)*(m-2))/6
}
depths1
}

outbox<-function(x,mbox=FALSE,gval=NA,plotit=FALSE,STAND=FALSE){
#
# This function detects outliers using the
# boxplot rule, but unlike the R function boxplot,
# the ideal fourths are used to estimate the quartiles.
#
# Setting mbox=T results in using the modification
# of the boxplot rule suggested by Carling (2000).
#
x<-x[!is.na(x)] # Remove missing values
if(plotit)boxplot(x)
n<-length(x)
temp<-idealf(x)
if(mbox){
if(is.na(gval))gval<-(17.63*n-23.64)/(7.74*n-3.71)
cl<-median(x)-gval*(temp$qu-temp$ql)
cu<-median(x)+gval*(temp$qu-temp$ql)
}
if(!mbox){
if(is.na(gval))gval<-1.5
cl<-temp$ql-gval*(temp$qu-temp$ql)
cu<-temp$qu+gval*(temp$qu-temp$ql)
}
flag<-NA
outid<-NA
vec<-c(1:n)
for(i in 1:n){
flag[i]<-(x[i]< cl || x[i]> cu)
}
if(sum(flag)==0)outid<-NA
if(sum(flag)>0)outid<-vec[flag]
keep<-vec[!flag]
outval<-x[flag]
n.out=sum(flag)
list(out.val=outval,out.id=outid,keep=keep,n.out=n.out,cl=cl,cu=cu)
}
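
# A minimal usage sketch (not part of the original source); it assumes
# this file has been sourced so that idealf is available:
#   set.seed(1)
#   x <- c(rnorm(20), 8)            # one clearly outlying value
#   outbox(x)$out.id                # boxplot rule with ideal fourths
#   outbox(x, mbox=TRUE)$out.id     # Carling's (2000) modification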

mscov<-function(m,STAND=FALSE){
#
# m is an n by p matrix
#
# Compute a skipped covariance matrix
#
# Eliminate outliers using a projection method
# That is, compute Donoho-Gasko median, for each point
# consider the line between it and the median,
# project all points onto this line, and
# check for outliers using a boxplot rule.
# Repeat this for all points. A point is declared
# an outlier if for any projection it is an outlier
# using a modification of the usual boxplot rule.
#
# Eliminate any outliers and compute covariances
#  using remaining data.
#
m<-elimna(m)
temp<-outpro(m,plotit=FALSE,STAND=STAND)$keep
mcor<-var(m[temp,])
mcor
}
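
# A minimal usage sketch (not part of the original source); it assumes
# outpro, called above, is available in the full version of this file:
#   set.seed(1)
#   m <- matrix(rnorm(100), ncol=2)
#   m[1,] <- c(6, -6)     # plant an outlying point
#   mscov(m)              # covariance matrix with the outlier skipped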

runm3d<-function(x,y,theta=50,phi=25,fr=.8,tr=.2,plotit=TRUE,pyhat=FALSE,nmin=0,
expand=.5,scale=FALSE,zscale=FALSE,xout=FALSE,outfun=out,eout=FALSE,xlab="X",ylab="Y",zlab="",
pr=TRUE,SEED=TRUE,ticktype="simple"){
#
# running mean using interval method
#
# fr controls amount of smoothing
# tr is the amount of trimming
# x is an n by p matrix of predictors.
#
#  Rows of data with missing values are automatically removed.
#
# When plotting, theta and phi can be used to change
# the angle at which the plot is viewed.
#
#  theta is the azimuthal direction and phi the colatitude
#   expand controls relative length of z-axis
#
library(MASS)
library(akima)
if(plotit){
if(pr){
print("Note: when there is independence, scale=F is probably best")
print("When there is dependence, scale=T is probably best")
}}
if(!is.matrix(x))stop("x should be a matrix")
if(nrow(x) != length(y))stop("number of rows of x should equal length of y")
temp<-cbind(x,y)
p<-ncol(x)
p1<-p+1
temp<-elimna(temp) # Eliminate any rows with missing values.
if(zscale){
for(j in 1:p1){
temp[,j]<-(temp[,j]-median(temp[,j]))/mad(temp[,j])
}}
x<-temp[,1:p]
y<-temp[,p1]
if(xout){ # Remove rows for which x is flagged as an outlier
keepit<-rep(T,nrow(x))
flag<-outfun(x,plotit=FALSE)$out.id
keepit[flag]<-F
x<-x[keepit,]
y<-y[keepit]
}
pyhat<-as.logical(pyhat)
plotit<-as.logical(plotit)
if(SEED)set.seed(12)
m<-cov.mve(x)
iout<-c(1:nrow(x))
rmd<-1 # Initialize rmd
nval<-1
for(i in 1:nrow(x))rmd[i]<-mean(y[near3d(x,x[i,],fr,m)],tr)
for(i in 1:nrow(x))nval[i]<-length(y[near3d(x,x[i,],fr,m)])
if(plotit){
if(ncol(x)!=2)stop("When plotting, x must be an n by 2 matrix")
fitr<-rmd[nval>nmin]
y<-y[nval>nmin]
x<-x[nval>nmin,]
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr)
persp(fit,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand,
scale=scale,ticktype=ticktype)
}
last<-"Done"
if(pyhat)last<-rmd
last
}

rdplot<-function(x,fr=NA,plotit=TRUE,theta=50,phi=25,expand=.5,pyhat=FALSE,pts=NA,
xlab="X",ylab="",ticktype="simple"){
#
# Expected frequency curve
#
# fr controls amount of smoothing
#  theta is the azimuthal direction and phi the colatitude
#
plotit<-as.logical(plotit)
x<-elimna(x)
x<-as.matrix(x)
rmd<-NA
if(ncol(x)==1){
if(is.na(fr))fr<-.8
if(is.na(pts[1]))pts<-x
for(i in 1:length(pts)){
rmd[i]<-sum(near(x,pts[i],fr))
}
if(mad(x)!=0)rmd<-rmd/(2*fr*mad(x))
rmd<-rmd/length(x)
if(plotit){
plot(pts,rmd,type="n",ylab=ylab,xlab=xlab)
sx<-sort(pts)
xorder<-order(pts)
sysm<-rmd[xorder]
lines(sx,sysm)
}}
if(ncol(x)>1){
library(MASS)
if(is.na(fr))fr<-.6
m<-cov.mve(x)
for(i in 1:nrow(x)){
rmd[i]<-sum(near3d(x,x[i,],fr,m))
}
rmd<-rmd/nrow(x)
if(plotit && ncol(x)==2){
library(akima)
fitr<-rmd
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1]
mkeep<-x[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr)
persp(fit,theta=theta,phi=phi,expand=expand,xlab="Var 1",ylab="Var 2",zlab="",
ticktype=ticktype)
}
}
if(pyhat)last<-rmd
if(!pyhat)last<-"Done"
last
}
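
# A minimal usage sketch (not part of the original source):
#   set.seed(1)
#   z <- rnorm(100)
#   rdplot(z)                                    # plot the expected frequency curve
#   dhat <- rdplot(z, pyhat=TRUE, plotit=FALSE)  # return the estimates instead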

rimul<-function(J,K,x,alpha=.05,p=J*K,grp=c(1:p),plotit=TRUE,op=4){
#
#  Rank-based multiple comparisons for all interactions
#  in J by K design. The method is based on an
#  extension of Cliff's heteroscedastic technique for
#  handling tied values and the Patel-Hoel definition of no interaction.
#
#  The familywise type I error probability is controlled by using
#  a critical value from the Studentized maximum modulus distribution.
#
#  It is assumed all groups are independent.
#
#  Missing values are automatically removed.
#
#  The default value for alpha is .05. Any other value results in using
#  alpha=.01.
#
#  Argument grp can be used to rearrange the order of the data.
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
CCJ<-(J^2-J)/2
CCK<-(K^2-K)/2
CC<-CCJ*CCK
test<-matrix(NA,CC,7)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
}
mat<-matrix(grp,ncol=K,byrow=T)
dimnames(test)<-list(NULL,c("Factor A","Factor A","Factor B","Factor B","delta","ci.lower","ci.upper"))
jcom<-0
crit<-smmcrit(200,CC)
if(alpha!=.05)crit<-smmcrit01(200,CC)
alpha<-1-pnorm(crit)
for (j in 1:J){
for (jj in 1:J){
if (j < jj){
for (k in 1:K){
for (kk in 1:K){
if (k < kk){
jcom<-jcom+1
test[jcom,1]<-j
test[jcom,2]<-jj
test[jcom,3]<-k
test[jcom,4]<-kk
temp1<-cid(x[[mat[j,k]]],x[[mat[j,kk]]],plotit=FALSE)
temp2<-cid(x[[mat[jj,k]]],x[[mat[jj,kk]]],plotit=FALSE)
delta<-temp2$d-temp1$d
sqse<-temp1$sqse.d+temp2$sqse.d
test[jcom,5]<-delta/2
test[jcom,6]<-delta/2-crit*sqrt(sqse/4)
test[jcom,7]<-delta/2+crit*sqrt(sqse/4)
}}}}}}
if(J==2 && K==2){
if(plotit){
m1<-outer(x[[1]],x[[2]],FUN="-")
m2<-outer(x[[3]],x[[4]],FUN="-")
m1<-as.vector(m1)
m2<-as.vector(m2)
g2plot(m1,m2,op=op)
}}
list(test=test)
}

ifmest<-function(x,bend=1.28,op=2){
#
#   Estimate the influence function of an M-estimator, using
#   Huber's Psi, evaluated at x.
#
#   Data are in the vector x, bend is the percentage bend
#
#  op=2, use adaptive kernel estimator
#  otherwise use Rosenblatt's shifted histogram
#
tt<-mest(x,bend)  # Store M-estimate in tt
s<-mad(x)*qnorm(.75)
if(op==2){
val<-akerd(x,pts=tt,plotit=FALSE,pyhat=T)
val1<-akerd(x,pts=tt-s,plotit=FALSE,pyhat=T)
val2<-akerd(x,pts=tt+s,plotit=FALSE,pyhat=T)
}
if(op!=2){
val<-kerden(x,0,tt)
val1<-kerden(x,0,tt-s)
val2<-kerden(x,0,tt+s)
}
ifmad<-sign(abs(x-tt)-s)-(val2-val1)*sign(x-tt)/val
ifmad<-ifmad/(2*.6745*(val2+val1))
y<-(x-tt)/mad(x)
n<-length(x)
b<-sum(y[abs(y)<=bend])/n
a<-hpsi(y)*mad(x)-ifmad*b
ifmest<-a/(length(y[abs(y)<=bend])/n)
ifmest
}

qmjci<-function(x,q=.5,alpha=.05,op=1,pr=TRUE){
#
#   Compute a 1-alpha confidence for qth quantile using the
#   Maritz-Jarrett estimate of the standard error.
#
#   The default quantile is .5.
#   The default value for alpha is .05.
#
x=elimna(x)
if(pr){
if(sum(duplicated(x))>0)print("Duplicate values detected; use hdpb")
}
if(q <= 0 || q>=1)stop("q must be between 0 and 1")
y<-sort(x)
m<-floor(q*length(x)+.5)
crit<-qnorm(1-alpha/2)
qmjci<-vector(mode="numeric",2)
se<-NA
if(op==1)se<-mjse(x)
if(op==2){
if(q!=.5)stop("op=2 works only with q=.5")
se<-msmedse(x)
}
if(op==3)se<-qse(x,q)
if(is.na(se))stop("Something is wrong, op should be 1, 2 or 3")
qmjci[1]<-y[m]-crit*se
qmjci[2]<-y[m]+crit*se
qmjci
}
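
# A minimal usage sketch (not part of the original source):
#   set.seed(1)
#   x <- rexp(30)
#   qmjci(x)          # .95 CI for the median, Maritz-Jarrett standard error
#   qmjci(x, q=.75)   # CI for the .75 quantile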


bootdpci<-function(x,y,est=onestep,nboot=NA,alpha=.05,plotit=TRUE,dif=TRUE,BA=FALSE,...){
#
#   Use percentile bootstrap method,
#   compute a .95 confidence interval for the difference between
#   a measure of location or scale
#   when comparing two dependent groups.
#   By default, a one-step M-estimator (with Huber's psi) is used.
#   If, for example, it is desired to use a fully iterated
#   M-estimator, use fun=mest when calling this function.
#
output<-rmmcppb(x,y,est=est,nboot=nboot,alpha=alpha,
plotit=plotit,dif=dif,BA=BA,...)$output
list(output=output)
}


relfun<-function(xv,yv,C=36,epsilon=.0001,plotit=TRUE){
#   Compute the measures of location, scale and correlation used in the
#   bivariate boxplot of Goldberg and Iglewicz,
#   Technometrics, 1992, 34, 307-320.
#
#   The code in relplot plots the boxplot.
#
#   This code assumes the data are in xv and yv
#
#   This code uses the function biloc, stored in the file biloc.b7 and
#   bivar stored in bivar.b7
#
plotit<-as.logical(plotit)
#
# Do pairwise elimination of missing values
#
temp<-matrix(c(xv,yv),ncol=2)
temp<-elimna(temp)
xv<-temp[,1]
yv<-temp[,2]
tx<-biloc(xv)
ty<-biloc(yv)
sx<-sqrt(bivar(xv))
sy<-sqrt(bivar(yv))
z1<-(xv-tx)/sx+(yv-ty)/sy
z2<-(xv-tx)/sx-(yv-ty)/sy
ee<-((z1-biloc(z1))/sqrt(bivar(z1)))^2+
((z2-biloc(z2))/sqrt(bivar(z2)))^2
w<-(1-ee/C)^2
if(length(w[w==0])>=length(xv)/2)warning("More than half of the w values equal zero")
sumw<-sum(w[ee<C])
tempx<-w*xv
txb<-sum(tempx[ee<C])/sumw
tempy<-w*yv
tyb<-sum(tempy[ee<C])/sumw
tempxy<-w*(xv-txb)*(yv-tyb)
tempx<-w*(xv-txb)^2
tempy<-w*(yv-tyb)^2
sxb<-sum((tempx[ee<C]))/sumw
syb<-sum((tempy[ee<C]))/sumw
rb<-sum(tempxy[ee<C])/(sqrt(sxb*syb)*sumw)
z1<-((xv-txb)/sqrt(sxb)+(yv-tyb)/sqrt(syb))/sqrt(2*(1+rb))
z2<-((xv-txb)/sqrt(sxb)-(yv-tyb)/sqrt(syb))/sqrt(2*(1-rb))
wo<-w
ee<-z1^2+z2^2
w<-(1-ee/C)^2
sumw<-sum(w[ee<C])
tempx<-w*xv
txb<-sum(tempx[ee<C])/sumw
tempy<-w*yv
tyb<-sum(tempy[ee<C])/sumw
tempxy<-w*(xv-txb)*(yv-tyb)
tempx<-w*(xv-txb)^2
tempy<-w*(yv-tyb)^2
sxb<-sum((tempx[ee<C]))/sumw
syb<-sum((tempy[ee<C]))/sumw
rb<-sum(tempxy[ee<C])/(sqrt(sxb*syb)*sumw)
z1<-((xv-txb)/sqrt(sxb)+(yv-tyb)/sqrt(syb))/sqrt(2*(1+rb))
z2<-((xv-txb)/sqrt(sxb)-(yv-tyb)/sqrt(syb))/sqrt(2*(1-rb))
iter<-0
while(iter<=10){
iter<-iter+1
ee<-z1^2+z2^2
w<-(1-ee/C)^2
sumw<-sum(w[ee<C])
tempx<-w*xv
txb<-sum(tempx[ee<C])/sumw
tempy<-w*yv
tyb<-sum(tempy[ee<C])/sumw
tempxy<-w*(xv-txb)*(yv-tyb)
tempx<-w*(xv-txb)^2
tempy<-w*(yv-tyb)^2
sxb<-sum((tempx[ee<C]))/sumw
syb<-sum((tempy[ee<C]))/sumw
rb<-sum(tempxy[ee<C])/(sqrt(sxb*syb)*sumw)
z1<-((xv-txb)/sqrt(sxb)+(yv-tyb)/sqrt(syb))/sqrt(2*(1+rb))
z2<-((xv-txb)/sqrt(sxb)-(yv-tyb)/sqrt(syb))/sqrt(2*(1-rb))
wo<-w
ee<-z1^2+z2^2
w<-(1-ee/C)^2
dif<-w-wo
crit<-sum(dif^2)/(mean(w))^2
if(crit <=epsilon)break
}
if(plotit){
em<-median(sqrt(ee))
r1<-em*sqrt((1+rb)/2)
r2<-em*sqrt((1-rb)/2)
temp<-c(0:179)
thet<-2*3.141593*temp/180
theta1<-r1*cos(thet)
theta2<-r2*sin(thet)
xplot1<-txb+(theta1+theta2)*sqrt(sxb)
yplot1<-tyb+(theta1-theta2)*sqrt(syb)
emax<-max(sqrt(ee[ee<7*em^2]))
r1<-emax*sqrt((1+rb)/2)
r2<-emax*sqrt((1-rb)/2)
theta1<-r1*cos(thet)
theta2<-r2*sin(thet)
xplot<-txb+(theta1+theta2)*sqrt(sxb)
yplot<-tyb+(theta1-theta2)*sqrt(syb)
totx<-c(xv,xplot,xplot1)
toty<-c(yv,yplot,yplot1)
plot(totx,toty,type="n",xlab="x",ylab="y")
points(xv,yv)
points(xplot,yplot,pch=".")
points(xplot1,yplot1,pch=".")
}
list(mest=c(txb,tyb),mvar=c(sxb,syb),mrho=rb)
}

relplot<-relfun

lsfitci<-function(x,y,nboot=599,alpha=.05,SEED=TRUE,xout=FALSE,outfun=out){
#
#   Compute a confidence interval for the slope parameters of
#   a linear regression equation when using the least squares estimator.
#
#   For p=1 predictor,
#   this function uses an adjusted percentile bootstrap method that
#   gives good results when the error term is heteroscedastic.
#   For p>1, a standard percentile bootstrap method is used
#   with FWE (the probability of at least one type I error)
#   controlled via the Bonferroni inequality.
#
#   The predictor values are assumed to be in the n by p matrix x.
#   The default number of bootstrap samples is nboot=599
#
#   SEED=T causes the seed of the random number generator to be set to 2,
#   otherwise the seed is not set.
#
#   Warning: probability coverage has been studied only when alpha=.05
#
x<-as.matrix(x)
p<-ncol(x)
pp<-p+1
temp<-elimna(cbind(x,y)) # Remove any missing values.
x<-temp[,1:p]
y<-temp[,p+1]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,pp]
}
x<-as.matrix(x)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples; please wait")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,regboot,x,y,lsfit) # A p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
if(p==1){
if(alpha != .05){print("Resetting alpha to .05")
print("With p=1, unknown how to adjust confidence interval")
print("when alpha is not equal to .05.")
}
ilow<-15
ihi<-584
if(length(y) < 250){
ilow<-13
ihi<-586
}
if(length(y) < 180){
ilow<-10
ihi<-589
}
if(length(y) < 80){
ilow<-7
ihi<-592
}
if(length(y) < 40){
ilow<-6
ihi<-593
}
ilow<-round((ilow/599)*nboot)
ihi<-round((ihi/599)*nboot)
}
if(p>1){
ilow<-round(alpha*nboot/2)+1
ihi<-nboot-ilow
}
lsfitci<-matrix(0,ncol(x),2)
for(i in 1:ncol(x)){
ip<-i+1
bsort<-sort(bvec[ip,])
lsfitci[i,1]<-bsort[ilow+1]
lsfitci[i,2]<-bsort[ihi]
}
bsort<-sort(bvec[1,])
interceptci<-c(bsort[15],bsort[584]) # Note: these indices assume nboot=599
crit.level<-NA
pmat<-NA
if(p>1){
crit.level<-alpha/p
pmat<-matrix(NA,nrow=p,ncol=2)
dimnames(pmat) <- list(NULL, c("Slope","p-value"))
for(pv in 1:p){
pmat[pv,1]<-pv
pp<-pv+1
#pmat[pv,2]<-sum(bvec[pp,]<0)/nboot
pmat[pv,2]<-(sum(bvec[pp,]<0)+.5*sum(bvec[pp,]==0))/nboot
temp3<-1-pmat[pv,2]
pmat[pv,2]<-2*min(pmat[pv,2],temp3)
}}
list(intercept.ci=interceptci,slope.ci=lsfitci,crit.level=crit.level,
p.values=pmat)
}

wmve<-function(m,SEED=TRUE){
#
# Compute skipped measure of location and scatter
# using MVE method
#
if(is.matrix(m))n<-nrow(m)
if(is.vector(m))n<-length(m)
flag<-rep(T,n)
vec<-out(m,plotit=FALSE,SEED=SEED)$out.id
flag[vec]<-F
if(is.vector(m)){
center<-mean(m[flag])
scatter<-var(m[flag])
}
if(is.matrix(m)){
center<-apply(m[flag,],2,mean)
scatter<-var(m[flag,])
}
list(center=center,cov=scatter)
}

wmw<-function(x,y){
#
# Do Mann-Whitney test
# Return the usual p-value followed by adjusted
# p-value using Hodges, Ramsey and Wechsler (1990) method
# (See Wilcox, 2003, p. 559.)
#
m<-length(x)
n<-length(y)
com<-rank(c(x,y))
xp1<-length(x)+1
x<-com[1:length(x)]
y<-com[xp1:length(com)]
u<-sum(y)-n*(n+1)/2
sigsq<-m*n*(n+m+1)/12
yv<-(u+.5-m*n/2)/sqrt(sigsq)
kv<-20*m*n*(m+n+1)/(m^2+n^2+n*m+m+n)
S<-yv^2
T1<-S-3
T2<-(155*S^2-416*S-195)/42
cv<-1+T1/kv+T2/kv^2
sighrw<-2*(1-pnorm(abs(cv*yv)))
z<-(u-(.5*m*n))/sqrt(sigsq)
sig<-2*(1-pnorm(abs(z)))
list(p.value=sig,sigad=sighrw)
}
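
# A minimal usage sketch (not part of the original source):
#   set.seed(1)
#   x <- rnorm(15)
#   y <- rnorm(15) + 1
#   wmw(x, y)   # p.value: normal approximation; sigad: HRW-adjusted p-value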

lsfitNci<-function(x,y,alpha=.05){
#
# Compute confidence for least squares
# regression using heteroscedastic method
# recommended by Long and Ervin (2000).
#
x<-as.matrix(x)
if(nrow(x) != length(y))stop("Length of y does not match number of x values")
m<-cbind(x,y)
m<-elimna(m)
y<-m[,ncol(x)+1]
temp<-lsfit(x,y)
x<-cbind(rep(1,nrow(x)),m[,1:ncol(x)])
xtx<-solve(t(x)%*%x)
h<-diag(x%*%xtx%*%t(x))
hc3<-xtx%*%t(x)%*%diag(temp$res^2/(1-h)^2)%*%x%*%xtx
df<-nrow(x)-ncol(x)
crit<-qt(1-alpha/2,df)
al<-ncol(x)
ci<-matrix(NA,nrow=al,ncol=3)
for(j in 1:al){
ci[j,1]<-j
ci[j,2]<-temp$coef[j]-crit*sqrt(hc3[j,j])
ci[j,3]<-temp$coef[j]+crit*sqrt(hc3[j,j])
}
print("Confidence intervals for intercept followed by slopes:")
list(ci=ci,stand.errors=sqrt(diag(hc3)))
}
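
# A minimal usage sketch (not part of the original source); the error
# term is made heteroscedastic on purpose:
#   set.seed(1)
#   x <- rnorm(40)
#   y <- x + rnorm(40, sd=abs(x))
#   lsfitNci(x, y)   # HC3-based confidence intervals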



pow2an<-function(x,y,ci=FALSE,plotit=TRUE,nboot=800){
#
# Do a power analysis when comparing the 20% trimmed
# means of two independent groups with the percentile
# bootstrap method.
#
#
x<-x[!is.na(x)]
y<-y[!is.na(y)]
lp<-NA
se<-yuen(x,y)$se
gval<-NA
dv<-seq(0,3.5*se,length=15)
for(i in 1:length(dv)){
gval[i]<-powest(x,y,dv[i],se)
}
if(!ci){
if(plotit){
plot(dv,gval,type="n",xlab="delta",ylab="power")
lines(dv,gval)
}}
if(ci){
print("Taking bootstrap samples. Please wait.")
datax <- matrix(sample(x, size = length(x) * nboot, replace = TRUE),
                nrow = nboot)
datay <- matrix(sample(y, size = length(y) * nboot, replace = TRUE),
                nrow = nboot)
pboot<-matrix(NA,ncol=15,nrow=nboot)
for(i in 1:nboot){
se<-yuen(datax[i,],datay[i,])$se
for(j in 1:length(dv)){
pboot[i,j]<-powest(x,y,dv[j],se)
}}
ll<-floor(.05*nboot+.5)
for(i in 1:15){
temp<-sort(pboot[,i])
lp[i]<-temp[ll]
}
plot(c(dv,dv),c(gval,lp),type="n",xlab="delta",ylab="power")
lines(dv,gval)
lines(dv,lp,lty=2)
}
list(delta=dv,power=gval,lowp=lp)
}
powest<-function(x=NA,y=NA,delta=0,se=NA,wv1=NA,wv2=NA,n1=NA,n2=NA){
#
# wv1 = Winsorized variance for group 1
# wv2 = Winsorized variance for group 2
#
# Only 20% trimming is allowed.
#
tr<-.2
if(is.na(se)){
if(is.na(wv1)){
h1 <- length(x) - 2 * floor(tr * length(x))
h2 <- length(y) - 2 * floor(tr * length(y))
q1 <- ((length(x) - 1) * winvar(x, tr))/(h1 * (h1 - 1))
q2 <- ((length(y) - 1) * winvar(y, tr))/(h2 * (h2 - 1))
}
if(!is.na(wv1)){
if(is.na(n1))stop("Need to specify sample size for group 1")
if(is.na(n2))stop("Need to specify sample size for group 2")
h1<-n1-2*floor(tr*n1)
h2<-n2-2*floor(tr*n2)
q1<-(n1-1)*wv1/(h1*(h1-1))
q2<-(n2-1)*wv2/(h2*(h2-1))
}
se<-sqrt(q1+q2)
}
ygam<-sqrt(2*.01155)*c(0:35)/8
pow<-c(500.0,540.0,607.0, 706.0, 804.0,981.0,1176.0,1402.0,1681.0, 2008.0,
   2353.0, 2769.0, 3191.0, 3646.0, 4124.0, 4617.0, 5101.0, 5630.0,
   6117.0, 6602.0, 7058.0, 7459.0, 7812.0, 8150.0, 8479.0, 8743.0,
   8984.0, 9168.0, 9332.0, 9490.0, 9607.0, 9700.0, 9782.0, 9839.0,
   9868.0)/10000
flag<-(delta==0 && se==0)
if(flag)powest<-.05
else{
chk<-floor(8*delta/se)+1
chk1<-chk+1
gval<-delta/se
d1<-(gval-(chk-1)/8)*8
if(chk > length(pow))powest<-1
if(chk == length(pow))pow[chk1]<-1
if(chk <= length(pow))
powest<-pow[chk]+d1*(pow[chk1]-pow[chk])
}
powest
}
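
# A minimal usage sketch (not part of the original source): power can be
# estimated directly from Winsorized variances and sample sizes,
#   powest(delta=.5, wv1=1, wv2=1, n1=30, n2=30)
# or a power curve can be traced from the data themselves:
#   set.seed(1)
#   pow2an(rnorm(30), rnorm(30))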

twopcor<-function(x1,y1,x2,y2,SEED=TRUE){
#
#   Compute a .95 confidence interval for
#   the difference between two Pearson
#   correlations corresponding to two independent
#   groups.
#
#   This function uses an adjusted percentile bootstrap method that
#   gives good results when the error term is heteroscedastic.
#
#   WARNING: If the number of bootstrap samples is altered, it is
#   unknown how to adjust the confidence interval when n1+n2 < 250.
#
nboot<-599  #Number of bootstrap samples
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
X<-elimna(cbind(x1,y1))
x1<-X[,1]
y1<-X[,2]
X<-elimna(cbind(x2,y2))
x2<-X[,1]
y2<-X[,2]
print("Taking bootstrap samples; please wait")
data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
bvec1<-apply(data1,1,pcorbsub,x1,y1) # A 1 by nboot matrix.
data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot)
bvec2<-apply(data2,1,pcorbsub,x2,y2) # A 1 by nboot matrix.
bvec<-bvec1-bvec2
ilow<-15
ihi<-584
if(length(y1)+length(y2) < 250){
ilow<-14
ihi<-585
}
if(length(y1)+length(y2) < 180){
ilow<-11
ihi<-588
}
if(length(y1)+length(y2) < 80){
ilow<-8
ihi<-592
}
if(length(y1)+length(y2) < 40){
ilow<-7
ihi<-593
}
bsort<-sort(bvec)
r1<-cor(x1,y1)
r2<-cor(x2,y2)
ci<-c(bsort[ilow],bsort[ihi])
list(r1=r1,r2=r2,ci=ci)
}
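
# A minimal usage sketch (not part of the original source); it assumes
# pcorbsub, called above, is available:
#   set.seed(1)
#   x1 <- rnorm(50); y1 <- x1 + rnorm(50)
#   x2 <- rnorm(50); y2 <- rnorm(50)
#   twopcor(x1, y1, x2, y2)   # .95 CI for the difference r1 - r2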

indtall<-function(x,y=NULL,tr=0,nboot=500,SEED=TRUE){
#
# Test the hypothesis of independence for
#  1. all pairs of variables in matrix x, if y=NULL, or
#  2. between each variable stored in the matrix x and y.
#  This is done by repeated calls to indt.
#
x<-as.matrix(x)
# First, eliminate any rows of data with missing values.
if(!is.null(y[1])){
temp <- cbind(x, y)
        temp <- elimna(temp)
        pval<-ncol(temp)-1
        x <- temp[,1:pval]
        y <- temp[, pval+1]
}
x<-as.matrix(x)
if(is.null(y[1])){
ntest<-(ncol(x)^2-ncol(x))/2
if(ntest==0)stop("Something is wrong. Does x have only one column?")
output<-matrix(NA,nrow=ntest,ncol=4)
dimnames(output)<-list(NULL,c("VAR","VAR","Test Stat.","p-value"))
x<-elimna(x)
ic<-0
for (j in 1:ncol(x)){
for (jj in 1:ncol(x)){
if(j<jj){
temp<-indt(x[,j],x[,jj],tr=tr,nboot=nboot,SEED=SEED)
ic<-ic+1
output[ic,1]<-j
output[ic,2]<-jj
output[ic,3]<-temp$dstat
output[ic,4]<-temp$p.value.d
}}}}
if(!is.null(y[1])){
ntest<-ncol(x)
output<-matrix(NA,nrow=ntest,ncol=3)
dimnames(output)<-list(NULL,c("VAR","Test Stat.","p-value"))
ic<-0
for (j in 1:ncol(x)){
temp<-indt(x[,j],y,tr=tr,nboot=nboot,SEED=SEED)
ic<-ic+1
output[ic,1]<-j
output[ic,2]<-temp$dstat
output[ic,3]<-temp$p.value.d
}}
list(output=output)
}





qhat<-function(x,y,nboot=50,op=2,SEED=TRUE,pr.track=F){
#
#   Estimate Q, a nonparametric measure of effect size, using
#   the .632 method of estimating prediction error.
#   (See Efron and Tibshirani, 1993, pp. 252--254)
#
#   The default number of bootstrap samples is nboot=50
#
#   Missing values are automatically removed
#
# op=1, use Rosenblatt's shifted histogram version of kernel estimate
# op=2, use adaptive kernel estimate with initial estimate based
#       on expected frequency curve.
#
x<-x[!is.na(x)]
y<-y[!is.na(y)]
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
datax<-matrix(sample(length(x),size=length(x)*nboot,replace=TRUE),nrow=nboot)
#    datax is an nboot by n matrix containing subscripts for bootstrap sample
#    associated with first group.
datay<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
#    datay is an nboot by m matrix containing subscripts for bootstrap sample
#    associated with second group.
bidx<-apply(datax,1,idb,n=length(x))
#  bidx is an n by nboot matrix. If the jth bootstrap sample from
#  1, ..., n contains the value i, bidx[i,j]=0; otherwise bidx[i,j]=1
bidy<-apply(datay,1,idb,n=length(y))
temp3<-matrix(0,ncol=length(x),nrow=nboot)
temp5<-matrix(0,ncol=length(y),nrow=nboot)
for(i in 1:nboot){
temp3[i,]<-disker(x[datax[i,]],y[datay[i,]],x,op=op)$zhat
# temp3 contains a vector of 0s and 1s: 1 if x[i]
#  is classified as coming from group 1.
temp5[i,]<-disker(y[datay[i,]],x[datax[i,]],y,op=op)$zhat
if(pr.track)print(paste("Iteration ", i, "of ", nboot," is complete"))
}
temp4<-temp3*t(bidx)
temp4<-apply(temp4,2,sum)/apply(bidx,1,sum)
temp6<-temp5*t(bidy)
temp6<-apply(temp6,2,sum)/apply(bidy,1,sum)
ep0x<-mean(temp4,na.rm=TRUE)  # epsilon hat_x
aperrorx<-disker(x,y,op=op)$phat  # apparent error
regprex<-.368*aperrorx+.632*ep0x
ep0y<-mean(temp6,na.rm=TRUE)
aperrory<-disker(y,x,op=op)$phat  # apparent error
regprey<-.368*aperrory+.632*ep0y
aperror<-(length(x)*aperrorx+length(y)*aperrory)/(length(x)+length(y))
regpre<-(length(x)*regprex+length(y)*regprey)/(length(x)+length(y))
list(qhat.632=regpre)
}
qhati=qhat
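
# A minimal usage sketch (not part of the original source); the call can
# be slow because each bootstrap sample refits the kernel estimates:
#   set.seed(1)
#   qhat(rnorm(25), rnorm(25) + 1)   # .632 estimate of the effect size Q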

disker<-function(x,y,z=NA,plotit=FALSE,op=1){
#
#  Estimate apparent effect size
#  using probability of correct classification based on values in
#  first group.
#
#  A "CORRECT" classification is the event of deciding
#  that an observation
#  from the first group did indeed come from the first group based
#  on a kernel density estimate of the distributions.
#  The function returns the
#  proportion of correctly classified observations (phat).
#
#  The function also returns a vector of 0s and 1s (in zhat)
#  indicating  whether values in z would be
#  classified as coming from the first group.
#
# op=1, use Rosenblatt's shifted histogram version of kernel estimate
# op=2, use adaptive kernel estimate with initial estimate based
#       on expected frequency curve.
#
xsort<-sort(x)
ysort<-sort(y)
xhat<-0
yhat<-0
yyhat<-0
if(op==1){
for(i in 1:length(xsort))xhat[i]<-kerden(x,0,xsort[i])
for(i in 1:length(xsort))yhat[i]<-kerden(y,0,xsort[i])
}
if(op==2){
xhat<-akerd(x,pts=xsort,pyhat=TRUE,plotit=FALSE)
yhat<-akerd(y,pts=xsort,pyhat=TRUE,plotit=FALSE)
}
yhat[is.na(yhat)]<-0
if(plotit){
if(op==1){
for(i in 1:length(ysort))yyhat[i]<-kerden(y,0,ysort[i])
}
if(op==2)yyhat<-akerd(y,pts=ysort,plotit=FALSE,pyhat=T)
plot(c(xsort,ysort),c(xhat,yyhat),type="n",xlab="",ylab="")
lines(xsort,xhat)
lines(ysort,yyhat)
}
#
# Compute apparent error
#
phat<-sum(xhat>yhat)/length(x)
zhat<-NA
if(!is.na(z[1])){
#
#  Make decisions for the data in z,
#  set zhat=1 if decide it came from
#  group 1.
#
zxhat<-0
zyhat<-0
zhat<-0
if(op==2){
zxhat<-akerd(x,pts=z,pyhat=TRUE,plotit=FALSE)
zyhat<-akerd(y,pts=z,pyhat=TRUE,plotit=FALSE)
}
for(i in 1:length(z)){
if(op==1){
zxhat[i]<-kerden(x,0,z[i])
zyhat[i]<-kerden(y,0,z[i])
}
zhat[i]<-1
if(is.na(zxhat[i]) || is.na(zyhat[i])){
# Missing values,
# data can't be used to make a decision,
# so make a random decision about whether a value
# came from first group.
arb<-runif(1)
zhat[i]<-1
if(arb < .5)zhat[i]<-0
}
else
if(zxhat[i]<zyhat[i])zhat[i]<-0
}
}
list(phat=phat,zhat=zhat)
#phat is the apparent probability  of a correct classification
}


lplot<-function(x,y,span=.75,pyhat=FALSE,eout=FALSE,xout=FALSE,outfun=out,plotit=TRUE,
expand=.5,low.span=2/3,varfun=pbvar,cor.op=FALSE,cor.fun=pbcor,pr=TRUE,
scale=FALSE,xlab="X",ylab="Y",zlab="",theta=50,phi=25,family="gaussian",
duplicate="error",pc="*",ticktype="simple",...){
#
# Plot regression surface using LOESS
#
# low.span is the span when lowess is used and there is one predictor
# span is the span when loess is used with two or more predictors
# pyhat=T will return Y hat values
# eout=T will eliminate outliers
# xout=T  will eliminate points where X is an outliers
# family="gaussian"; see the description of the built-in function loess
#
# duplicate="error"
# In some situations where duplicate values occur, when plotting with
# two predictors, it is necessary to set duplicate="strip"
#
#library(modreg)
library(stats)
x<-as.matrix(x)
m<-elimna(cbind(x,y))
n.orig=nrow(m)
n.keep=n.orig
if(!is.matrix(x))stop("x is not a matrix")
d<-ncol(x)
if(d>=2){
library(akima)
if(ncol(x)==2 && !scale){
if(pr){
print("scale=F is specified.")
print("If there is dependence, might use scale=T")
print("To get a p-value, based on the measure of the")
print("strength of association based on this function,")
print("use the function lplotPV")
}}
x<-m[,1:d]
y<-m[,d+1]
if(eout && xout)stop("Can't have both eout and xout = T")
if(eout){
flag<-outfun(m,plotit=FALSE,...)$keep
m<-m[flag,]
n.keep=nrow(m)
}
if(xout){
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
n.keep=nrow(m)
}
x<-m[,1:d]
y<-m[,d+1]
if(d==2)fitr<-fitted(loess(y~x[,1]*x[,2],span=span,family=family))
if(d==3)fitr<-fitted(loess(y~x[,1]*x[,2]*x[,3],span=span,family=family))
if(d==4)fitr<-fitted(loess(y~x[,1]*x[,2]*x[,3]*x[,4],span=span,family=family))
if(d>4)stop("Can have at most four predictors")
last<-fitr
if(d==2 && plotit){
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fitr<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate)
persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand,
scale=scale,ticktype=ticktype)
}}
if(d==1){
m<-elimna(cbind(x,y))
x<-m[,1:d]
y<-m[,d+1]
if(eout && xout)stop("Can't have both eout and xout = T")
if(eout){
flag<-outfun(m,plotit=FALSE,...)$keep
m<-m[flag,]
n.keep=nrow(m)
}
if(xout){
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
n.keep=nrow(m)
}
x<-m[,1:d]
y<-m[,d+1]
if(plotit){
plot(x,y,xlab=xlab,ylab=ylab,pch=pc)
lines(lowess(x,y,f=low.span))
}
tempxy<-lowess(x,y,f=low.span)
yyy<-tempxy$y
xxx<-tempxy$x
last<-yyy
chkit<-sum(duplicated(x))
if(chkit>0){
last<-rep(1,length(y))
for(j in 1:length(yyy)){
for(i in 1:length(y)){
if(x[i]==xxx[j])last[i]<-yyy[j]
}}
}
}
E.power<-1
if(!cor.op)E.power<-varfun(last[!is.na(last)])/varfun(y)
if(cor.op || E.power>=1){
if(d==1){
xord<-order(x)
E.power<-cor.fun(last,y[xord])$cor^2
}
if(d>1)E.power<-cor.fun(last,y)$cor^2
}
E.power=as.numeric(E.power)
if(!pyhat)last <- NULL
list(Strength.Assoc=sqrt(E.power),Explanatory.power=E.power,yhat.values=last,n=n.orig,
n.keep=n.keep)
}
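
# A minimal usage sketch (not part of the original source); the akima
# package is required when there are two predictors:
#   set.seed(1)
#   x <- matrix(rnorm(100), ncol=2)
#   y <- x[,1]^2 + rnorm(50)
#   lplot(x, y, scale=TRUE)   # LOESS surface plus strength of association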
qci<-function(x,q=.5,alpha=.05,op=3){
#
# Compute a confidence interval for qth quantile
#  using an  estimate of standard error based on
#  adaptive kernel density estimator.
# The qth quantile is estimated with a single order statistic.
#
# For argument op, see the function qse.
#
if(sum(duplicated(x))>0)stop("Duplicate values detected; use hdpb")
n<-length(x)
xsort<-sort(x)
iq <- floor(q * n + 0.5)
qest<-xsort[iq]
se<-qse(x,q,op=op)
crit<-qnorm(1-alpha/2)
ci.low<-qest-crit*se
ci.up<-qest+crit*se
list(ci.low=ci.low,ci.up=ci.up,q.est=qest)
}
qint<-function(x,q=.5,alpha=.05,pr=TRUE){
#
# Compute a 1-alpha confidence interval for the qth quantile
# The function returns the exact probability coverage.
#
if(pr){
if(sum(duplicated(x))>0)print("Duplicate values detected; use hdpb")
}
n<-length(x)
ii<-floor(q*n+.5)
jj<-ii+1
if(ii<=0)stop("Cannot compute a confidence interval for this q")
if(jj>n)stop("Cannot compute a confidence interval for this q")
jjm<-jj-1
iim<-ii-1
cicov<-pbinom(jjm,n,q)-pbinom(iim,n,q)
while(cicov<1-alpha){
iim<-max(iim-1,0)
jjm<-min(jjm+1,n)
if(iim==0 && jjm==n)break
cicov<-pbinom(jjm,n,q)-pbinom(iim,n,q)
}
xsort<-sort(x)
low<-xsort[iim+1]
hi<-xsort[jjm]
if(cicov<1-alpha){
if(pr)print("Warning: Desired probability coverage could not be achieved")
}
list(ci.low=low,ci.up=hi,ci.coverage=cicov)
}
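
# A worked illustration (not part of the original source): with n=25 and
# q=.5 the interval runs from the 8th to the 18th order statistic, since
# pbinom(17,25,.5)-pbinom(8,25,.5) is about .924 (< .95), while one more
# widening step gives pbinom(18,25,.5)-pbinom(7,25,.5), about .971:
#   set.seed(1)
#   qint(rexp(25))   # ci.coverage is approximately .971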

anova1<-function(x){
#
# conventional one-way anova
#
if(is.matrix(x))x<-listm(x)
A<-0
B<-0
C<-0
N<-0
for(j in 1:length(x)){
N<-N+length(x[[j]])
A<-A+sum(x[[j]]^2)
B<-B+sum(x[[j]])
C<-C+(sum(x[[j]]))^2/length(x[[j]])
}
SST<-A-B^2/N
SSBG<-C-B^2/N
SSWG<-A-C
nu1<-length(x)-1
nu2<-N-length(x)
MSBG<-SSBG/nu1
MSWG<-SSWG/nu2
FVAL<-MSBG/MSWG
pvalue<-1-pf(FVAL,nu1,nu2)
list(F.test=FVAL,p.value=pvalue,df1=nu1,df2=nu2,MSBG=MSBG,MSWG=MSWG)
}
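
# A minimal usage sketch (not part of the original source); the F statistic
# matches the classic one-way ANOVA computed by aov in base R:
#   set.seed(1)
#   g <- list(rnorm(10), rnorm(10) + 1, rnorm(10))
#   anova1(g)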
qest<-function(x,q=.5){
#
# Compute an estimate of qth quantile
#  using a single order statistic
#
x<-elimna(x)
if(q<=0 || q>=1)stop("q must be > 0 and < 1")
n<-length(x)
xsort<-sort(x)
iq <- floor(q * n + 0.5)
qest<-NA
if(iq>0 && iq<=n)qest<-xsort[iq]
qest
}
taureg<-function(m,y,corfun=tau){
#
#    Compute Kendall's tau between y and each of the
#    p variables stored  in the n by p matrix m.
#
#    Alternative measures of correlation can be used via the
#    argument corfun. The only requirement is that the function
#    corfun returns the correlation in corfun$cor and the p-value
#    in corfun$siglevel.
#
#    This function also returns the two-sided significance level
#    for each correlation. (See chapter 9 of Wilcox, 2005, for details.)
#
m<-as.matrix(m)
tauvec<-NA
siglevel<-NA
for (i in 1:ncol(m)){
pbc<-corfun(m[,i],y)
tauvec[i]<-pbc$cor
siglevel[i]<-pbc$siglevel
}
list(cor=tauvec,siglevel=siglevel)
}

correg.sub<-function(X,theta,corfun=tau){
np<-ncol(X)
p<-np-1
x<-X[,1:p]
y<-X[,np]
temp<-t(t(x)*theta)
yhat<-apply(temp,1,sum)
res<-y-yhat
val<-sum(abs(taureg(x,res,corfun=corfun)$cor))
val
}
correg<-function(x,y,corfun=tau,loc.fun=median){
#
# A generalization of the Theil-Sen estimator
# Rather than use Kendall's tau, can use an alternative
# correlation via the argument corfun.
# loc.fun determines how the intercept is computed;
#
# The Nelder-Mead method is used rather than
# Gauss-Seidel.
#
#
X<-cbind(x,y)
X<-elimna(X)
np<-ncol(X)
N<-np-1
temp<-tsreg(x,y)$coef
START<-temp[2:np]
temp<-nelderv2(X,N,FN=correg.sub,START=START,corfun=corfun)
x <- as.matrix(x)
alpha <- loc.fun(y - x %*% temp)
coef <- c(alpha,temp)
res <- y - x %*% temp - alpha
list(coef = coef, residuals = res)
}
rmulnorm<-function(n,p,cmat,SEED=F){
#
# Generate data from a multivariate normal
# n= sample size
# p= number of variables
# cmat is the covariance (or correlation) matrix
#
# Method (e.g., Browne, M. W. (1968). A comparison of factor analytic
# techniques. Psychometrika, 33, 267-334):
#  Let S be a square root of cmat (here the symmetric square root
#  computed by matsqrt). If the rows of X are independent standard
#  normal vectors, then XS has population covariance matrix cmat.
#
if(SEED)set.seed(2)
y<-matrix(rnorm(n*p),ncol=p)
rval<-matsqrt(cmat)
y<-t(rval%*%t(y))
y
}

matsqrt <- function(x) {
# Compute the symmetric square root of a matrix via its eigendecomposition
xev1<-NA
xe <- eigen(x)
xe1 <- xe$values
if(all(xe1 >= 0)) {
xev1 <- diag(sqrt(xe1))
}
if(is.na(xev1[1]))stop("The matrix has negative eigenvalues")
xval1 <- cbind(xe$vectors)
xval1i <- solve(xval1)
y <- xval1 %*% xev1 %*% xval1i
y
}
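
# A minimal check (not part of the original source): the symmetric square
# root reproduces the original matrix when multiplied by itself.
#   R <- matrix(c(1, .5, .5, 1), 2, 2)
#   S <- matsqrt(R)
#   S %*% S   # approximately R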


ghmul<-function(n,g=0,h=0,p=2,cmat=diag(rep(1,p)),SEED=F){
#
# generate n observations from a p-variate dist
# based on the g and h dist.
#
# cmat is the correlation matrix
#
x<-rmulnorm(n,p,cmat,SEED=SEED)
for(j in 1:p){
if (g>0){
x[,j]<-(exp(g*x[,j])-1)*exp(h*x[,j]^2/2)/g
}
if(g==0)x[,j]<-x[,j]*exp(h*x[,j]^2/2)
}
x
}
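
# A minimal usage sketch (not part of the original source): generate
# skewed (g=.5), heavy-tailed (h=.1) bivariate data with correlation .4
# before transformation:
#   set.seed(1)
#   x <- ghmul(50, g=.5, h=.1, p=2, cmat=matrix(c(1, .4, .4, 1), 2, 2))
#   cor(x)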

yhall<-function(x,y,tr=.2,alpha=.05){
#
#  Perform Yuen's test for trimmed means on the data in x and y
#  in conjunction with Hall's transformation.
#  The default amount of trimming is 20%
#  Missing values (values stored as NA) are automatically removed.
#
#  A confidence interval for the trimmed mean of x minus the
#  the trimmed mean of y is computed and returned in yuen$ci.
#
x<-x[!is.na(x)]  # Remove any missing values in x
y<-y[!is.na(y)]  # Remove any missing values in y
winx<-winval(x,tr=tr)
winy<-winval(y,tr=tr)
m3x<-sum((winx-mean(winx))^3)/length(x)
m3y<-sum((winy-mean(winy))^3)/length(y)
h1<-length(x)-2*floor(tr*length(x))
h2<-length(y)-2*floor(tr*length(y))
mwx<-length(x)*m3x/h1
mwy<-length(y)*m3y/h2
q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1))
q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1))
sigtil<-q1+q2
mtil<-(mwx/h1^2)-(mwy/h2^2)
dif<-mean(x,tr)-mean(y,tr)
thall<-dif+mtil/(6*sigtil)+mtil*dif^2/(3*sigtil^2)+mtil^2*dif^3/(27*sigtil^4)
thall<-thall/sqrt(sigtil)
nhat<-mtil/sigtil^1.5
list(test.stat=thall,nu.tilda=nhat,sig.tilda=sqrt(sigtil))
}

linconm<-function(x,con=0,est=onestep,alpha=.05,nboot=500,pr=TRUE,...){
#
#   Compute a 1-alpha confidence interval for a set of d linear contrasts
#   involving M-estimators using a bootstrap method. (See Chapter 6.)
#   Independent groups are assumed.
#
#   The data are assumed to be stored in x in list mode.  Thus,
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J, say.
#
#   con is a J by d matrix containing the contrast coefficents of interest.
#   If unspecified, all pairwise comparisons are performed.
#   For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(1,-1,0,0,1,-1)
#   will test two contrasts: (1) the sum of the first two measures of location is
#   equal to the sum of the second two, and (2) the difference between
#   the first two is equal to the difference between the measure of location for
#   groups 5 and 6.
#
#   The default number of bootstrap samples is nboot=500
#
#   This function uses the function trimpartt, written for this book.
#
if(pr){
print("Note: confidence intervals are adjusted to control FWE")
print("But p-values are not adjusted to control FWE")
}
if(is.matrix(x))x<-listm(x)
con<-as.matrix(con)
if(!is.list(x))stop("Data must be stored in list mode.")
J<-length(x)
Jm<-J-1
d<-(J^2-J)/2
if(sum(con^2)==0){
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
if(nrow(con)!=length(x))stop("The number of groups does not match the number of contrast coefficients.")
m1<-matrix(0,J,nboot)
m2<-1 # Initialize m2
mval<-1
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
if(pr)print(paste("Working on group ",j))
mval[j]<-est(x[[j]],...)
xcen<-x[[j]]-est(x[[j]],...)
data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
m1[j,]<-apply(data,1,est,...) # A J by nboot matrix.
m2[j]<-var(m1[j,])
}
boot<-matrix(0,ncol(con),nboot)
bot<-1
for (d in 1:ncol(con)){
top<-apply(m1,2,trimpartt,con[,d])
#            A vector of length nboot containing psi hat values
consq<-con[,d]^2
bot[d]<-trimpartt(m2,consq)
boot[d,]<-abs(top)/sqrt(bot[d])
}
testb<-apply(boot,2,max)
ic<-floor((1-alpha)*nboot)
testb<-sort(testb)
psihat<-matrix(0,ncol(con),6)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper","se","p.value"))
for (d in 1:ncol(con)){
psihat[d,1]<-d
psihat[d,2]<-trimpartt(mval,con[,d])
psihat[d,3]<-psihat[d,2]-testb[ic]*sqrt(bot[d])
psihat[d,4]<-psihat[d,2]+testb[ic]*sqrt(bot[d])
psihat[d,5]<-sqrt(bot[d])
pval<-mean((boot[d,]<abs(psihat[d,2])/psihat[d,5]))
psihat[d,6]<-1-pval
}
list(psihat=psihat,crit=testb[ic],con=con)
}




mmean<-function(x,est=tmean,...){
center<-NA
if(is.list(x))center=lapply(x,est,...)
if(is.matrix(x))center<-apply(x,2,est,...)
center
}


akerd<-function(xx,hval=NA,aval=.5,op=1,fr=.8,pyhat=FALSE,pts=NA,plotit=TRUE,
xlab="",ylab="",zlab="",theta=50,phi=25,expand=.5,scale=TRUE,ticktype="simple"){
#
# Compute adaptive kernel density estimate
# for univariate data
# (See Silverman, 1986)
#
# op=1 Use expected frequency as initial estimate of the density
# op=2 Univariate case only
#      Use normal kernel to get initial estimate of the density
#  ticktype="detailed" will create ticks as done for a two-dimensional plot
#
#  Note: when pyhat=T, returns the estimate of the density at these points AFTER
#  putting the points in ascending order.
#
xx=elimna(xx)
fval<-"Done"
if(is.matrix(xx)){
if(ncol(xx)>1)fval<-akerdmul(xx,pts=pts,hval=hval,aval=aval,fr=fr,pr=pyhat,
plotit=plotit,theta=theta,phi=phi,expand=expand,scale=scale,ticktype=ticktype)
plotit<-F
}
if(is.matrix(xx) && ncol(xx)==1)xx<-xx[,1]
if(!is.matrix(xx)){
x<-sort(xx)
if(op==1){
m<-mad(x)
if(m==0){
temp<-idealf(x)
m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25))
}
if(m==0)m<-sqrt(winvar(x)/.4129)
if(m==0)stop("All measures of dispersion are equal to 0")
fhat <- rdplot(x,pyhat=TRUE,plotit=FALSE,fr=fr)
if(m>0)fhat<-fhat/(2*fr*m)
}
if(op==2){
init<-density(xx)
fhat <- init$y
x<-init$x
}
n<-length(x)
if(is.na(hval)){
sig<-sqrt(var(x))
temp<-idealf(x)
iqr<-(temp$qu-temp$ql)/1.34
A<-min(c(sig,iqr))
if(A==0)A<-sqrt(winvar(x))/.64
hval<-1.06*A/length(x)^(.2)
# See Silverman, 1986, pp. 47-48
}
gm<-exp(mean(log(fhat[fhat>0])))
alam<-(fhat/gm)^(0-aval)
dhat<-NA
if(is.na(pts[1]))pts<-x
pts<-sort(pts)
for(j in 1:length(pts)){
temp<-(pts[j]-x)/(hval*alam)
epan<-ifelse(abs(temp)<sqrt(5),.75*(1-.2*temp^2)/sqrt(5),0)
dhat[j]<-mean(epan/(alam*hval))
}
if(plotit){
plot(pts,dhat,type="n",ylab=ylab,xlab=xlab)
lines(pts,dhat)
}
#fval<-"Done"
if(pyhat)fval<-dhat
}
fval
}


sedm<-function(x,y=NA,q=.5){
#
# Let D=X_m-Y_m be the difference between
# mth order statistics where X and Y are dependent.
# Estimate the standard error of D with m=[qn+.5]
# using adaptive kernel method
#
# This function is used by qdtest
#
x<-as.matrix(x)
if(is.na(y[1]))y<-x[,2]
x<-x[,1]
n<-length(x)
m<-floor(q*n+.5)
xord<-sort(x)
xq<-xord[m]
yord<-sort(y)
yq<-yord[m]
flag1<-(x<=xq)
flag2<-(y<=yq)
A<-mean(flag1*flag2)
flag1<-(x<=xq)
flag2<-(y>yq)
B<-mean(flag1*flag2)
flag1<-(x>xq)
flag2<-(y<=yq)
C1<-mean(flag1*flag2)
flag1<-(x>xq)
flag2<-(y>yq)
D1<-mean(flag1*flag2)
fx<-akerd(x,pts=xq,plotit=FALSE,pyhat=T)
fy<-akerd(y,pts=yq,plotit=FALSE,pyhat=T)
v1<-(q-1)^2*A
v2<-(q-1)*q*B
v3<-(q-1)*q*C1
v4<-q*q*D1
temp<-0-2*(v1+v2+v3+v4)/(fx*fy)+q*(1-q)/fx^2+q*(1-q)/fy^2
val<-sqrt(temp/n)
val
}

akerdmul<-function(x,pts=NA,hval=NA,aval=.5,fr=.8,pr=FALSE,plotit=TRUE,theta=50,
phi=25,expand=.5,scale=FALSE,xlab="X",ylab="Y",zlab="",ticktype="simple"){
#
# Compute adaptive kernel density estimate
# for multivariate data
# (See Silverman, 1986)
#
#  Use expected frequency as initial estimate of the density
#
# hval is the span used by the kernel density estimator
# fr is the span used by the expected frequency curve
# pr=T, returns density estimates at pts
# ticktype="detailed" will create ticks as done in two-dimensional plot
#
library(MASS)
library(akima)
if(is.na(pts[1]))pts<-x
if(ncol(x)!=ncol(pts))stop("Number of columns for x and pts do not match")
if(!is.matrix(x))stop("Data should be stored in a matrix")
fhat <- rdplot(x,pyhat=TRUE,plotit=FALSE,fr=fr)
n<-nrow(x)
d<-ncol(x)
pi<-gamma(.5)^2
cd<-c(2,pi)
if(d==2)A<-1.77
if(d==3)A<-2.78
if(d>2){
for(j in 3:d)cd[j]<-2*pi*cd[j-2]/j  # volume of the unit j-sphere; Silverman, p. 76
}
if(d>3)A<-(8*d*(d+2)*(d+4)*(2*sqrt(pi))^d)/((2*d+1)*cd[d])  # p. 87
if(is.na(hval))hval<-A*(1/n)^(1/(d+4))  # Silverman, p. 86
svec<-NA
for(j in 1:d){
sig<-sqrt(var(x[,j]))
temp<-idealf(x[,j])
iqr<-(temp$qu-temp$ql)/1.34
A<-min(c(sig,iqr))
x[,j]<-x[,j]/A
svec[j]<-A
}
hval<-hval*sqrt(mean(svec^2)) # Silverman, p. 87
# Now do adaptive; see Silverman, 1986, p. 101
gm<-exp(mean(log(fhat[fhat>0])))
alam<-(fhat/gm)^(0-aval)
dhat<-NA
nn<-nrow(pts)
for(j in 1:nn){
#temp1<-t(t(x)-x[j,])/(hval*alam)
temp1<-t(t(x)-pts[j,])/(hval*alam)
temp1<-temp1^2
temp1<-apply(temp1,1,FUN="sum")
temp<-.5*(d+2)*(1-temp1)/cd[d]
epan<-ifelse(temp1<1,temp,0) # Epanechnikov kernel, p. 76
dhat[j]<-mean(epan/(alam*hval)^d)
}
if(plotit && d==2){
fitr<-dhat
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1]
mkeep<-x[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr)
persp(fit,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand,
scale=scale,ticktype=ticktype)
}
m<-"Done"
if(pr)m<-dhat
m
}
cov2med<-function(x,y=NA,q=.5){
#
# Estimate the covariance between two dependent
# order statistics
# By default, q=.5 meaning that an estimate of
# of covariance is made when a single order statistic
# is used to estimate the median.
# y=NA, function returns squared standard error.
#
if(is.na(y[1]))val<-qse(x,q=q,op=3)^2
if(!is.na(y[1])){
if(sum((x-y)^2)==0)val<-qse(x,q=q,op=3)^2
if(sum((x-y)^2)>0){
n<-length(x)
m<-floor(q*n+.5)
xord<-sort(x)
xq<-xord[m]
yord<-sort(y)
yq<-yord[m]
flag1<-(x<=xq)
flag2<-(y<=yq)
A<-mean(flag1*flag2)
flag1<-(x<=xq)
flag2<-(y>yq)
B<-mean(flag1*flag2)
flag1<-(x>xq)
flag2<-(y<=yq)
C1<-mean(flag1*flag2)
flag1<-(x>xq)
flag2<-(y>yq)
D1<-mean(flag1*flag2)
fx<-akerd(x,pts=xq,plotit=FALSE,pyhat=T)
fy<-akerd(y,pts=yq,plotit=FALSE,pyhat=T)
v1<-(q-1)^2*A
v2<-(q-1)*q*B
v3<-(q-1)*q*C1
v4<-q*q*D1
val<-((v1+v2+v3+v4)/(fx*fy))/n
}}
val
}


covmmed<-function(x,p=length(x),grp=c(1:p),q=.5){
#
#  Estimate the covariance matrix for the sample medians
#  based on a SINGLE order statistic, using
#  the data in the R variable x.
# (x[[1]] contains the data for group 1, x[[2]] the data for group 2, etc.)
#  The function returns a p by p matrix of covariances, the diagonal
#  elements being equal to the squared standard errors of the sample
#  medians, where p is the number of groups to be included.
#  By default, all the groups in x are used, but a subset of
#  the groups can be used via grp.  For example, if
#  the goal is to estimate the covariances between the medians
#   for groups 1, 2, and 5, use the command grp<-c(1,2,5)
#  before calling this function.
#
#  Missing values (values stored as NA) are not allowed.
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("The data are not stored in a matrix or list mode.")
p<-length(grp)
pm1<-p-1
for (i in 1:pm1){
ip<-i+1
if(length(x[[grp[ip]]])!=length(x[[grp[i]]]))stop("The number of observations in each group must be equal")
}
n<-length(x[[grp[1]]])
covest<-matrix(0,p,p)
for(j in 1:p){
for(k in 1:p){
if(j==k)covest[j,j]<-cov2med(x[[grp[j]]],q=q)
if(j<k){
covest[j,k]<-cov2med(x[[grp[j]]],x[[grp[k]]],q=q)
covest[k,j]<-covest[j,k]
}}}
covest
}

msplit<-function(J,K,data,grp=c(1:p),p=J*K,q=.5){
#  Perform a J by K anova using medians with
#  repeated measures on the second factor. That is, a split-plot design
#  is assumed, with the first factor consisting of independent groups.
#
#  The R variable data is assumed to contain the raw
#  data stored in list mode. data[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  data[[K]] is the data for level 1,K
#  data[[K+1]] is the data for level 2,1, data[[2K]] is level 2,K, etc.
#
#  It is assumed that data has length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
x<-data
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                data <- y
        }
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups in data is")
print(length(data))
print("Warning: These two values are not equal")
}
if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.")
tmeans<-0
h<-0
v<-matrix(0,p,p)
klow<-1-K
kup<-0
#for (i in 1:p)tmeans[i]<-median(data[[grp[i]]],na.rm=TRUE)
for (i in 1:p)tmeans[i]<-qest(data[[grp[i]]],q=q)
for (j in 1:J){
h[j]<-length(data[[grp[j]]])
#    h is the  sample size for the jth level of factor A
#   Use covmmed to determine blocks of squared standard errors and
#   covariances.
klow<-klow+K
kup<-kup+K
sel<-c(klow:kup)
v[sel,sel]<-covmmed(data[grp[klow:kup]],q=q)
}
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
#  Do test for factor A
cmat<-kron(cj,ik)  # Contrast matrix for factor A
Qa<-johansp(cmat,tmeans,v,h,J,K)
Qa.siglevel<-1-pf(Qa$teststat,J-1,999)
# Do test for factor B
cmat<-kron(ij,ck)  # Contrast matrix for factor B
Qb<-johansp(cmat,tmeans,v,h,J,K)
Qb.siglevel<-1-pf(Qb$teststat,K-1,999)
# Do test for factor A by B interaction
cmat<-kron(cj,ck)  # Contrast matrix for factor A by B
Qab<-johansp(cmat,tmeans,v,h,J,K)
Qab.siglevel<-1-pf(Qab$teststat,(J-1)*(K-1),999)
list(Qa=Qa$teststat,Qa.siglevel=Qa.siglevel,
Qb=Qb$teststat,Qb.siglevel=Qb.siglevel,
Qab=Qab$teststat,Qab.siglevel=Qab.siglevel)
}


bdiag<-function(nb,np,rho=0){
#
# Let p=nb*np
# Create a p by p block diagonal matrix with each
# np by np block having a correlation matrix with common
# correlation rho
# So nb is the number of blocks
#
p<-nb*np
m<-matrix(0,p,p)
mat<-matrix(rho,np,np)
diag(mat)<-1
ilow<-1-np
iup<-0
for(i in 1:nb){
ilow<-ilow+np
iup<-iup+np
m[ilow:iup,ilow:iup]<-mat
}
m
}
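
# A minimal usage sketch (not part of the original source):
#   bdiag(2, 3, rho=.5)   # 6 by 6 matrix: two 3 by 3 blocks, each with
#                         # unit diagonal and off-diagonal correlation .5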

bootcov<-function(x,est=median,nboot=100,pr=TRUE,SEED=FALSE,...){
#
# For multivariate data, determine the squared standard errors
# and covariances when using the estimator
# est.
#
# SEED=TRUE, sets the seed of the random number generator.
#
if(SEED)set.seed(2)
if(is.list(x))x<-matl(x)
x<-elimna(x)
bvec<-matrix(NA,ncol=ncol(x),nrow=nboot)
if(pr)print("Taking Bootstrap Samples. Please wait.")
for(i in 1:nboot){
data<-sample(nrow(x),size=nrow(x),replace=TRUE)
bvec[i,]<-apply(x[data,],2,FUN=est,...)
}
covmat<-var(bvec)
covmat
}
yuenbt<-function(x,y,tr=.2,alpha=.05,nboot=599,side=FALSE,nullval=0,pr=TRUE,
plotit=FALSE,op=1,SEED=TRUE){
#
#  Compute a 1-alpha confidence interval for the difference between
#  the trimmed means corresponding to two independent groups.
#  The bootstrap percentile t method is used.
#
#  The default amount of trimming is tr=.2
#  side=T indicates two-sided method using absolute value of the
#  test statistics within the bootstrap; otherwise the equal-tailed method
#  is used.
#
#  This function uses trimse.
#
side<-as.logical(side)
p.value<-NA
yuenbt<-vector(mode="numeric",length=2)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
x<-x[!is.na(x)]  # Remove missing values in x
y<-y[!is.na(y)]  # Remove missing values in y
xcen<-x-mean(x,tr)
ycen<-y-mean(y,tr)
if(!side){
if(pr)print("NOTE: p-value computed only when side=T")
}
test<-(mean(x,tr)-mean(y,tr))/sqrt(trimse(x,tr=tr)^2+trimse(y,tr=tr)^2)
datax<-matrix(sample(xcen,size=length(x)*nboot,replace=TRUE),nrow=nboot)
datay<-matrix(sample(ycen,size=length(y)*nboot,replace=TRUE),nrow=nboot)
top<-apply(datax,1,mean,tr)-apply(datay,1,mean,tr)
botx<-apply(datax,1,trimse,tr)
boty<-apply(datay,1,trimse,tr)
tval<-top/sqrt(botx^2+boty^2)
if(plotit){
if(op == 1)
akerd(tval)
if(op == 2)
rdplot(tval)
}
if(side)tval<-abs(tval)
tval<-sort(tval)
icrit<-floor((1-alpha)*nboot+.5)
ibot<-floor(alpha*nboot/2+.5)
itop<-floor((1-alpha/2)*nboot+.5)
se<-sqrt((trimse(x,tr))^2+(trimse(y,tr))^2)
yuenbt[1]<-mean(x,tr)-mean(y,tr)-tval[itop]*se
yuenbt[2]<-mean(x,tr)-mean(y,tr)-tval[ibot]*se
if(side){
yuenbt[1]<-mean(x,tr)-mean(y,tr)-tval[icrit]*se
yuenbt[2]<-mean(x,tr)-mean(y,tr)+tval[icrit]*se
p.value<-(sum(abs(test)<=abs(tval)))/nboot
}
list(ci=yuenbt,test.stat=test,p.value=p.value,est.1=mean(x,tr),est.2=mean(y,tr),est.dif=mean(x,tr)-mean(y,tr),
n1=length(x),n2=length(y))
}
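
# A minimal usage sketch (not part of the original source):
#   set.seed(1)
#   x <- rnorm(25)
#   y <- rnorm(25) + .8
#   yuenbt(x, y, side=TRUE)   # symmetric two-sided CI plus a p-value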

trimcibt<-function(x,tr=.2,alpha=.05,nboot=599,side=TRUE,plotit=FALSE,op=1,
nullval=0,pr=TRUE,SEED=TRUE,prCRIT=FALSE){
#
#  Compute a 1-alpha confidence interval for the trimmed mean
#  using a bootstrap percentile t method.
#
#  The default amount of trimming is tr=.2
#  side=T, for true,  indicates the symmetric two-sided method
#
#
#  Side=F yields an equal-tailed confidence interval
#
#
#  NOTE: p.value is reported when side=T only.
#
x=elimna(x)
side<-as.logical(side)
p.value<-NA
if(SEED)set.seed(2) # set seed of random number generator so that
#   results can be duplicated.
test<-(mean(x,tr)-nullval)/trimse(x,tr)
if(pr)print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
data<-data-mean(x,tr)
top<-apply(data,1,mean,tr)
bot<-apply(data,1,trimse,tr)
tval<-top/bot
if(plotit){
if(op==1)akerd(tval)
if(op==2)rdplot(tval)
}
if(side)tval<-abs(tval)
tval<-sort(tval)
icrit<-floor((1-alpha)*nboot+.5)
ibot<-round(alpha*nboot/2)+1
itop<-nboot-ibot-1
if(!side){
if(prCRIT)print(paste("Lower crit=",tval[ibot],"Upper crit=",tval[itop]))
if(prCRIT)print(paste(".025 Lower Type I=",mean(tval<=0-1.96)))
if(prCRIT)print(paste(".05 Lower Type I=",mean(tval<=0-1.645)))
if(prCRIT)print(paste(".025 Upper Type I=",mean(tval>=1.96)))
if(prCRIT)print(paste(".05 Upper Type I=",mean(tval>=1.645)))
trimcibt<-mean(x,tr)-tval[itop]*trimse(x,tr)
trimcibt[2]<-mean(x,tr)-tval[ibot]*trimse(x,tr)
if(pr)print("NOTE: p.value is computed only when side=T")
}
if(side){
if(prCRIT)print(paste("Symmetric Crit.val=",tval[icrit]))
trimcibt<-mean(x,tr)-tval[icrit]*trimse(x,tr)
trimcibt[2]<-mean(x,tr)+tval[icrit]*trimse(x,tr)
p.value<-(sum(abs(test)<=abs(tval)))/nboot
}
list(estimate=mean(x,tr),ci=trimcibt,test.stat=test,p.value=p.value)
}
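
# A minimal usage sketch (not part of the original source):
#   set.seed(1)
#   x <- rexp(30)
#   trimcibt(x, nullval=1)   # bootstrap-t CI for the 20% trimmed mean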

khomreg<-function(x,y,xout=FALSE,outfun=out,...){
#
# Test hypothesis that error term in a linear regression model
# is homoscedastic using modification of Cook-Weisberg
# statistic derived by Koenker;
# See Lyon and Tsai, 1996, Statistician, 45, 337-349
#
x<-as.matrix(x)
if(xout){
flag<-outfun(x,...)$keep
x<-as.matrix(x)
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
pv<-ncol(x)
pv1<-pv+1
m<-cbind(x,y)
m<-elimna(m)
x<-m[,1:pv]
x<-as.matrix(x)
y<-m[,pv1]
dvec<-NA
temp<-lsfit(x,y)
sigest<-mean(temp$res^2)
dvec<-y-temp$res
dbar<-dvec-mean(dvec)
uval<-temp$res^2
uval<-as.matrix(uval)
test<-t(uval)%*%dbar%*%solve(t(dbar)%*%dbar)%*%t(dbar)%*%uval
psihat<-mean((temp$res^2-sigest)^2)
test<-test/psihat
p.value<-1-pchisq(test,1)
list(test=test,p.value=p.value)
}
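
# A minimal usage sketch (not part of the original source); the error
# term is heteroscedastic by construction, so a small p-value is expected:
#   set.seed(1)
#   x <- rnorm(50)
#   y <- x + rnorm(50, sd=abs(x))
#   khomreg(x, y)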
bootdse<-function(x,y=NA,est=median,nboot=100,pr=TRUE,...){
#
# Determine standard error of difference between
# two measures of location for dependent groups.
#
if(!is.na(y[1]))x<-cbind(x,y)
x<-elimna(x)
if(pr)print("Taking Bootstrap Samples. Please wait.")
data<-matrix(sample(nrow(x),size=nrow(x)*nboot,replace=TRUE),nrow=nboot)
xmat<-matrix(x[data,1],nrow=nboot,ncol=nrow(x))
ymat<-matrix(x[data,2],nrow=nboot,ncol=nrow(x))
bvec<-apply(xmat,1,FUN=est,...)-apply(ymat,1,FUN=est,...)
se<-sqrt(var(bvec))
se
}

qdtest<-function(x,y=NA,q=.5,bop=FALSE,nboot=100,se.val=NA){
#
# Test hypothesis of equal q quantiles for
# two dependent groups.
#
# x is either a matrix with 2 columns or a vector
# If x is a vector, must specify y
#
# Appears to adequately control type I error when n>=20
#
if(!is.na(y[1]))x<-cbind(x,y)
if(!is.matrix(x))stop("Something is wrong, with x or y")
x<-elimna(x)
y<-x[,2]
x<-x[,1]
n<-length(y)
df<-n-1
if(is.na(se.val[1])){
if(!bop)se.val<-sedm(x,y,q=q)
if(bop)se.val<-bootdse(x,y,est=qest,q=q,pr=FALSE,nboot=nboot)
}
test<-(qest(x,q)-qest(y,q))/se.val
sig.level<-2*(1-pt(abs(test),df))
list(test.stat=test,p.value=sig.level,se=se.val)
}
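
# A minimal usage sketch (not part of the original source); the columns
# are dependent by construction:
#   set.seed(1)
#   x <- rnorm(30)
#   y <- x + rnorm(30, sd=.5) + .3
#   qdtest(x, y)   # compare the medians of the two dependent groups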

lincdm<-function(x,con=0,alpha=.05,q=.5,mop=FALSE,nboot=100,SEED=TRUE){
#
#  A heteroscedastic test of d linear contrasts among
#  dependent groups using medians.
#
#  The data are assumed to be stored in $x$ in list mode.
#  Length(x) is assumed to correspond to the total number of groups, J
#  The groups are assumed to be dependent, with equal sample sizes.
#
#  con is a J by d matrix containing the contrast coefficients that are used.
#  If con is not specified, all pairwise comparisons are made.
#
#  q is the quantile used to compare groups.
#  con contains contrast coefficients,
#  con=0 means all pairwise comparisons are used
#  mop=F, use single order statistic
#  mop=T, use usual sample median, even if q is not equal to .5
#  in conjunction with a bootstrap estimate of covariances among
#  the medians using
#  nboot samples.
#
#  Missing values are automatically removed.
#
#
if(mop && SEED)set.seed(2)
if(is.list(x)){
x<-matl(x)
x<-elimna(x)
}
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
con<-as.matrix(con)
J<-length(x)
h<-length(x[[1]])
w<-vector("numeric",J)
xbar<-vector("numeric",J)
for(j in 1:J){
if(!mop)xbar[j]<-qest(x[[j]],q=q)
if(mop)xbar[j]<-median(x[[j]])
}
if(sum(con^2)==0){
temp<-qdmcp(x,alpha=alpha,q=q,pr=FALSE)
test<-temp$test
psihat<-temp$psihat
num.sig<-temp$num.sig
}
if(sum(con^2)>0){
ncon<-ncol(con)
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon)
if(nrow(con)!=length(x)){
stop("The number of groups does not match the number of contrast coefficients.")
}
psihat<-matrix(0,ncol(con),4)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
test<-matrix(0,ncol(con),5)
dimnames(test)<-list(NULL,c("con.num","test","p.value","crit.p.value","se"))
df<-length(x[[1]])-1
if(!mop)w<-covmmed(x,q=q)
if(mop)w<-bootcov(x,nboot=nboot,pr=FALSE)
for (d in 1:ncol(con)){
psihat[d,1]<-d
psihat[d,2]<-sum(con[,d]*xbar)
cvec<-as.matrix(con[,d])
sejk<-sqrt(t(cvec)%*%w%*%cvec)
test[d,1]<-d
test[d,2]<-sum(con[,d]*xbar)/sejk
test[d,3]<-2*(1-pt(abs(test[d,2]),df))
test[d,5]<-sejk
}
temp1<-test[,3]
temp2<-order(0-temp1)
zvec<-dvec[1:ncon]
test[temp2,4]<-zvec
psihat[,3]<-psihat[,2]-qt(1-test[,4]/2,df)*test[,5]
psihat[,4]<-psihat[,2]+qt(1-test[,4]/2,df)*test[,5]
num.sig<-sum(test[,3]<=test[,4])
}
list(test=test,psihat=psihat,num.sig=num.sig)
}
mwwmcp<-function(J,K,x,grp=c(1:p),p=J*K,q=.5,bop=FALSE,alpha=.05,nboot=100,
SEED=TRUE){
#
#  For a J by K anova using quantiles with
#  repeated measures on both factors,
#  Perform all multiple comparisons for main effects
#  and interactions.
#
#  q=.5 by default meaning medians are compared
#  bop=F means bootstrap option not used;
#  with bop=T, function uses usual medians rather
#  rather than a single order statistic to estimate median
#  in conjunction with a bootstrap estimate of covariances
#  among sample medians.
#
#  The R variable data is assumed to contain the raw
#  data stored in a matrix or in list mode.
#  When in list mode data[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  data[[K]] is the data for level 1,K
#  data[[K+1]] is the data for level 2,1, data[[2K]] is level 2,K, etc.
#
#  It is assumed that data has length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
Qa<-NA
Qab<-NA
if(is.data.frame(x))x=as.matrix(x)
if(is.list(x))x<-elimna(matl(x))
if(is.matrix(x))x<-elimna(x)
data<-x
if(is.matrix(data))data<-listm(data)
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups stored in x is")
print(length(data))
print("Warning: These two values are not equal")
}
if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.")
tmeans<-0
        # Create the three contrast matrices
        #
Ja<-(J^2-J)/2
Ka<-(K^2-K)/2
JK<-J*K
conA<-matrix(0,nrow=JK,ncol=Ja)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j < jj){
ic<-ic+1
mat<-matrix(0,nrow=J,ncol=K)
mat[j,]<-1
mat[jj,]<-0-1
conA[,ic]<-t(mat)
}}}
conB<-matrix(0,nrow=JK,ncol=Ka)
ic<-0
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
ic<-ic+1
mat<-matrix(0,nrow=J,ncol=K)
mat[,k]<-1
mat[,kk]<-0-1
conB[,ic]<-t(mat)
}}}
conAB<-matrix(0,nrow=JK,ncol=Ka*Ja)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j < jj){
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
ic<-ic+1
mat<-matrix(0,nrow=J,ncol=K)
mat[j,k]<-1
mat[j,kk]<-0-1
mat[jj,k]<-0-1
mat[jj,kk]<-1
conAB[,ic]<-t(mat)
}}}}}}
# Do test for factor A
Qa<-lincdm(x,con=conA,alpha=alpha,mop=bop,nboot=nboot,SEED=SEED)
# Do test for factor B
Qb<-lincdm(x,con=conB,alpha=alpha,mop=bop,nboot=nboot,SEED=SEED)
# Do test for factor A by B interaction
Qab<-lincdm(x,con=conAB,alpha=alpha,mop=bop,nboot=nboot,SEED=SEED)
list(Qa=Qa,Qb=Qb,Qab=Qab)
}


lincdtr<-function(x,con=0,alpha=.05,tr=.2){
#
#  A heteroscedastic test of d linear contrasts among
#  dependent groups using trimmed means.
#
#  The data are assumed to be stored in $x$ in list mode.
#  Length(x) is assumed to correspond to the total number of groups, J.
#  The groups are assumed to be dependent.
#
#  con is a J by d matrix containing the contrast coefficients that are used.
#  If con is not specified, all pairwise comparisons are made.
#
#  con contains contrast coefficients,
#  con=0 means all pairwise comparisons are used
#
#  Missing values are automatically removed.
#
#
if(is.list(x)){
x<-matl(x)
x<-elimna(x)
}
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
con<-as.matrix(con)
J<-length(x)
h<-length(x[[1]])
w<-vector("numeric",J)
xbar<-vector("numeric",J)
for(j in 1:J){
xbar[j]<-mean(x[[j]],tr=tr)
}
if(sum(con^2)==0){
temp<-rmmcp(x,alpha=alpha,tr=tr,dif=F)
test<-temp$test
psihat<-temp$psihat
num.sig<-temp$num.sig
}
if(sum(con^2)>0){
ncon<-ncol(con)
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon)
if(nrow(con)!=length(x)){
stop("The number of groups does not match the number of contrast coefficients.")
}
psihat<-matrix(0,ncol(con),4)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
test<-matrix(0,ncol(con),5)
dimnames(test)<-list(NULL,c("con.num","test","p.value","crit.p.value","se"))
df<-length(x[[1]])-1
w<-covmtrim(x,tr=tr)
for (d in 1:ncol(con)){
psihat[d,1]<-d
psihat[d,2]<-sum(con[,d]*xbar)
cvec<-as.matrix(con[,d])
sejk<-sqrt(t(cvec)%*%w%*%cvec)
test[d,1]<-d
test[d,2]<-sum(con[,d]*xbar)/sejk
test[d,3]<-2*(1-pt(abs(test[d,2]),df))
test[d,5]<-sejk
}
temp1<-test[,3]
temp2<-order(0-temp1)
zvec<-dvec[1:ncon]
test[temp2,4]<-zvec
psihat[,3]<-psihat[,2]-qt(1-test[,4]/2,df)*test[,5]
psihat[,4]<-psihat[,2]+qt(1-test[,4]/2,df)*test[,5]
num.sig<-sum(test[,3]<=test[,4])
}
list(test=test,psihat=psihat,num.sig=num.sig)
}

sintv2<-function(x,alpha=.05,nullval=0,pr=TRUE){
#
#   Compute a 1-alpha confidence interval for the median using
#   the Hettmansperger-Sheather interpolation method.
#   (See section 4.5.2.)
#
#   The default value for alpha is .05.
#
if(pr){
if(any(duplicated(x)))print("Duplicate values detected; hdpb might have more power")
}
ci<-sint(x,alpha=alpha,pr=FALSE)
alph<-c(1:99)/100
for(i in 1:99){
irem<-i
chkit<-sint(x,alpha=alph[i],pr=FALSE)
if(chkit[1]>nullval || chkit[2]<nullval)break
}
p.value<-irem/100
if(p.value<=.1){
iup<-(irem+1)/100
alph<-seq(.001,iup,.001)
for(i in 1:length(alph)){
p.value<-alph[i]
chkit<-sint(x,alpha=alph[i],pr=FALSE)
if(chkit[1]>nullval || chkit[2]<nullval)break
}}
if(p.value<=.001){
alph<-seq(.0001,.001,.0001)
for(i in 1:length(alph)){
p.value<-alph[i]
chkit<-sint(x,alpha=alph[i],pr=FALSE)
if(chkit[1]>nullval || chkit[2]<nullval)break
}}
list(n=length(elimna(x)),ci.low=ci[1],ci.up=ci[2],p.value=p.value)
}
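#
#  Illustrative usage sketch for sintv2 (added comments, not original code):
#  a confidence interval and p-value for the median, testing H0: median=0.
#  set.seed(1)
#  sintv2(rnorm(30),nullval=0)
#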



qdmcp<-function(x,alpha = 0.05,bop=FALSE,nboot=100,pr=TRUE,q=.5,SEED=TRUE){
#
# For dependent groups,
# Perform all pairwise comparisons
# using quantiles estimated with a single order statistic.
#  FWE controlled with Rom's method
#
if(is.data.frame(x))x=as.matrix(x)
if(!is.matrix(x))x<-matl(x)
if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
J<-ncol(x)
xbar<-vector("numeric",J)
x<-elimna(x)  # Remove missing values
df<-nrow(x)-1
nval<-nrow(x)
for(j in 1: J){
if(!bop)xbar[j]<-qest(x[,j],q=q)
if(bop)xbar[j]<-median(x[,j])
}
CC<-(J^2-J)/2
ncon<-CC
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon)
psihat<-matrix(0,CC,5)
dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper"))
test<-matrix(NA,CC,6)
dimnames(test)<-list(NULL,c("Group","Group","test","p-value","p.crit","se"))
if(bop)se.val<-bootdse(x,nboot=nboot,pr=pr)
temp1<-0
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
jcom<-jcom+1
if(!bop)temp<-qdtest(x[,j],x[,k],q=q,bop=bop)
if(bop)temp<-qdtest(x[,j],x[,k],se.val=se.val[jcom])
sejk<-temp$se
test[jcom,6]<-sejk
test[jcom,3]<-temp$test.stat
test[jcom,4]<-temp$p.value
if(length(x[,j])<20)test[jcom,4]<-mrm1way(x[,c(j,k)],q=q,SEED=SEED)$p.value
psihat[jcom,1]<-j
psihat[jcom,2]<-k
test[jcom,1]<-j
test[jcom,2]<-k
psihat[jcom,3]<-(xbar[j]-xbar[k])
}}}
temp1<-test[,4]
temp2<-order(0-temp1)
zvec<-dvec[1:ncon]
test[temp2,5]<-zvec
psihat[,4]<-psihat[,3]-qt(1-test[,5]/2,df)*test[,6]
psihat[,5]<-psihat[,3]+qt(1-test[,5]/2,df)*test[,6]
num.sig<-sum(test[,4]<=test[,5])
list(test=test,psihat=psihat,num.sig=num.sig)
}



bwmedbmcp<-function(J,K,x,tr=.2,JK=J*K,grp=c(1:JK),con=0,alpha=.05,dif=FALSE,pool=FALSE,bop=FALSE,nboot=100,SEED=TRUE){
#
# All pairwise comparisons among levels of Factor B
# in a split-plot design using medians.
#
# With pool=TRUE, data are pooled for each level
# of Factor B; otherwise comparisons are done separately
# for each level of Factor A.
# The comparisons are performed by lincdm (or qdmcpdif when dif=T).
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
if(is.data.frame(x))x=as.matrix(x)
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                x <- y
}
JK<-J*K
data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data
if(pool){
data<-list()
m1<-matrix(c(1:JK),J,K,byrow=T)
for(k in 1:K){
for(j in 1:J){
flag<-m1[j,k]
if(j==1)temp<-x[[flag]]
if(j>1){
temp<-c(temp,x[[flag]])
}}
data[[k]]<-temp
}
print("Group numbers refer to levels of Factor B")
if(!dif)temp<-lincdm(data,con=con,alpha=alpha,nboot=nboot,mop=bop)
if(dif)temp<-qdmcpdif(data,con=con,alpha=alpha)
return(temp)
}
if(!pool){
mat<-matrix(c(1:JK),ncol=K,byrow=T)
for(j in 1:J){
data<-list()
ic<-0
for(k in 1:K){
ic<-ic+1
data[[ic]]<-x[[mat[j,k]]]
}
print(paste("For level ", j, " of Factor A:"))
if(!dif)temp<-lincdm(data,con=con,alpha=alpha,nboot=nboot,mop=bop)
if(dif)temp<-qdmcpdif(data,con=con,alpha=alpha)
print(temp$test)
print(temp$psihat)
}}
}

qdmcpdif<-function(x, con = 0,alpha = 0.05){
#
# MCP with medians on difference scores
# FWE controlled with Rom's method
#
if(is.data.frame(x))x=as.matrix(x)
if(!is.matrix(x))x<-matl(x)
if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
con<-as.matrix(con)
J<-ncol(x)
xbar<-vector("numeric",J)
x<-elimna(x)  # Remove missing values
nval<-nrow(x)
h1<-nrow(x)
df<-h1-1
if(sum(con^2)>0)CC<-ncol(con)
if(sum(con^2)==0)CC<-(J^2-J)/2
ncon<-CC
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon)
if(sum(con^2)==0){
psihat<-matrix(0,CC,5)
dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper"))
test<-matrix(NA,CC,5)
dimnames(test)<-list(NULL,c("Group","Group","p-value","p.crit","se"))
temp1<-0
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
jcom<-jcom+1
dv<-x[,j]-x[,k]
test[jcom,5]<-msmedse(dv)
temp<-sintv2(dv,alpha=alpha/CC)
temp1[jcom]<-temp$p.value
test[jcom,3]<-temp$p.value
psihat[jcom,1]<-j
psihat[jcom,2]<-k
test[jcom,1]<-j
test[jcom,2]<-k
psihat[jcom,3]<-median(dv)
psihat[jcom,4]<-temp$ci.low
psihat[jcom,5]<-temp$ci.up
}}}
temp2<-order(0-temp1)
zvec<-dvec[1:ncon]
sigvec<-(temp1[temp2]>=zvec)
if(sum(sigvec)<ncon){
dd<-ncon-sum(sigvec) #number that are sig.
ddd<-sum(sigvec)+1
zvec[ddd:ncon]<-dvec[ddd]
}
test[temp2,4]<-zvec
}
if(sum(con^2)>0){
if(nrow(con)!=ncol(x))print("WARNING: The number of groups does not match the number of contrast coefficients.")
ncon<-ncol(con)
psihat<-matrix(0,ncol(con),4)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
test<-matrix(0,ncol(con),4)
dimnames(test)<-list(NULL,c("con.num","sig","crit.sig","se"))
temp1<-NA
for (d in 1:ncol(con)){
psihat[d,1]<-d
for(j in 1:J){
if(j==1)dval<-con[j,d]*x[,j]
if(j>1)dval<-dval+con[j,d]*x[,j]
}
temp3<-sintv2(dval)
temp1[d]<-temp3$p.value
test[d,1]<-d
test[d,4]<-msmedse(dval)
psihat[d,2]<-median(dval)
psihat[d,3]<-temp3$ci.low
psihat[d,4]<-temp3$ci.up
}
test[,2]<-temp1
temp2<-order(0-temp1)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2,2]>=zvec)
if(sum(sigvec)<ncon){
dd<-ncon-sum(sigvec) #number that are sig.
ddd<-sum(sigvec)+1
zvec[ddd:ncon]<-dvec[ddd]
}
test[temp2,3]<-zvec
}
if(sum(con^2)==0)num.sig<-sum(psihat[,4]>0)+ sum(psihat[,5]<0)
if(sum(con^2)>0)num.sig<-sum(psihat[,3]>0)+ sum(psihat[,4]<0)
list(test=test,psihat=psihat,con=con,num.sig=num.sig)
}



l2dci<-function(x,y,est=median,alpha=.05,nboot=500,SEED=TRUE,pr=TRUE,...){
#
#   Compute a bootstrap confidence interval for a
#   measure of location associated with
#   the distribution of x-y,
#   est indicates which measure of location will be used
#
#   Function returns a confidence interval, a p-value, and an estimate
#   of the squared standard error of the estimator used.
#
x<-x[!is.na(x)] # Remove any missing values in x
y<-y[!is.na(y)] # Remove any missing values in y
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-NA
for(i in 1:nboot)bvec[i]<-loc2dif(datax[i,],datay[i,],est=est)
bvec<-sort(bvec)
low<-round((alpha/2)*nboot)+1
up<-nboot-low
temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot)
sig.level<-2*(min(temp,1-temp))
se<-var(bvec)
list(ci=c(bvec[low],bvec[up]),p.value=sig.level,sq.se=se)
}
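#
#  Illustrative usage sketch for l2dci (added comments, not original code);
#  it assumes the helper loc2dif, called above, is available from this file.
#  CI for the median of the distribution of x-y, two independent samples:
#  set.seed(1)
#  l2dci(rnorm(40),rnorm(40,.5))
#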


qdec2ci<-function(x,y=NA,nboot=500,alpha=.05,pr=TRUE,SEED=TRUE,plotit=TRUE){
#
# Compare the deciles of two dependent groups
# with quantiles estimated with a single order statistic
#
if(SEED)set.seed(2)
if(is.na(y[1])){
y<-x[,2]
x<-x[,1]
}
bvec<-matrix(NA,nrow=nboot,ncol=9)
if(pr)print("Taking bootstrap samples. Please Wait.")
n<-length(x)
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(i in 1:nboot)bvec[i,]<-qdec(x[data[i,]])-qdec(y[data[i,]])
pval<-NA
m<-matrix(0,9,4)
dimnames(m)<-list(NULL,c("lower","upper","Delta.hat","p.values"))
crit <- alpha/2
icl <- round(crit * nboot) + 1
icu <- nboot - icl
for(i in 1:9){
pval[i]<-(sum(bvec[,i]<0)+.5*sum(bvec[,i]==0))/nboot
pval[i]<-2*min(pval[i],1-pval[i])
temp<-sort(bvec[,i])
m[i,1]<-temp[icl]
m[i,2]<-temp[icu]
}
m[,3]<-qdec(x)-qdec(y)
m[,4]<-pval
if(plotit){
xaxis<-c(qdec(x),qdec(x))
par(pch="+")
yaxis<-c(m[,1],m[,2])
plot(xaxis,yaxis,ylab="delta",xlab="x (first group)")
par(pch="*")
points(qdec(x),m[,3])
}
m
}




ancovam<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,plotit=TRUE,pts=NA,sm=FALSE,
pr=T){
#
# Compare two independent  groups using an ancova method
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
#
# This function is designed specifically for
# MEDIANS
#
#  Assume data are in x1 y1 x2 and y2
#
if(pr){
print("NOTE: Confidence intervals are adjusted to control the probability")
print("of at least one Type I error.")
print("But p-values are not")
}
if(is.na(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,9)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi","p.value"))
critv<-NA
if(alpha==.05)critv<-smmcrit(500,5)
if(alpha==.01)critv<-smmcrit01(500,5)
if(is.na(critv))critv<-smmval(rep(999,5),alpha=alpha)
for (i in 1:5){
g1<-y1[near(x1,x1[isub[i]],fr1)]
g2<-y2[near(x2,x1[isub[i]],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
test<-msmed(g1,g2)
mat[i,1]<-x1[isub[i]]
mat[i,2]<-length(g1)
mat[i,3]<-length(g2)
mat[i,4]<-median(g1)-median(g2)
mat[i,5]<-test$test[3]
mat[i,6]<-test$test[5]
cilow<-mat[i,4]-critv*mat[i,6]
cihi<-mat[i,4]+critv*mat[i,6]
mat[i,7]<-cilow
mat[i,8]<-cihi
mat[i,9]<-test$test[6]
}}
if(!is.na(pts[1])){
if(length(pts)>=29)stop("At most 28 points can be compared")
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
mat<-matrix(NA,length(pts),9)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi",
"p.value"))
critv<-NA
if(length(pts)>=2){
if(alpha==.05)critv<-smmcrit(500,length(pts))
if(alpha==.01)critv<-smmcrit01(500,length(pts))
if(is.na(critv))critv<-smmval(rep(999,length(pts)),alpha=alpha)
}
if(length(pts)==1)critv<-qnorm(1-alpha/2)
for (i in 1:length(pts)){
g1<-y1[near(x1,pts[i],fr1)]
g2<-y2[near(x2,pts[i],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
test<-msmed(g1,g2)
mat[i,1]<-pts[i]
mat[i,2]<-length(g1)
mat[i,3]<-length(g2)
if(length(g1)<=5)print(paste("Warning, there are",length(g1)," points corresponding to the design point X=",pts[i]))
if(length(g2)<=5)print(paste("Warning, there are",length(g2)," points corresponding to the design point X=",pts[i]))
mat[i,4]<-median(g1)-median(g2)
mat[i,5]<-test$test[3]
mat[i,6]<-test$test[5]
cilow<-mat[i,4]-critv*mat[i,6]
cihi<-mat[i,4]+critv*mat[i,6]
mat[i,7]<-cilow
mat[i,8]<-cihi
mat[i,9]<-test$test[6]
}}
if(plotit)
runmean2g(x1,y1,x2,y2,fr=fr1,est=median,sm=sm)
list(output=mat,crit=critv)
}


modgen<-function(p,adz=FALSE){
#
# Used by regpre to generate all models
# p=number of predictors
# adz=T, will add the model where only a measure
# of location is used.
#
#
model<-list()
if(p>5)stop("Current version is limited to 5 predictors")
if(p==1)model[[1]]<-1
if(p==2){
model[[1]]<-1
model[[2]]<-2
model[[3]]<-c(1,2)
}
if(p==3){
for(i in 1:3)model[[i]]<-i
model[[4]]<-c(1,2)
model[[5]]<-c(1,3)
model[[6]]<-c(2,3)
model[[7]]<-c(1,2,3)
}
if(p==4){
for(i in 1:4)model[[i]]<-i
model[[5]]<-c(1,2)
model[[6]]<-c(1,3)
model[[7]]<-c(1,4)
model[[8]]<-c(2,3)
model[[9]]<-c(2,4)
model[[10]]<-c(3,4)
model[[11]]<-c(1,2,3)
model[[12]]<-c(1,2,4)
model[[13]]<-c(1,3,4)
model[[14]]<-c(2,3,4)
model[[15]]<-c(1,2,3,4)
}
if(p==5){
for(i in 1:5)model[[i]]<-i
model[[6]]<-c(1,2)
model[[7]]<-c(1,3)
model[[8]]<-c(1,4)
model[[9]]<-c(1,5)
model[[10]]<-c(2,3)
model[[11]]<-c(2,4)
model[[12]]<-c(2,5)
model[[13]]<-c(3,4)
model[[14]]<-c(3,5)
model[[15]]<-c(4,5)
model[[16]]<-c(1,2,3)
model[[17]]<-c(1,2,4)
model[[18]]<-c(1,2,5)
model[[19]]<-c(1,3,4)
model[[20]]<-c(1,3,5)
model[[21]]<-c(1,4,5)
model[[22]]<-c(2,3,4)
model[[23]]<-c(2,3,5)
model[[24]]<-c(2,4,5)
model[[25]]<-c(3,4,5)
model[[26]]<-c(1,2,3,4)
model[[27]]<-c(1,2,3,5)
model[[28]]<-c(1,2,4,5)
model[[29]]<-c(1,3,4,5)
model[[30]]<-c(2,3,4,5)
model[[31]]<-c(1,2,3,4,5)
}
if(adz){
ic<-length(model)+1
model[[ic]]<-0
}
model
}
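#
#  Illustrative usage sketch for modgen (added comments, not original code):
#  modgen(3)           # the 7 non-empty subsets of predictors {1,2,3}
#  modgen(3,adz=TRUE)  # adds an 8th element, 0, for the location-only model
#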



locpre<-function(y,est=mean,error=sqfun,nboot=100,SEED=TRUE,pr=TRUE,mval=round(5*log(length(y)))){
#
#   Estimate the prediction error using a measure of location
#   given by the argument
#   est
#
#   The .632 method is used.
#   (See Efron and Tibshirani, 1993, pp. 252--254)
#
#   Prediction error is the expected value of the function error.
#   The argument error defaults to squared error.
#
#   est can be any R function that returns a measure of location
#
#   The default value for mval, the number of observations to resample
#   for each of the B bootstrap samples is based on results by
#   Shao (JASA, 1996, 655-665). (When resampling n vectors of observations,
#   model selection may not lead to the correct model as n goes to infinity.)
#
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=mval*nboot,replace=TRUE),nrow=nboot)
bid<-apply(data,1,idb,length(y))
#  bid is an n by nboot matrix. If the jth bootstrap sample from
#  1, ..., mval contains the value i, bid[i,j]=0; otherwise bid[i,j]=1
#
yhat<-apply(data,1,locpres1,y,est=est)
# yhat is nboot vector
# containing the bootstrap estimates
#
yhat<-matrix(yhat,nrow=length(y),ncol=nboot) # convert to n x nboot matrix
bi<-apply(bid,1,sum) # B sub i in notation of Efron and Tibshirani, p. 253
temp<-(bid*(yhat-y))
diff<-apply(temp,1,error)
ep0<-sum(diff/bi)/length(y)
aperror<-error(y-est(y))/length(y) # apparent error
val<-.368*aperror+.632*ep0
val
}
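#
#  Illustrative usage sketch for locpre (added comments, not original code):
#  .632 estimate of squared-error prediction error when predicting with
#  the sample median.
#  set.seed(1)
#  locpre(rnorm(50),est=median)
#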


locpres1<-function(isub,x,est){
#
#  Compute a measure of location based on x[isub]
#  isub is a vector of length mval,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  mval is the sample size
#  of the bootstrap sample, where mval<n is used to get
#  consistency when choosing the correct model.

#  This function is used by other functions when computing
#  bootstrap estimates.
#
regboot<-est(x[isub])
regboot
}

med1way<-function(x,grp=NA,alpha=.05,crit=NA,iter=1000,SEED=TRUE,pr=T){
#
#  A heteroscedastic one-way ANOVA for medians.
#
#  If
#  crit=NA, an appropriate critical value is determined
#  for the alpha value specified and
#  a p-value is returned.
#
#  If a value for crit is specified, it is used as the critical
#  value, but no p-value is reported. Specifying a value for
#  crit reduces execution time.
#  With crit=NA, the critical value is a function of the sample
#  sizes and is determined by calling the function med1way.crit.
#
#  The data are assumed to be stored in $x$ in list mode.
#  Length(x) is assumed to correspond to the total number of groups.
#  By default, the null hypothesis is that all groups have a common mean.
#  To compare a subset of the groups, use grp to indicate which
#  groups are to be compared. For example, if you type the
#  command grp<-c(1,3,4), and then execute this function, groups
#  1, 3, and 4 will be compared with the remaining groups ignored.
#
#  Missing values are automatically removed.
#
if(is.data.frame(x))x=as.matrix(x)
if(pr){
print("NOTE: This function was modified in Dec. 2004")
print("A new approximate critical value is used if crit=NA")
print("This might improve type I error probabilities substantially")
print("For discrete data with ties, this function is NOT recommended.")
print("Use the function medpb; it is best for general use")
}
if(is.matrix(x)){
y<-list()
for(j in 1:ncol(x))y[[j]]<-x[,j]
x<-y
}
if(is.na(sum(grp[1])))grp<-c(1:length(x))
if(!is.list(x))stop("Data are not stored in a matrix or in list mode.")
J<-length(grp)  # The number of groups to be compared
n<-vector("numeric",J)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
w[j]<-1/msmedse(x[[grp[j]]])^2
xbar[j]<-median(x[[grp[j]]])
n[j]<-length(x[[grp[j]]])
}
pval<-NA
u<-sum(w)
xtil<-sum(w*xbar)/u
TEST<-sum(w*(xbar-xtil)^2)/(J-1)
if(is.na(crit)){
temp<-med1way.crit(n,alpha,SEED=SEED,iter=iter,TEST=TEST)
crit.val<-temp$crit.val
pval<-temp$p.value
}
if(!is.na(crit))crit.val<-crit
list(TEST=TEST,crit.val=crit.val,p.value=pval)
}

med1way.crit<-function(n,alpha=.05,iter=1000,TEST=NA,SEED=TRUE){
#
#  Determine the critical value for the function
#  med1way, assuming normality, based on the sample
#  sizes in n.
#
J<-length(n)
x<-list()
w<-vector("numeric",J)
xbar<-vector("numeric",J)
if(SEED)set.seed(2)
chk<-NA
grp<-c(1:J)
for (it in 1:iter){
for(j in 1:J){
x[[j]]<-rnorm(n[j])
w[j]<-1/msmedse(x[[grp[j]]])^2
xbar[j]<-median(x[[grp[j]]])
n[j]<-length(x[[grp[j]]])
}
u<-sum(w)
xtil<-sum(w*xbar)/u
chk[it]<-sum(w*(xbar-xtil)^2)/(J-1)
}
chk<-sort(chk)
iv<-round((1-alpha)*iter)
crit.val<-chk[iv]
pval<-NA
if(!is.na(TEST))pval<-sum((TEST<=chk))/iter
list(crit.val=crit.val,p.value=pval)
}
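#
#  Illustrative usage sketch for med1way (added comments, not original code):
#  set.seed(1)
#  g<-list(rnorm(20),rnorm(20,.5),rnorm(20,1))  # three independent groups
#  med1way(g)   # returns the test statistic, critical value and p-value
#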

bpmed<-function(x,con=0,alpha=.05){
#
# Multiple comparisons for J independent groups based on medians, with
# standard errors estimated via the Bonett-Price method (bpmedse).
# con is a J by d matrix of contrast coefficients; con=0 means all
# pairwise comparisons are performed.
#
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
con<-as.matrix(con)
J<-length(x)
h<-vector("numeric",J)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
xbar[j]<-median(x[[j]])
w[j]<-bpmedse(x[[j]])^2 # Squared standard error.
}
if(sum(con^2)>0)CC<-ncol(con)
if(sum(con^2)==0){
CC<-(J^2-J)/2
psihat<-matrix(0,CC,5)
dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper"))
test<-matrix(NA,CC,5)
dimnames(test)<-list(NULL,c("Group","Group","test","crit","se"))
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
jcom<-jcom+1
test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k])
sejk<-sqrt(w[j]+w[k])
test[jcom,5]<-sejk
psihat[jcom,1]<-j
psihat[jcom,2]<-k
test[jcom,1]<-j
test[jcom,2]<-k
psihat[jcom,3]<-(xbar[j]-xbar[k])
crit<-NA
if(CC==1)crit<-qnorm(1-alpha/2)
if(CC>1){
if(alpha==.05)crit<-smmcrit(500,CC)
if(alpha==.01)crit<-smmcrit01(500,CC)
if(is.na(crit))warning("Can only be used with alpha=.05 or .01")
}
test[jcom,4]<-crit
psihat[jcom,4]<-psihat[jcom,3]-crit*test[jcom,5]
psihat[jcom,5]<-psihat[jcom,3]+crit*test[jcom,5]
}}}}
if(sum(con^2)>0){
if(nrow(con)!=length(x))warning("The number of groups does not match the number of contrast coefficients.")
psihat<-matrix(0,ncol(con),4)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
test<-matrix(0,ncol(con),5)
dimnames(test)<-list(NULL,c("con.num","test","crit","se","df"))
for (d in 1:ncol(con)){
psihat[d,1]<-d
psihat[d,2]<-sum(con[,d]*xbar)
sejk<-sqrt(sum(con[,d]^2*w))
test[d,1]<-d
test[d,2]<-sum(con[,d]*xbar)/sejk
crit<-NA
if(CC==1)crit<-qnorm(1-alpha/2)
if(alpha==.05)crit<-smmcrit(500,ncol(con))
if(alpha==.01)crit<-smmcrit01(500,ncol(con))
test[d,3]<-crit
test[d,4]<-sejk
psihat[d,3]<-psihat[d,2]-crit*sejk
psihat[d,4]<-psihat[d,2]+crit*sejk
}}
list(test=test,psihat=psihat)
}



bpmedse<-function(x){
#
# compute standard error of the median using method
# recommended by Price and Bonett (2001)
#
y<-sort(x)
n<-length(x)
av<-round((n+1)/2-sqrt(n))
if(av==0)av<-1
avm<-av-1
astar<-pbinom(avm,n,.5)  #alpha*/2
zval<-qnorm(1-astar)
top<-n-av+1
sqse<-((y[top]-y[av])/(2*zval))^2 # The sq. standard error
se<-sqrt(sqse)
se
}
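#
#  Worked sketch for bpmedse (added comments, not original code): with n=25,
#  av=round(26/2-sqrt(25))=8 and top=18, so the standard error is based on
#  the 8th and 18th order statistics.
#  set.seed(1)
#  bpmedse(rnorm(25))
#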
exmed<-function(x,y=NA,con=0,alpha=.05,iter=1000,se.fun=bpmedse,SEED=TRUE){
#
# Test a set of linear contrasts using medians
#
# Get exact control over type I errors under normality, provided
# iter is sufficietly large.
# iter determines number of replications used in a simulation
# to determine critical value.
#
# se.fun indicates method used to estimate standard errors.
# default is the method used by Bonett and Price (2002)
# To use the McKean-Shrader method,
# set se.fun=msmedse
#
#  The data are assumed to be stored in $x$ in a matrix or in list mode.
#  Length(x) is assumed to correspond to the total number of groups, J
#  It is assumed all groups are independent.
#
#  con is a J by d matrix containing the contrast coefficients that are used.
#  If con is not specified, all pairwise comparisons are made.
#
#  Missing values are automatically removed.
#
#  Function returns the critical value used so that FWE=alpha
#  (under the column crit)
#  p-values are determined for each test but are not adjusted so
#  that FWE=alpha.
#  The confidence intervals are adjusted so that the simultaneous
#  probability coverage is 1-alpha.
#
if(!is.na(y[1])){
xx<-list()
xx[[1]]<-x
xx[[2]]<-y
if(is.matrix(x) || is.list(x))stop("When y is specified, x should not have list mode or be a matrix")
x<-xx
}
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
con<-as.matrix(con)
J<-length(x)
h<-vector("numeric",J)
w<-vector("numeric",J)
nval<-vector("numeric",J)
xbar<-vector("numeric",J)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
xbar[j]<-median(x[[j]])
nval[j]<-length(x[[j]])
# w[j]<-msmedse(x[[j]])^2
 w[j]<-se.fun(x[[j]])^2
}
if(sum(con^2)>0)CC<-ncol(con)
if(sum(con^2)==0){
CC<-(J^2-J)/2
psihat<-matrix(0,CC,5)
dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper"))
test<-matrix(NA,CC,6)
dimnames(test)<-list(NULL,c("Group","Group","test","crit","se","p.value"))
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
jcom<-jcom+1
test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k])
# Next determine p-value for each individual test
temp<-msmedsub(c(nval[j],nval[k]),se.fun=se.fun,SEED=SEED,iter=iter)
test[jcom,6]<-sum((test[jcom,3]<=temp))/iter
sejk<-sqrt(w[j]+w[k])
test[jcom,5]<-sejk
psihat[jcom,1]<-j
psihat[jcom,2]<-k
test[jcom,1]<-j
test[jcom,2]<-k
psihat[jcom,3]<-(xbar[j]-xbar[k])
# Determine critical value for controlling FWE
temp<-msmedsub(nval,se.fun=se.fun,SEED=SEED,iter=iter)
ic<-round((1-alpha)*iter)
crit<-temp[ic]
test[jcom,4]<-crit
psihat[jcom,4]<-psihat[jcom,3]-crit*test[jcom,5]
psihat[jcom,5]<-psihat[jcom,3]+crit*test[jcom,5]
}}}}
if(sum(con^2)>0){
if(nrow(con)!=length(x))warning("The number of groups does not match the number of contrast coefficients.")
psihat<-matrix(0,ncol(con),4)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
test<-matrix(0,ncol(con),5)
dimnames(test)<-list(NULL,c("con.num","test","crit","se","p.value"))
# Determine critical value that controls FWE
temp<-msmedsub(nval,con=con,se.fun=se.fun,SEED=SEED,iter=iter)
ic<-round((1-alpha)*iter)
crit<-temp[ic]
for (d in 1:ncol(con)){
flag<-(con[,d]==0)
nvec<-nval[!flag]
psihat[d,1]<-d
psihat[d,2]<-sum(con[,d]*xbar)
sejk<-sqrt(sum(con[,d]^2*w))
test[d,1]<-d
test[d,2]<-sum(con[,d]*xbar)/sejk
#  Determine p-value for individual (dth) test
temp<-msmedsub(nvec,iter=iter,se.fun=se.fun,SEED=SEED)
test[d,3]<-crit
test[d,4]<-sejk
test[d,5]<-sum(abs(test[d,2])<=temp)/iter
psihat[d,3]<-psihat[d,2]-crit*sejk
psihat[d,4]<-psihat[d,2]+crit*sejk
}}
list(test=test,psihat=psihat)
}
msmedsub<-function(n,con=0,alpha=.05,se.fun=bpmedse,iter=1000,SEED=TRUE){
#
# Determine a Studentized critical value, assuming normality
# and homoscedasticity, for the function msmedv2
#
# Goal: Test a set of linear contrasts using medians
#
#  The data are assumed to be stored in $x$ in a matrix or in list mode.
#  Length(x) is assumed to correspond to the total number of groups, J
#  It is assumed all groups are independent.
#
#  con is a J by d matrix containing the contrast coefficients that are used.
#  If con is not specified, all pairwise comparisons are made.
#
if(SEED)set.seed(2)
con<-as.matrix(con)
J<-length(n)
h<-vector("numeric",J)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
x<-list()
test<-NA
testmax<-NA
for (it in 1:iter){
for(j in 1:J){
x[[j]]<-rnorm(n[j])
xbar[j]<-median(x[[j]])
 w[j]<-se.fun(x[[j]])^2
}
if(sum(con^2)>0)CC<-ncol(con)
if(sum(con^2)==0){
CC<-(J^2-J)/2
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
jcom<-jcom+1
test[jcom]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k])
}}}}
if(sum(con^2)>0){
for (d in 1:ncol(con)){
sejk<-sqrt(sum(con[,d]^2*w))
test[d]<-sum(con[,d]*xbar)/sejk
}}
testmax[it]<-max(abs(test))
}
testmax<-sort(testmax)
testmax
}
cnorm<-function(n,epsilon=.1,k=10){
#
# generate n observations from a contaminated normal
# distribution
# probability 1-epsilon from a standard normal
# probability epsilon from normal with mean 0 and standard deviation k
#
if(epsilon>1)stop("epsilon must be less than or equal to 1")
if(epsilon<0)stop("epsilon must be greater than or equal to 0")
if(k<=0)stop("k must be greater than 0")
val<-rnorm(n)
uval<-runif(n)
flag<-(uval<=1-epsilon)
val[!flag]<-k*val[!flag]
val
}
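#
#  Illustrative usage sketch for cnorm (added comments, not original code):
#  90% of observations are standard normal, 10% have standard deviation 10;
#  the heavy tails inflate the sample variance relative to a robust scale.
#  set.seed(1)
#  z<-cnorm(1000)
#  var(z)     # much larger than 1
#  mad(z)^2   # close to 1
#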
twwmcp<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2,alpha=.05,dif=F){
#
#  For a J by K anova using trimmed means with
#  repeated measures on both factors,
#  perform all multiple comparisons for main effects
#  and interactions.
#
#  tr=.2 is the default amount of trimming.
#
#  The R variable data is assumed to contain the raw
#  data stored in a matrix or in list mode.
#  When in list mode data[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  data[[K]] is the data for level 1,K
#  data[[K+1]] is the data for level 2,1, data[[2K]] is level 2,K, etc.
#
#  It is assumed that data has length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
Qa<-NA
Qab<-NA
if(is.list(x))x<-elimna(matl(x))
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x))x<-elimna(x)
data<-x
if(is.matrix(data))data<-listm(data)
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups stored in x is")
print(length(data))
print("Warning: These two values are not equal")
}
if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.")
tmeans<-0
temp<-con2way(J,K) # contrasts matrices stored in temp
Qa<-rmmcp(x,con=temp$conA,alpha=alpha,dif=dif,tr=tr)
# Do test for factor B
Qb<-rmmcp(x,con=temp$conB,alpha=alpha,dif=dif,tr=tr)
# Do test for factor A by B interaction
Qab<-rmmcp(x,con=temp$conAB,alpha=alpha,dif=dif,tr=tr)
list(Qa=Qa,Qb=Qb,Qab=Qab)
}

medpb<-function(x,alpha=.05,nboot=NA,grp=NA,est=median,con=0,bhop=FALSE,
SEED=TRUE,...){
#
#   Multiple comparisons for  J independent groups using medians.
#
#   A percentile bootstrap method with Rom's method is used.
#
#   The data are assumed to be stored in x
#   which either has list mode or is a matrix.  In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, the columns of the matrix correspond
#   to groups.
#
#   est is the measure of location and defaults to the median
#   ... can be used to set optional arguments associated with est
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#
#   con can be used to specify linear contrasts; see the function lincon
#
#   Missing values are allowed.
#
con<-as.matrix(con)
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
tempn<-0
mvec<-NA
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
mvec[j]<-est(temp,...)
}
Jm<-J-1
#
# Determine contrast matrix
#
if(sum(con^2)==0){
ncon<-(J^2-J)/2
con<-matrix(0,J,ncon)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
ncon<-ncol(con)
dvec<-alpha/c(1:ncon)
if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.")
#  Determine nboot if a value was not specified
if(is.na(nboot)){
nboot<-5000
if(J <= 8)nboot<-4000
if(J <= 3)nboot<-2000
}
# Determine critical values
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
}
}
if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon
bvec<-matrix(NA,nrow=J,ncol=nboot)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
#print(paste("Working on group ",j))
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group
}
test<-NA
bcon<-t(con)%*%bvec #ncon by nboot matrix
tvec<-t(con)%*%mvec
for (d in 1:ncon){
tv<-sum(bcon[d,]==0)/nboot
test[d]<-sum(bcon[d,]>0)/nboot+.5*tv
if(test[d]> .5)test[d]<-1-test[d]
}
test<-2*test
output<-matrix(0,ncon,6)
dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper"))
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
for (ic in 1:ncol(con)){
output[ic,2]<-tvec[ic,]
output[ic,1]<-ic
output[ic,3]<-test[ic]
temp<-sort(bcon[ic,])
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}
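#
#  Illustrative usage sketch for medpb (added comments, not original code):
#  set.seed(1)
#  g<-list(rnorm(15),rnorm(20,.5),rnorm(25))  # three independent groups
#  medpb(g)   # all pairwise percentile bootstrap comparisons of medians
#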
rbbinom<-function(n,nbin,r,s){
#
# Generate n values from a beta-binomial,
# r and s are the parameters of the beta distribution.
# nbin is for the binomial distribution, i.e., sample space=c(0:nbin)
#
x<-NA
for(i in 1:n){
pval<-rbeta(1,r,s)
x[i]<-rbinom(1,nbin,pval)
}
x
}
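#
#  Illustrative usage sketch for rbbinom (added comments, not original code):
#  set.seed(1)
#  rbbinom(5,10,2,3)  # 5 beta-binomial deviates, 10 trials, beta(2,3)
#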

med2g<-function(x,y,alpha=.05,nboot=1000,SEED=TRUE,...){
#
#   Compare medians of two independent groups using percentile bootstrap
#
#   Missing values are allowed.
#
x<-elimna(x)
y<-elimna(y)
mvec<-NA
mvec[1]<-median(x)
mvec[2]<-median(y)
bvec<-NA
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec1<-apply(datax,1,median) # Bootstrapped medians for first group
bvec2<-apply(datay,1,median) # Bootstrapped medians for second group
test<-sum((bvec1>bvec2))/nboot
tv<-sum(bvec1==bvec2)/nboot
test<-test+.5*tv
if(test> .5)test<-1-test
test<-2*test
dvec<-sort(bvec1-bvec2)
icl<-round(alpha*nboot/2)+1
icu<-nboot-icl-1
cilow<-dvec[icl]
ciup<-dvec[icu]
list(p.value=test,est.dif=mvec[1]-mvec[2],ci.low=cilow,ci.up=ciup)
}
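#
#  Illustrative usage sketch for med2g (added comments, not original code):
#  set.seed(1)
#  med2g(rnorm(30),rnorm(25,.5))
#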


twobinom<-function(r1=sum(elimna(x)),n1=length(x),r2=sum(elimna(y)),n2=length(y),x=NA,y=NA,alpha=.05){
#
# Test the hypothesis that two independent binomials have equal
# probability of success
#
# r1=number of successes in group 1
# n1=number of observations in group 1
#
n1p<-n1+1
n2p<-n2+1
n1m<-n1-1
n2m<-n2-1
chk<-abs(r1/n1-r2/n2)
x<-c(0:n1)/n1
y<-c(0:n2)/n2
phat<-(r1+r2)/(n1+n2)
m1<-outer(x,y,"-")
m2<-matrix(1,n1p,n2p)
flag<-(abs(m1)>=chk)
m3<-m2*flag
b1<-1
b2<-1
xv<-c(1:n1)
yv<-c(1:n2)
xv1<-n1-xv+1
yv1<-n2-yv+1
dis1<-c(1,pbeta(phat,xv,xv1))
dis2<-c(1,pbeta(phat,yv,yv1))
pd1<-NA
pd2<-NA
for(i in 1:n1)pd1[i]<-dis1[i]-dis1[i+1]
for(i in 1:n2)pd2[i]<-dis2[i]-dis2[i+1]
pd1[n1p]<-phat^n1
pd2[n2p]<-phat^n2
m4<-outer(pd1,pd2,"*")
test<-sum(m3*m4)
list(p.value=test,p1=r1/n1,p2=r2/n2)
}
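#
#  Illustrative usage sketch for twobinom (added comments, not original
#  code). The test can be called with counts directly or with raw 0-1 data:
#  twobinom(r1=15,n1=30,r2=9,n2=25)
#  twobinom(x=rbinom(30,1,.5),y=rbinom(25,1,.3))
#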

lband.fun<-function(x,y,crit){
#
#  function used to determine probability of type I error given crit
#
pi<-gamma(.5)^2 # equals the constant pi
xr<-rank(x)
yr<-rank(y)
temp<-apply(cbind(xr,yr),1,max)
n<-length(x)
fj<-NA
for(i in 1:n)fj[i]<-sum(temp==i)
v1<-NA
for(j in 1:n)v1[j]<-(j-sum(fj[1:j]))/n
psi<-rep(0,n)
for(j in 1:n){
if(v1[j]>0)psi[j]<-crit*exp(0-crit^2/(2*v1[j]))/sqrt(2*pi*v1[j]^3)
}
res<-mean(fj*psi)
res
}

lband.fun2<-function(m,crit,alpha=.05){
x<-m[,1]
y<-m[,2]
val<-abs(alpha-lband.fun(x,y,crit))
val
}
qdec<-function(x){
#
# compute deciles using single order statistics
# (function deciles uses Harrell-Davis estimator)
#
vals<-NA
for(i in 1:9){
vals[i]<-qest(x,i/10)
}
vals
}
m2way<-function(J,K,x,est=hd,alpha=.05,nboot=600,SEED=TRUE,grp=NA,pr=TRUE,...){
#
# Two-way ANOVA based on forming averages
#
#  By default
#  est=hd meaning that medians are used with the Harrell-Davis estimator.
#
#   The data are assumed to be stored in x in list mode or in a matrix.
#  If grp is unspecified, it is assumed x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second factor: level 1,2
#  x[[K+1]] is the data for level 2,1, etc.
#  If the data are in wrong order, grp can be used to rearrange the
#  groups. For example, for a two by two design, grp<-c(2,4,3,1)
#  indicates that the second group corresponds to level 1,1;
#  group 4 corresponds to level 1,2; group 3 is level 2,1;
#  and group 1 is level 2,2.
#
#   Missing values are automatically removed.
#
JK<-J*K
if(is.data.frame(x))x=as.matrix(x)
xcen<-list()
        if(is.matrix(x))
                x <- listm(x)
        if(!is.list(x))
                stop("Data must be stored in list mode or a matrix.")
        if(!is.na(grp[1])) {
                yy <- x
                for(j in 1:length(grp))
                        x[[j]] <- yy[[grp[j]]]
        }
for(j in 1:JK){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
x[[j]]<-temp
}
xx<-list()
mloc<-NA
for(i in 1:JK){
xx[[i]]<-x[[i]]
mloc[i]<-est(xx[[i]],...)
xcen[[i]]<-xx[[i]]-mloc[i]
}
x<-xx
mat<-matrix(mloc,nrow=J,ncol=K,byrow=T)
leva<-apply(mat,1,mean) # J averages over columns
levb<-apply(mat,2,mean)
gm<-mean(levb)
testa<-sum((leva-mean(leva))^2)
testb<-sum((levb-mean(levb))^2)
testab<-NA
tempab<-matrix(NA,nrow=J,ncol=K)
for(j in 1:J){
for(k in 1:K){
tempab[j,k]<-mat[j,k]-leva[j]-levb[k]+gm
}}
testab<-sum(tempab^2)
bvec<-matrix(NA,nrow=JK,ncol=nboot)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
for(j in 1:JK){
if(pr)print(paste("Working on group ",j))
data<-matrix(sample(xcen[[j]],size=length(xcen[[j]])*nboot,replace=TRUE),
nrow=nboot)
bvec[j,]<-apply(data,1,est,...) # JK by nboot matrix, jth row contains
#                          bootstrapped  estimates for jth group
}
boota<-NA
bootb<-NA
bootab<-NA
for(i in 1:nboot){
mat<-matrix(bvec[,i],nrow=J,ncol=K,byrow=T)
leva<-apply(mat,1,mean) # J averages over columns
levb<-apply(mat,2,mean)
gm<-mean(mat)
boota[i]<-sum((leva-mean(leva))^2)
bootb[i]<-sum((levb-mean(levb))^2)
for(j in 1:J){
for(k in 1:K){
tempab[j,k]<-mat[j,k]-leva[j]-levb[k]+gm
}}
bootab[i]<-sum(tempab^2)}
pvala<-1-sum(testa>=boota)/nboot
pvalb<-1-sum(testb>=bootb)/nboot
pvalab<-1-sum(testab>=bootab)/nboot
list(p.value.A=pvala,p.value.B=pvalb,p.value.AB=pvalab,
test.A=testa,test.B=testb,
test.AB=testab,est.loc=matrix(mloc,nrow=J,ncol=K,byrow=T))
}



b1way<-function(x,est=mest,nboot=599,SEED=TRUE,...){
#
#   Test the hypothesis that J measures of location are equal
#   using the percentile bootstrap method.
#   By default, M-estimators are compared using 599 bootstrap samples.
#
#   The data are assumed to be stored in x in list mode.  Thus,
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J, say.
#
#
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or a matrix.")
J<-length(x)
for(j in 1:J)x[[j]]=elimna(x[[j]])
nval<-vector("numeric",length(x))
gest<-vector("numeric",length(x))
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
bvec<-matrix(0,J,nboot)
#print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
#print(paste("Working on group ",j))
nval[j]<-length(x[[j]])
gest[j]<-est(x[[j]])
xcen<-x[[j]]-est(x[[j]],...)
data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,est,...) # A J by nboot matrix
#                     containing the bootstrap values of est.
}
teststat<-wsumsq(gest,nval)
testb<-apply(bvec,2,wsumsq,nval)
p.value<-1 - sum(teststat >= testb)/nboot
if(teststat == 0)p.value <- 1
list(teststat=teststat,p.value=p.value)
}


lintest<-function(x,y,regfun=tsreg,nboot=500,alpha=.05,xout=FALSE,SEED=TRUE,
outfun=out,...){
#
# Test the hypothesis that the regression surface is a plane.
# Stute et al. (1998, JASA, 93, 141-149).
#
if(SEED)set.seed(2)
if(identical(regfun,Qreg))print('When using Qreg, be sure to include res.vals=TRUE')
x<-as.matrix(x)
d<-ncol(x)
temp<-elimna(cbind(x,y))
x<-temp[,1:d]
x<-as.matrix(x)
y<-temp[,d+1]
if(xout){
flag<-outfun(x,...)$keep
x<-x[flag,]
x<-as.matrix(x)
y<-y[flag]
}
mflag<-matrix(NA,nrow=length(y),ncol=length(y))
for (j in 1:length(y)){
for (k in 1:length(y)){
mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x))
}
}
reg<-regfun(x,y,...)
yhat<-y-reg$residuals
print("Taking bootstrap sample, please wait.")
data<-matrix(runif(length(y)*nboot),nrow=nboot)
data<-sqrt(12)*(data-.5) # standardize the random numbers.
rvalb<-apply(data,1,lintests1,yhat,reg$residuals,mflag,x,regfun,...)
# An n x nboot matrix of R values
rvalb<-rvalb/sqrt(length(y))
dstatb<-apply(abs(rvalb),2,max)
wstatb<-apply(rvalb^2,2,mean)
# compute test statistic
v<-c(rep(1,length(y)))
rval<-lintests1(v,yhat,reg$residuals,mflag,x,regfun,...)
rval<-rval/sqrt(length(y))
dstat<-max(abs(rval))
wstat<-mean(rval^2)
ib<-round(nboot*(1-alpha))
p.value.d<-1-sum(dstat>=dstatb)/nboot
p.value.w<-1-sum(wstat>=wstatb)/nboot
list(dstat=dstat,wstat=wstat,p.value.d=p.value.d,p.value.w=p.value.w)
}


tauloc<-function(x,cval=4.5){
#
# Compute the tau measure of location as described in
# Yohai and Zamar (JASA, 1988, 83, 406-413).
#
x<-elimna(x)
s<-qnorm(.75)*mad(x)
y<-(x-median(x))/s
W<-(1-(y/cval)^2)^2
flag<-(abs(y)>cval)
W[flag]<-0
val<-sum(W*x)/sum(W)
val
}

tauvar<-function(x,cval=3){
#
# Compute the tau measure of scale as described in
# Yohai and Zamar (JASA, 1988, 83, 406-413).
# The computational method is described in Maronna and Zamar
# (Technometrics, 2002, 44, 307-317)
#  see p. 310
#
x<-elimna(x)
s<-qnorm(.75)*mad(x)
y<-(x-tauloc(x))/s
cvec<-rep(cval,length(x))
W<-apply(cbind(y^2,cvec^2),1,FUN="min")
val<-s^2*sum(W)/length(x)
val
}
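#
#  Illustrative usage sketch for tauloc and tauvar (added comments, not
#  original code), using cnorm, defined earlier in this file:
#  set.seed(1)
#  z<-cnorm(200,epsilon=.1,k=10)
#  tauloc(z); tauvar(z)  # robust location and scale
#  mean(z); var(z)       # the variance is inflated by the contamination
#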

gkcor<-function(x,y,varfun=tauvar,ccov=FALSE,...){
#
# Compute a correlation coefficient using the Gnanadesikan-Kettenring
# estimator.
#  ccov=T computes the covariance instead.
# (cf. Maronna & Zamar, 2002, Technometrics)
#
val<-.25*(varfun(x+y,...)-varfun(x-y,...))
if(!ccov)val<-val/(sqrt(varfun(x,...))*sqrt(varfun(y,...)))
val
}
covroc<-function(x){
#
# compute Rocke's TBS covariance matrix
#
 library(robust)
temp<-covRob(x,estim="M")
val<-temp[2]$cov
val
}
indt<-function(x,y,nboot=500,flag=1,SEED=TRUE){
#
# Test the hypothesis of independence between x and y by
# testing the hypothesis that the regression surface is a horizontal plane.
# Stute et al. (1998, JASA, 93, 141-149).
#
#  flag=1 gives Kolmogorov-Smirnov test statistic
#  flag=2 gives the Cramer-von Mises test statistic
#  flag=3 causes both test statistics to be reported.
#
#  tr is fixed at 0 in this version, which yields the usual
#  Cramer-von Mises test statistic when flag=2.
#
#  Modified Dec 2005.
#
tr=0
#if(tr<0)stop("Amount trimmed must be > 0")
#if(tr>.5)stop("Amount trimmed must be <=.5")
if(SEED)set.seed(2)
x<-as.matrix(x)
# First, eliminate any rows of data with missing values.
temp <- cbind(x, y)
        temp <- elimna(temp)
        pval<-ncol(temp)-1
        x <- temp[,1:pval]
        y <- temp[, pval+1]
x<-as.matrix(x)
mflag<-matrix(NA,nrow=length(y),ncol=length(y))
for (j in 1:length(y)){
for (k in 1:length(y)){
mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x))
}
}
# ith row of mflag indicates which rows of the matrix x are less
# than or equal to ith row of x
#
yhat<-mean(y)
res<-y-yhat
print("Taking bootstrap sample, please wait.")
data<-matrix(runif(length(y)*nboot),nrow=nboot)#
data<-(data-.5)*sqrt(12) # standardize the random numbers.
rvalb<-apply(data,1,regts1,yhat,res,mflag,x,tr)
# An n x nboot matrix of R values
rvalb<-rvalb/sqrt(length(y))
dstatb<-apply(abs(rvalb),2,max)
wstatb<-apply(rvalb^2,2,mean,tr=tr)
v<-c(rep(1,length(y)))
rval<-regts1(v,yhat,res,mflag,x,tr=0)
rval<-rval/sqrt(length(y))
dstat<-NA
wstat<-NA
critd<-NA
critw<-NA
p.vald<-NA
p.valw<-NA
if(flag==1 || flag==3){
dstat<-max(abs(rval))
p.vald<-1-sum(dstat>=dstatb)/nboot
}
if(flag==2 || flag==3){
wstat<-mean(rval^2,tr=tr)
p.valw<-1-sum(wstat>=wstatb)/nboot
}
list(dstat=dstat,wstat=wstat,p.value.d=p.vald,p.value.w=p.valw)
}
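#
#  Illustrative usage sketch for indt (added comments, not original code):
#  under independence the reported p-values should not be small.
#  set.seed(1)
#  indt(rnorm(50),rnorm(50),flag=3)  # both test statistics
#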


taulc<-function(x,mu.too=F){
#
# Compute the tau measure of scale (tauvar); with mu.too=T, return
# the tau measure of location (tauloc) followed by the scale measure.
#
val<-tauvar(x)
if(mu.too){
val[2]<-val
val[1]<-tauloc(x)
}
val
}


trimww.sub<-function(cmat,vmean,vsqse,h,J,K){
#
#  This function is used by trimww
#
#  The function performs a variation of Johansen's test of C mu = 0 for
#  a within by within design
#  C is a k by p matrix of rank k and mu is a p by 1 matrix of
#  of unknown  medians.
#  The argument cmat contains the matrix C.
#  vmean is a vector of length p containing the p medians
#  vsqe is matrix containing the
#  estimated covariances among the medians
#  h is  the sample size
#
p<-J*K
yvec<-matrix(vmean,length(vmean),1)
test<-cmat%*%vsqse%*%t(cmat)
invc<-solve(test)
test<-t(yvec)%*%t(cmat)%*%invc%*%cmat%*%yvec
temp<-0
mtem<-vsqse%*%t(cmat)%*%invc%*%cmat
temp<-(sum(diag(mtem%*%mtem))+(sum(diag(mtem)))^2)/(h-1)
A<-.5*sum(temp)
cval<-nrow(cmat)+2*A-6*A/(nrow(cmat)+2)
test<-test/cval
test
}



trimww<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2){
#
#  Perform a J by K anova using trimmed means with
#  repeated measures on both factors.
#
#  tr=.2 is default trimming
#
#  The R variable data is assumed to contain the raw
#  data stored in list mode. data[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  data[[K]] is the data for level 1,K
#  data[[K+1]] is the data for level 2,1, data[[2K]] is level 2,K, etc.
#
#  It is assumed that data has length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
if(is.list(x))x<-elimna(matl(x))
if(is.matrix(x))x<-elimna(x)
data<-x
if(is.matrix(data))data<-listm(data)
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups stored in x is")
print(length(data))
print("Warning: These two values are not equal")
}
if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.")
tmeans<-0
h<-length(data[[grp[1]]])
v<-matrix(0,p,p)
for (i in 1:p)tmeans[i]<-mean(data[[grp[i]]],tr=tr,na.rm=TRUE)
v<-covmtrim(data,tr=tr)
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
#  Do test for factor A
cmat<-kron(cj,ik)  # Contrast matrix for factor A
#Qa<-johansp(cmat,tmeans,v,h,J,K)
Qa<-trimww.sub(cmat,tmeans,v,h,J,K)
#Qa.siglevel<-1-pf(Qa$teststat,J-1,999)
Qa.siglevel<-1-pf(Qa,J-1,999)
# Do test for factor B
cmat<-kron(ij,ck)  # Contrast matrix for factor B
#Qb<-johansp(cmat,tmeans,v,h,J,K)
Qb<-trimww.sub(cmat,tmeans,v,h,J,K)
Qb.siglevel<-1-pf(Qb,K-1,999)
# Do test for factor A by B interaction
cmat<-kron(cj,ck)  # Contrast matrix for factor A by B
#Qab<-johansp(cmat,tmeans,v,h,J,K)
Qab<-trimww.sub(cmat,tmeans,v,h,J,K)
Qab.siglevel<-1-pf(Qab,(J-1)*(K-1),999)
list(Qa=Qa,Qa.siglevel=Qa.siglevel,
Qb=Qb,Qb.siglevel=Qb.siglevel,
Qab=Qab,Qab.siglevel=Qab.siglevel)
}


msmedci<-function(x,alpha=.05,nullval=0){
#
# Confidence interval for the median based on the McKean-Schrader
# estimate of the standard error (msmedse).
#
se<-msmedse(x)
est<-median(x)
ci.low<-est-qnorm(1-alpha/2)*se
ci.hi<-est+qnorm(1-alpha/2)*se
test<-(est-nullval)/se
p.value<-2*(1-pnorm(abs(test)))
list(test=test,ci.low=ci.low,ci.hi=ci.hi,p.value=p.value)
}
medcipb<-function(x,alpha=.05,null.val=NA,nboot=500,SEED=TRUE,...){
#
#   Bootstrap confidence interval for the median of a single variable.
#
#   Missing values are allowed.
#
x<-elimna(x)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,median) # Bootstrapped values
test<-NULL
if(!is.na(null.val)){
tv<-sum(bvec==null.val)/nboot
test<-sum(bvec>null.val)/nboot+.5*tv
if(test> .5)test<-1-test
test<-2*test
}
bvec<-sort(bvec)
icl<-round(alpha*nboot/2)+1
icu<-nboot-icl-1
cilow<-bvec[icl]
ciup<-bvec[icu]
list(ci.low=cilow,ci.up=ciup,p.value=test)
}

regtest<-function(x,y,regfun=tsreg,nboot=600,alpha=.05,plotit=TRUE,
grp=c(1:ncol(x)),nullvec=c(rep(0,length(grp))),xout=FALSE,outfun=outpro,SEED=TRUE,pr=TRUE,...){
#
#  Test the hypothesis that q of the p predictors are equal to
#  some specified constants. By default, the hypothesis is that all
#  p predictors have a coefficient equal to zero.
#  The method is based on a confidence ellipsoid.
#  The critical value is determined with the percentile bootstrap method
#  in conjunction with Mahalanobis distance.
#
x<-as.matrix(x)
p1<-ncol(x)+1
p<-ncol(x)
xy<-cbind(x,y)
xy<-elimna(xy)
x<-xy[,1:p]
y<-xy[,p1]
if(xout){
if(pr)print("Default for outfun is now outpro")
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
x<-as.matrix(x)
if(length(grp)!=length(nullvec))stop("The arguments grp and nullvec must have the same length.")
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,regboot,x,y,regfun) # A p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
grp<-grp+1
est<-regfun(x,y)$coef
estsub<-est[grp]
bsub<-t(bvec[grp,])
if(length(grp)==1){
m1<-sum((bvec[grp,]-est)^2)/(length(y)-1)
dis<-(bsub-estsub)^2/m1
}
if(length(grp)>1){
mvec<-apply(bsub,2,FUN=mean)
m1<-var(t(t(bsub)-mvec+estsub))
dis<-mahalanobis(bsub,estsub,m1)
}
dis2<-order(dis)
dis<-sort(dis)
critn<-floor((1-alpha)*nboot)
crit<-dis[critn]
test<-mahalanobis(t(estsub),nullvec,m1)
sig.level<-1-sum(test>dis)/nboot
if(length(grp)==2 && plotit){
plot(bsub,xlab="Parameter 1",ylab="Parameter 2")
points(nullvec[1],nullvec[2],pch=0)
xx<-bsub[dis2[1:critn],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}
list(test=test,crit=crit,p.value=sig.level,nullvec=nullvec,est=estsub,n=length(y))
}

reg2ci<-function(x,y,x1,y1,regfun=tsreg,nboot=599,alpha=.05,plotit=TRUE,SEED=TRUE,
xout=FALSE,outfun=outpro,xlab="X",ylab="Y",pr=TRUE,...){
#
#   Compute a .95 confidence interval for the difference between
#   the intercepts and slopes corresponding to two independent groups.
#   The default regression method is Theil-Sen.
#
#   The predictor values for the first group are
#   assumed to be in the n by p matrix x.
#   The predictors for the second group are in x1
#
#   The default number of bootstrap samples is nboot=599
#
#   regfun can be any R function that returns the coefficients in
#   the vector regfun$coef, the first element of which contains the
#   estimated intercept, the second element contains the estimate of
#   the first predictor, etc.
#
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
x1<-as.matrix(x1)
xx1<-cbind(x1,y1)
xx1<-elimna(xx1)
x1<-xx1[,1:ncol(x1)]
x1<-as.matrix(x1)
y1<-xx1[,ncol(x1)+1]
x=as.matrix(x)
x1=as.matrix(x1)
if(xout){
if(pr)print("outfun now defaults to outpro rather than out")
flag1=outfun(x,...)$keep
flag2=outfun(x1,...)$keep
x=x[flag1,]
y=y[flag1]
x1=x1[flag2,]
y1=y1[flag2]
}
n=length(y)
n[2]=length(y1)
x<-as.matrix(x)
x1<-as.matrix(x1)
est1=regfun(x,y,...)$coef
est2=regfun(x1,y1,...)$coef
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...) # A p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
bvec1<-apply(data,1,regboot,x1,y1,regfun,xout=FALSE,...)
bvec<-bvec-bvec1
p1<-ncol(x)+1
regci<-matrix(0,p1,6)
dimnames(regci)<-list(NULL,
c("Parameter","ci.lower","ci.upper","p.value","Group 1","Group 2"))
ilow<-round((alpha/2)*nboot)+1
ihi<-nboot-(ilow-1)
for(i in 1:p1){
temp<-sum(bvec[i,]<0)/nboot+sum(bvec[i,]==0)/(2*nboot)
regci[i,4]<-2*min(temp,1-temp)
bsort<-sort(bvec[i,])
regci[i,2]<-bsort[ilow]
regci[i,3]<-bsort[ihi]
regci[,1]<-c(0:ncol(x))
}
regci[,5]=est1
regci[,6]=est2
if(ncol(x)==1 && plotit){
plot(c(x,x1),c(y,y1),type="n",xlab=xlab,ylab=ylab)
points(x,y)
points(x1,y1,pch="+")
abline(regfun(x,y,...)$coef)
abline(regfun(x1,y1,...)$coef,lty=2)
}
list(n=n,output=regci)
}
med2mcp<-function(J,K,x,alpha=.05,nboot=NA,grp=NA,est=median,bhop=FALSE,SEED=TRUE,
...){
#
#   Multiple comparisons for  J by K designs using percentile
#   bootstrap and medians (independent groups).
#
#   A percentile bootstrap method with Rom's method is used.
#
#   The data are assumed to be stored as done in the function t2way
#
#   est is the measure of location and defaults to the median
#   ... can be used to set optional arguments associated with est
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   Missing values are allowed.
#
chk<-con2way(J,K)
test1<-medpb(x,alpha=alpha,nboot=nboot,grp=grp,est=est,con=chk$conA,bhop=bhop,SEED=SEED,...)
test2<-medpb(x,alpha=alpha,nboot=nboot,grp=grp,est=est,con=chk$conB,bhop=bhop,SEED=SEED,...)
test3<-medpb(x,alpha=alpha,nboot=nboot,grp=grp,est=est,con=chk$conAB,bhop=bhop,SEED=SEED,...)
list(Factor.A=test1,Factor.B=test2,Factor.AB=test3)
}

anova1<-function(x){
#
# conventional one-way anova
#
if(is.matrix(x))x<-listm(x)
A<-0
B<-0
C<-0
N<-0
for(j in 1:length(x)){
N<-N+length(x[[j]])
A<-A+sum(x[[j]]^2)
B<-B+sum(x[[j]])
C<-C+(sum(x[[j]]))^2/length(x[[j]])
}
SST<-A-B^2/N
SSBG<-C-B^2/N
SSWG<-A-C
nu1<-length(x)-1
nu2<-N-length(x)
MSBG<-SSBG/nu1
MSWG<-SSWG/nu2
FVAL<-MSBG/MSWG
pvalue<-1-pf(FVAL,nu1,nu2)
list(F.test=FVAL,p.value=pvalue,df1=nu1,df2=nu2,MSBG=MSBG,MSWG=MSWG)
}
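#
#  Example (illustrative, simulated data; the if(FALSE) guard keeps the
#  example from running when this file is sourced):
if(FALSE){
set.seed(1)
dat<-list(rnorm(20),rnorm(20,mean=.5),rnorm(20,mean=1))
anova1(dat) # returns F.test, p.value, df1, df2, MSBG and MSWG
}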
twodcor8<-function(x,y){
#
#   Compute a .95 confidence interval for
#   the difference between two dependent
#   correlations corresponding to two independent
#   groups.
#
#
# x is a matrix with two columns,
# y is a vector
#  Goal: test equality of Pearson correlation for x1, y versus x2, y.
#
# For general use, twodcor10, which calls this function and
# estimates an adjusted p-value, is probably better.
#
X<-elimna(cbind(x,y))
Z1<-(X[,1]-mean(X[,1]))/sqrt(var(X[,1]))
Z2<-(X[,2]-mean(X[,2]))/sqrt(var(X[,2]))
temp<-cor.test(Z1-Z2,X[,3])
temp<-temp[3]$p.value
list(p.value=temp)
}

twodcor10<-function(x,y,nboot=500,SEED=TRUE,alpha=.05){
#
#   Compute a .95 confidence interval for
#   the difference between two dependent
#   correlations corresponding to two independent
#   groups.
#
# x is a matrix with two columns,
# y is a vector
#  Goal: test equality of Pearson correlation for x1, y versus x2, y.
#
#   This function uses an adjusted p-value, the adjustment
#  being made assuming normality.
#
#  nboot indicates how many samples from a normal distribution
#  are used to approximate the adjustment.
#
# Simulations suggest that this function
#  continues to work well under non-normality.
#
if(SEED)set.seed(2)
X<-elimna(cbind(x,y))
if(ncol(X)!=3)stop("x should be a matrix with  two columns")
n<-nrow(X)
cval<-cor(X)
nval<-(cval[1,3]+cval[2,3])/2
cmat<-bdiag(1,3,nval)
cmat[1,2]<-nval
cmat[2,1]<-nval
pval<-NA
for(i in 1:nboot){
d<-rmul(n,p=3,cmat=cmat)
pval[i]<-twodcor8(d[,1:2],d[,3])$p.value
}
pval<-sort(pval)
iv<-round(alpha*nboot)
est.p<-pval[iv]
adp<-alpha/est.p
test<-twodcor8(X[,1:2],X[,3])$p.value
p.value<-test*adp
if(p.value>1)p.value<-1
list(p.value=p.value)
}
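#
#  Example (illustrative, simulated data):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(100),ncol=2)
y<-x[,1]+rnorm(50)
twodcor10(x,y) # test cor(x[,1],y) = cor(x[,2],y) with an adjusted p-value
}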

matsplit<-function(m,coln=NULL){
#
# Column coln of matrix m is assumed to contain a binary variable.
# This function removes rows with missing values
# and then splits m into two matrices based on the values
# in column coln
#
if(is.null(coln))stop("specify coln")
m<-elimna(m)
x<-m[,coln]
val<-unique(x)
if(length(val)>2)stop("More than two values detected in specified column")
flag<-(x==val[1])
m1<-m[flag,]
m2<-m[!flag,]
list(m1=m1,m2=m2)
}
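#
#  Example (illustrative):
if(FALSE){
m<-cbind(rnorm(10),rep(0:1,5))
matsplit(m,coln=2) # returns list(m1,m2), split on the values in column 2
}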
tkmcp<-function(x,alpha=.05,ind.pval=T){
#
# conventional Tukey-Kramer multiple comparison procedure
# for all pairwise comparisons.
#
#  ind.pval=T, computes p-value for each individual test
#  ind.pval=F computes p-value based on controlling the
#  familywise error rate. (The alpha level at which the
#  Tukey-Kramer test would reject.)
#
if(is.matrix(x))x<-listm(x)
J<-length(x)
A<-0
B<-0
C<-0
N<-0
for(j in 1:J){
N<-N+length(x[[j]])
A<-A+sum(x[[j]]^2)
B<-B+sum(x[[j]])
C<-C+(sum(x[[j]]))^2/length(x[[j]])
}
SST<-A-B^2/N
SSBG<-C-B^2/N
SSWG<-A-C
nu1<-length(x)-1
nu2<-N-length(x)
MSBG<-SSBG/nu1
MSWG<-SSWG/nu2
numcom<-length(x)*(length(x)-1)/2
output<-matrix(nrow=numcom,ncol=7)
dimnames(output)<-list(NULL,c("Group","Group","t.test","est.difference",
"ci.lower","ci.upper","p.value"))
ic<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
ic<-ic+1
output[ic,1]<-j
output[ic,2]<-k
dif<-mean(x[[j]])-mean(x[[k]])
output[ic,3]<-abs(dif)/sqrt(MSWG*(1/length(x[[j]])+1/length(x[[k]]))/2)
output[ic,4]<-dif
crit<-qtukey(1-alpha,length(x),nu2)
output[ic,5]<-dif-crit*sqrt(MSWG*(1/length(x[[j]])+1/length(x[[k]]))/2)
output[ic,6]<-dif+crit*sqrt(MSWG*(1/length(x[[j]])+1/length(x[[k]]))/2)
if(!ind.pval)output[ic,7]<-1-ptukey(output[ic,3],length(x),nu2)
if(ind.pval)output[ic,7]<-2*(1-pt(output[ic,3],nu2))
}}}
output
}
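#
#  Example (illustrative, simulated data):
if(FALSE){
set.seed(1)
x<-list(rnorm(15),rnorm(15,mean=.8),rnorm(15))
tkmcp(x) # all pairwise comparisons with Tukey-Kramer confidence intervals
}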

lstest4<-function(vstar,yhat,res,x){
ystar <- yhat + res * vstar
p<-ncol(x)
pp<-p+1
vals<-t(as.matrix(lsfit(x,ystar)$coef[2:pp]))
sa<-lsfitNci4(x, ystar)$cov[-1, -1]
sai<-solve(sa)
test<-(vals)%*%sai%*%t(vals)
test<-test[1,1]
test
}

lsfitNci4<-function(x,y,alpha=.05){
#
# Compute confidence for least squares
# regression using heteroscedastic method
# recommended by Cribari-Neto (2004).
#
x<-as.matrix(x)
if(nrow(x) != length(y))stop("Length of y does not match number of x values")
m<-cbind(x,y)
m<-elimna(m)
y<-m[,ncol(x)+1]
x<-as.matrix(m[,1:ncol(x)])
temp<-lsfit(x,y)
x<-cbind(rep(1,nrow(x)),x)
xtx<-solve(t(x)%*%x)
h<-diag(x%*%xtx%*%t(x))
n<-length(h)
d<-(n*h)/sum(h)
for(i in 1:length(d)){
	d[i]<-min(4, d[i])
}
hc4<-xtx%*%t(x)%*%diag(temp$res^2/(1-h)^d)%*%x%*%xtx
df<-nrow(x)-ncol(x)
crit<-qt(1-alpha/2,df)
al<-ncol(x)
ci<-matrix(NA,nrow=al,ncol=3)
for(j in 1:al){
ci[j,1]<-j
ci[j,2]<-temp$coef[j]-crit*sqrt(hc4[j,j])
ci[j,3]<-temp$coef[j]+crit*sqrt(hc4[j,j])
}
list(ci=ci,stand.errors=sqrt(diag(hc4)), cov=hc4)
}
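#
#  Example (illustrative, simulated heteroscedastic data):
if(FALSE){
set.seed(1)
x<-rnorm(50)
y<-x+rnorm(50)*(1+abs(x)) # error variance grows with |x|
lsfitNci4(x,y)$ci # HC4-based confidence intervals for intercept and slope
}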


hc4qtest<-function(x,y,k,nboot=500,SEED=TRUE){
#
# Test the hypothesis that an OLS slope is zero using an HC4 wild-bootstrap quasi-t test.
# k is the index of coefficient being tested
#
if(SEED)set.seed(2)
x<-as.matrix(x)
# First, eliminate any rows of data with missing values.
temp <- cbind(x, y)
        temp <- elimna(temp)
        pval<-ncol(temp)-1
        x <- temp[,1:pval]
        y <- temp[, pval+1]
x<-as.matrix(x)
p<-ncol(x)
pp<-p+1
temp<-lsfit(x,y)
yhat<-mean(y)
res<-y-yhat
s<-lsfitNci4(x, y)$cov[-1, -1]
s<-as.matrix(s)
si<-s[k,k]
b<-temp$coef[2:pp]
qtest<-b[k]/sqrt(si)
data<-matrix(runif(length(y)*nboot),nrow=nboot)
data<-(data-.5)*sqrt(12) # standardize the random numbers.
rvalb<-apply(data,1,lsqtest4,yhat,res,x, k)
sum<-sum(abs(rvalb)>= abs(qtest[1]))
p.val<-sum/nboot
list(p.value=p.val)
}
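#
#  Example (illustrative, simulated data):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(100),ncol=2)
y<-x[,1]+rnorm(50)
hc4qtest(x,y,k=1) # wild-bootstrap quasi-t test that the first slope is zero
}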

lsqtest4<-function(vstar,yhat,res,x, k){
ystar <- yhat + res * vstar
p<-ncol(x)
pp<-p+1
vals<-lsfit(x,ystar)$coef[2:pp]
sa<-lsfitNci4(x, ystar)$cov[-1, -1]
sa<-as.matrix(sa)
sai<-sa[k,k]
test<-vals[k]/sqrt(sai)
test
}
mrm1way<-function(x,q=.5,grp=NA,bop=FALSE,SEED=TRUE,mop=FALSE){
#  Perform a within groups one-way ANOVA using medians
#
#  If grp specified, do analysis on only the groups in grp.
#  Example: grp=(c(1,4)), compare groups 1 and 4 only.
#
#  bop=F, use non-bootstrap estimate of covariance matrix
#  bop=T, use bootstrap
#
#  mop=T, use usual median, otherwise use single order statistic
#
if(is.data.frame(x))x=as.matrix(x)
if(SEED)set.seed(2)
if(is.matrix(x))x<-listm(x)
K<-length(x) # Number of groups
p<-K
if(is.na(grp[1]))grp<-c(1:p)
x<-x[grp]
if(!is.list(x))stop("Data are not stored in list mode or a matrix")
tmeans<-0
n<-length(x[[1]])
v<-matrix(0,p,p)
if(!mop){
for (i in 1:p)tmeans[i]<-qest(x[[i]],q=q)
if(!bop)v<-covmmed(x,q=q)
if(bop)v<-bootcov(x,pr=FALSE,est=qest,q=q)
}
if(mop){
for (i in 1:p)tmeans[i]<-median(x[[i]])
v<-bootcov(x,pr=FALSE)
}
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
Qb<-johansp(ck,tmeans,v,n,1,K)
#print(Qb)
#p.value<-1-pf(Qb$teststat,K-1,999)
p.value<-Qb$siglevel
if(n>=20)p.value<-1-pf(Qb$teststat,K-1,999)
list(test.stat=Qb$teststat,p.value=p.value)
}
rmul<-function(n,p=2,cmat=diag(rep(1,p)),rho=NA,
mar.fun=rnorm,...){
#
# generate n observations from a p-variate dist
# By default, use normal distributions.
#
#To get a g-and-h distribution
# for the marginals, use mar.fun=ghdist.
# Example rmul(30,p=4,rho=.3,mar.fun=ghdist,g=.5,h=.2) will
# generate 30 vectors from a 4-variate distribution where the marginals
# have a g-and-h distribution with g=.5 and h=.2.
#
# This function is similar to ghmul, only here, generate the marginal values
# and then transform the data to have correlation matrix cmat
#
# cmat is the correlation matrix
# If the argument rho is specified, the correlations
# are taken to have this common value.
#
# Method (e.g., Browne, M. W. (1968). A comparison of factor analytic
# techniques. Psychometrika, 33, 267-334):
#  Let U'U=R be the Cholesky decomposition of R. Generate independent data
#  from some dist yielding X. Then XU has population correlation matrix R
#
if(!is.na(rho)){
if(abs(rho)>1)stop("rho must be between -1 and 1")
cmat<-matrix(rho,p,p)
diag(cmat)<-1
}
np<-n*p
x<-matrix(mar.fun(np,...),nrow=n,ncol=p)
rmat<-matsqrt(cmat)
x<-x%*%rmat
x
}


L1medcen <- function(X, tol = 1e-08, maxit = 200, m.init = apply(X, 2, median),
                     trace = FALSE)
{
  ## L1MEDIAN calculates the multivariate L1 median
  ## I/O: mX=L1median(X,tol);
  ##
  ## X  : the data matrix
  ## tol: the convergence criterion:
  ##      the iterative process stops when ||m_k - m_{k+1}|| < tol.
  ## maxit: maximum number of iterations
  ## m.init: starting value for m; typically the coordinatewise median
  ##
  ## Ref: Hossjer and Croux (1995)
  ##  "Generalizing Univariate Signed Rank Statistics for Testing
  ##   and Estimating a Multivariate Location Parameter";
  ##   Non-parametric Statistics, 4, 293-308.
  ##
  ## Implemented by Kristel Joossens
  ## Many thanks to Martin Maechler for improving the program!

  ## slightly faster version of 'sweep(x, 2, m)':
  centr <- function(X,m) X - rep(m, each = n)
  ## computes objective function in m based on X and a:
  mrobj <- function(X,m) sum(sqrt(rowSums(centr(X,m)^2)))
  d <- dim(X); n <- d[1]; p <- d[2]
  m <- m.init
  if(!is.numeric(m) || length(m) != p)
      stop("'m.init' must be numeric of length p =", p)
  k <- 1
  if(trace) nstps <- 0
  while (k <= maxit) {
    mold <- m
    obj.old <- if(k == 1) mrobj(X,mold) else obj
    X. <- centr(X, m)
    Xnorms <- sqrt(rowSums(X. ^ 2))
    inorms <- order(Xnorms)
    dx <- Xnorms[inorms] # smallest first, i.e., 0's if there are
    X  <- X [inorms,]
    X. <- X.[inorms,]
    ## using 1/x weighting {MM: should this be generalized?}
    w <- ## (0 norm -> 0 weight) :
        if (all(dn0 <- dx != 0))  1/dx
        else c(rep.int(0, length(dx)- sum(dn0)), 1/dx[dn0])
    delta <- colSums(X. * rep(w,p)) / sum(w)
    nd <- sqrt(sum(delta^2))

    maxhalf <- if (nd < tol) 0 else ceiling(log2(nd/tol))
    m <- mold + delta    # computation of a new estimate
    ## If step 'delta' is too far, we try halving the stepsize
    nstep <- 0
    while ((obj <- mrobj(X, m)) >= obj.old && nstep <= maxhalf) {
      nstep <- nstep+1
      m <- mold + delta/(2^nstep)
    }
    if(trace) {
        if(trace >= 2)
            cat(sprintf("k=%3d obj=%19.12g m=(",k,obj),
                paste(formatC(m),collapse=","),
                ")", if(nstep) sprintf(" nstep=%2d halvings",nstep) else "",
                "\n", sep="")
        nstps[k] <- nstep
    }
    if (nstep > maxhalf) { ## step halving failed; keep old
        m <- mold
        ## warning("step halving failed in ", maxhalf, " steps")
        break
      }
    k <- k+1
  }
  if (k > maxit) warning("iterations did not converge in ", maxit, " steps")
  if(trace == 1)
      cat("needed", k, "iterations with a total of",
          sum(nstps), "stepsize halvings\n")
#  return(m)
list(center=m)
}
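#
#  Example (illustrative, simulated data):
if(FALSE){
set.seed(1)
X<-matrix(rnorm(60),ncol=3)
L1medcen(X)$center # multivariate L1 (spatial) median of a 20 by 3 matrix
}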
spatcen<-function(x){
#
# compute spatial median
# x is an n by p matrix
#
if(!is.matrix(x))stop("x must be a matrix")
x<-elimna(x)
START<-apply(x,2,median)
#val<-nelder(x,ncol(x),spat.sub,START=START)
#val<-optim(par=START,x,ncol(x),spat.sub,START=START)
val=optim(START,spat.sub,x=x,method='BFGS')$par             
list(center=val)
}
olswbtest<-function(x,y,nboot=500,SEED=TRUE,RAD=TRUE,alpha=.05){
#
# Compute confidence intervals for all OLS slopes
# using HC4 wild bootstrap and Wald test.
#
# This function calls the functions
# olshc4 and
# olswbtest.sub
#
if(SEED)set.seed(2)
x<-as.matrix(x)
# First, eliminate any rows of data with missing values.
temp <- cbind(x, y)
        temp <- elimna(temp)
        pval<-ncol(temp)-1
        x <- temp[,1:pval]
        y <- temp[, pval+1]
x<-as.matrix(x)
p<-ncol(x)
pp<-p+1
temp<-lsfit(x,y)
yhat<-mean(y)
res<-y-yhat
s<-olshc4(x, y)$cov[-1, -1]
si<-solve(s)
b<-temp$coef[2:pp]
test=abs(b)*sqrt(diag(si))
if(RAD)data<-matrix(ifelse(rbinom(length(y)*nboot,1,0.5)==1,-1,1),nrow=nboot)
if(!RAD){
data<-matrix(runif(length(y)*nboot),nrow=nboot)
data<-(data-.5)*sqrt(12) # standardize the random numbers.
}
rvalb<-apply(data,1,olswbtest.sub,yhat,res,x) #a p by nboot matrix
rvalb=abs(rvalb)
ic=round((1-alpha)*nboot)
if(p==1)rvalb=t(as.matrix(rvalb))
temp=apply(rvalb,1,sort) # nboot by p matrix
pvals=NA
for(j in 1:p)pvals[j]=mean((rvalb[j,]>=test[j]))
cr=temp[ic,]
ci=b-cr/diag(sqrt(si)) #dividing because si is reciprocal of sq se
ci=cbind(ci,b+cr/diag(sqrt(si)))
ci=cbind(b,ci)
ci=cbind(c(1:nrow(ci)),ci,test,pvals)
dimnames(ci)<-
list(NULL,c("Slope_No.","Slope_est","Lower.ci","Upper.ci","Test.Stat","p.value"))
ci
}
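#
#  Example (illustrative, simulated data):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(100),ncol=2)
y<-x[,1]+rnorm(50)
olswbtest(x,y) # HC4 wild-bootstrap confidence intervals and p-values for both slopes
}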
olswbtest.sub<-function(vstar,yhat,res,x){
ystar <- yhat + res * vstar
p<-ncol(x)
pp<-p+1
vals<-t(as.matrix(lsfit(x,ystar)$coef[2:pp]))
sa<-olshc4(x, ystar)$cov[-1, -1]
sai<-solve(sa)
test<-vals*sqrt(diag(sai))
test
}



regpre<-function(x,y,regfun=lsfit,error=absfun,nboot=100,adz=TRUE,
mval=round(5*log(length(y))),model=NULL,locfun=mean,pr=FALSE,
xout=FALSE,outfun=out,STAND=FALSE,
plotit=TRUE,xlab="Model Number",ylab="Prediction Error",SEED=TRUE,...){
#
#   Estimate prediction error using the regression method
#   regfun. The .632 method is used.
#   (See Efron and Tibshirani, 1993, pp. 252--254)
#
#   The predictor values are assumed to be in the n-by-p matrix x.
#   The default number of bootstrap samples is nboot=100
#
#   Prediction error is the expected value of the function error,
#   which defaults to absolute error (absfun).
#
#   regfun can be any R function that returns the coefficients in
#   the vector regfun$coef, the first element of which contains the
#   estimated intercept, the second element contains the estimate of
#   the first predictor, etc.
#
#   The default value for mval, the number of observations to resample
#   for each of the B bootstrap samples is based on results by
#   Shao (JASA, 1996, 655-665). (Resampling n vectors of observations
#   model selection may not lead to the correct model as n->infinity.
#
#   The argument model should have list mode, model[[1]] indicates
#   which predictors are used in the first model. For example, storing
#   1,4 in model[[1]] means predictors 1 and 4 are being considered.
#   If model is not specified, and number of predictors is at most 5,
#   then all models are considered.
#
#   If adz=T, the model with all regression slopes equal to zero is added
#   to the models considered. That is, predict y using only the measure of
#   location specified by locfun.
#
if(pr){
print("By default, least squares regression is used, ")
print("But from Wilcox, R. R. 2008, Journal of Applied Statistics, 35, 1-8")
print("Setting regfun=tsreg appears to be a better choice for general use.")
print("That is, replace least squares with the Theil-Sen estimator")
print("Note: Default for the argument error is now absfun")
print(" meaning absolute error is used")
print("To use squared error, set error=sqfun")
}
x<-as.matrix(x)
d<-ncol(x)
p1<-d+1
temp<-elimna(cbind(x,y))
x<-temp[,1:d]
y<-temp[,d+1]
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
if(!STAND)flag<-outfun(x,plotit=FALSE,...)$keep
if(STAND)flag<-outpro(x,STAND=TRUE,plotit=FALSE)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(is.null(model)){
if(d<=5)model<-modgen(d,adz=adz)
if(d>5)model[[1]]<-c(1:ncol(x))
}
mout<-matrix(NA,length(model),5,dimnames=list(NULL,c("apparent.error",
"boot.est","err.632","var.used","rank")))
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=mval*nboot,replace=TRUE),nrow=nboot)
bid<-apply(data,1,idb,length(y))
#  bid is an n by nboot matrix. If the jth bootstrap sample from
#  1, ..., mval contains the value i, bid[i,j]=0; otherwise bid[i,j]=1
for (imod in 1:length(model)){
nmod=length(model[[imod]])-1
temp=c(nmod:0)
mout[imod,4]=sum(model[[imod]]*10^temp)
if(sum(model[[imod]]==0)!=1){
xx<-x[,model[[imod]]]
xx<-as.matrix(xx)
if(sum(model[[imod]]==0)!=1)bvec<-apply(data,1,regpres1,xx,y,regfun,mval,...)
# bvec is a p+1 by nboot matrix. The first row
# contains the bootstrap intercepts, the second row
# contains the bootstrap values for first predictor, etc.
if(sum(model[[imod]]==0)!=1)yhat<-cbind(1,xx)%*%bvec
if(sum(model[[imod]]==0)==1){
bvec0<-matrix(0,nrow=p1,ncol=nboot)
for(it in 1:nboot){
bvec0[1,it]<-locfun(y[data[it,]])
}
yhat<-cbind(1,x)%*%bvec0
}
# yhat is n by nboot matrix of predicted values based on
                           # bootstrap regressions.
bi<-apply(bid,1,sum) # B sub i in notation of Efron and Tibshirani, p. 253
temp<-(bid*(yhat-y))
diff<-apply(temp,1,error)
ep0<-sum(diff/bi)/length(y)
aperror<-error(regfun(xx,y,...)$resid)/length(y) # apparent error
regpre<-.368*aperror+.632*ep0
mout[imod,1]<-aperror
mout[imod,3]<-regpre
temp<-yhat-y
diff<-apply(temp,1,error)
mout[imod,2]<-sum(diff)/(nboot*length(y))
}
if(sum(model[[imod]]==0)==1){
mout[imod,3]<-locpre(y,error=error,est=locfun,SEED=SEED,mval=mval)
}}
mout[,5]=rank(mout[,3])
if(plotit)plot(c(1:nrow(mout)),mout[,3],xlab=xlab,ylab=ylab)
list(estimates=mout)
}
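#
#  Example (illustrative, simulated data):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(100),ncol=2)
y<-x[,1]+rnorm(50)
regpre(x,y,plotit=FALSE) # .632 prediction error for all candidate models
}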
push<-function(mat){
#
# Shift the entries in every column of mat down one row; the first row becomes 0.
#
matn<-matrix(NA,nrow=nrow(mat),ncol=ncol(mat))
Jm<-nrow(mat)-1
for (k in 1:ncol(mat)){
temp<-mat[,k]
vec<-0
vec[2:nrow(mat)]<-temp[1:Jm]
matn[,k]<-vec
}
matn
}
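#
#  Example (illustrative):
if(FALSE){
push(matrix(1:6,ncol=2)) # shifts each column down one row; the first row becomes 0
}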

ancova<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=TRUE,pts=NA,sm=FALSE,
pr=TRUE,xout=FALSE,outfun=out,LP=TRUE,...){
#
# Compare two independent  groups using the ancova method
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
#
#  Assume data are in x1 y1 x2 and y2
#
#  sm=T will create smooths using bootstrap bagging.
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#
if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function')
if(length(x1)!=length(y1))stop("x1 and y1 have different lengths")
if(length(x2)!=length(y2))stop("x2 and y2 have different lengths")
xy=elimna(cbind(x1,y1))
x1=xy[,1]
y1=xy[,2]
xy=elimna(cbind(x2,y2))
x2=xy[,1]
y2=xy[,2]
if(pr){
print("NOTE: Confidence intervals are adjusted to control the probability")
print("of at least one Type I error.")
print("But p-values are not")
}
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
if(is.na(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,10)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi","p.value","crit.val"))
for (i in 1:5){
g1<-y1[near(x1,x1[isub[i]],fr1)]
g2<-y2[near(x2,x1[isub[i]],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
test<-yuen(g1,g2,tr=tr)
mat[i,1]<-x1[isub[i]]
mat[i,2]<-length(g1)
mat[i,3]<-length(g2)
mat[i,4]<-test$dif
mat[i,5]<-test$teststat
mat[i,6]<-test$se
critv<-NA
if(alpha==.05)critv<-smmcrit(test$df,5)
if(alpha==.01)critv<-smmcrit01(test$df,5)
if(is.na(critv))critv<-smmval(test$df,5,alpha=alpha)
mat[i,10]<-critv
cilow<-test$dif-critv*test$se
cihi<-test$dif+critv*test$se
mat[i,7]<-cilow
mat[i,8]<-cihi
mat[i,9]<-test$p.value
}}
if(!is.na(pts[1])){
if(length(pts)>=29)stop("At most 28 points can be compared")
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
mat<-matrix(NA,length(pts),10)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","TEST","se","ci.low","ci.hi",
"p.value","crit.val"))
for (i in 1:length(pts)){
g1<-y1[near(x1,pts[i],fr1)]
g2<-y2[near(x2,pts[i],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
test<-yuen(g1,g2,tr=tr)
mat[i,1]<-pts[i]
mat[i,2]<-length(g1)
mat[i,3]<-length(g2)
if(length(g1)<=5)print(paste("Warning, there are",length(g1)," points corresponding to the design point X=",pts[i]))
if(length(g2)<=5)print(paste("Warning, there are",length(g2)," points corresponding to the design point X=",pts[i]))
mat[i,4]<-test$dif
mat[i,5]<-test$teststat
mat[i,6]<-test$se
if(length(pts)>=2)critv<-smmcrit(test$df,length(pts))
if(length(pts)==1)critv<-qt(.975,test$df)
cilow<-test$dif-critv*test$se
cihi<-test$dif+critv*test$se
mat[i,7]<-cilow
mat[i,8]<-cihi
mat[i,9]<-test$p.value
mat[i,10]<-critv
}}
if(plotit){
#if(xout){
#flag<-outfun(x1,...)$keep
#x1<-x1[flag]
#y1<-y1[flag]
#flag<-outfun(x2,...)$keep
#x2<-x2[flag]
#y2<-y2[flag]
#}
runmean2g(x1,y1,x2,y2,fr=fr1,est=mean,tr=tr,sm=sm,xout=FALSE,LP=LP,...)
}
list(output=mat)
}
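#
#  Example (illustrative, simulated data):
if(FALSE){
set.seed(1)
x1<-runif(50); y1<-x1+rnorm(50)
x2<-runif(50); y2<-2*x2+rnorm(50)
ancova(x1,y1,x2,y2,plotit=FALSE) # compares the regression lines at five design points
}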


miss2na<-function(m,na.val=NULL){
#
# Convert any missing value, indicated by na.val,
# to NA.
#
#  Example, if 999 is missing value, use miss2na(m,999)
#
if(is.null(na.val))stop("Specify a missing value")
if(is.vector(m)){
if(!is.list(m)){
flag=(m==na.val)
m[flag]=NA
}}
if(is.matrix(m)){
for(j in 1:ncol(m)){
x=m[,j]
flag=(x==na.val)
x[flag]=NA
m[,j]=x
}}
if(is.list(m)){
for(j in 1:length(m)){
x=m[[j]]
flag=(x==na.val)
x[flag]=NA
m[[j]]=x
}}
m
}

plotCI <- function (x, y = NULL, uiw=NULL, liw = uiw, aui=NULL, ali=aui,
                    err="y", ylim=NULL, sfrac = 0.01, gap=0, add=FALSE,
                    col=par("col"), lwd=par("lwd"), slty=par("lty"), xlab=NULL,
                    ylab=NULL, ...) {
## originally from Bill Venables, R-list
  if (is.list(x)) {
    y <- x$y
    x <- x$x
  }
  if (is.null(y)) {
    if (is.null(x))
      stop("both x and y NULL")
    y <- as.numeric(x)
    x <- seq(along = x)
  }
  if (missing(xlab)) xlab <- deparse(substitute(x))
  if (missing(ylab)) ylab <- deparse(substitute(y))
  if (missing(uiw)) { ## absolute limits
    ui <- aui
    li <- ali
  }
  else { ## relative limits
    if (err=="y") z <- y else z <- x
    if(is.null(uiw))stop("Argument uiw, the width of the interval, must be specified")
    ui <- z + uiw
    li <- z - liw
  }
  if (is.null(ylim)) ylim <- range(c(y, ui, li), na.rm=TRUE)
  if (add) {
    points(x, y, col=col, lwd=lwd, ...)
  } else {
    plot(x, y, ylim = ylim, col=col, lwd=lwd, xlab=xlab, ylab=ylab, ...)
  }
  if (gap==TRUE) gap <- 0.01 ## default gap size
  ul <- c(li, ui)
  if (err=="y") {
    gap <- rep(gap,length(x))*diff(par("usr")[3:4]) # smidge <- diff(par("usr")[1:2]) * sfrac
    smidge <- par("fin")[1] * sfrac
# segments(x , li, x, pmax(y-gap,li), col=col, lwd=lwd, lty=slty)
# segments(x , ui, x, pmin(y+gap,ui), col=col, lwd=lwd, lty=slty)
    arrows(x , li, x, pmax(y-gap,li), col=col, lwd=lwd, lty=slty, angle=90, length=smidge, code=1)
    arrows(x , ui, x, pmin(y+gap,ui), col=col, lwd=lwd, lty=slty, angle=90, length=smidge, code=1)
    ## horizontal segments
# x2 <- c(x, x)
# segments(x2 - smidge, ul, x2 + smidge, ul, col=col, lwd=lwd)
  }
  else if (err=="x") {
    gap <- rep(gap,length(x))*diff(par("usr")[1:2])
    smidge <- par("fin")[2] * sfrac
# smidge <- diff(par("usr")[3:4]) * sfrac
    arrows(li, y, pmax(x-gap,li), y, col=col, lwd=lwd, lty=slty, angle=90, length=smidge, code=1)
    arrows(ui, y, pmin(x+gap,ui), y, col=col, lwd=lwd, lty=slty, angle=90, length=smidge, code=1)
    ## vertical segments
# y2 <- c(y, y)
# segments(ul, y2 - smidge, ul, y2 + smidge, col=col, lwd=lwd)
  }
  invisible(list(x = x, y = y))
}
bdanova2<-function(x1,x2=NULL,alpha=.05,power=.9,delta){
#
#  Do the second stage of the Bishop-Duewicz ANOVA
#
if(is.null(x2[1])){
stage1=bdanova1(x1,alpha=alpha,power=power,delta=delta)
return(list(N=stage1$N,d=stage1$d,crit=stage1$crit))
}
if(!is.null(x2[1])){
if(is.na(delta))stop("A value for delta was not specified")
if(!is.list(x1)){
if(!is.matrix(x1))stop("Data must be stored in a matrix or in list mode")
y<-x1
x1<-list()
for(j in 1:ncol(y))x1[[j]]<-y[,j]
}
if(!is.list(x2)){
if(!is.matrix(x2))stop("Data must be stored in a matrix or in list mode")
y<-x2
x2<-list()
for(j in 1:ncol(y))x2[[j]]<-y[,j]
}
if(length(x1)!=length(x2))stop("Length of x1 does not match the length of x2")
TT<-NA
U<-NA
J<-length(x1)
nvec<-NA
nvec2<-NA
svec<-NA
for(j in 1:length(x1)){
nvec[j]<-length(x1[[j]])
nvec2[j]<-length(x2[[j]])
svec[j]<-var(x1[[j]])
TT[j]<-sum(x1[[j]])
U[j]<-sum(x2[[j]])
}
temp<-bdanova1(x1,alpha=alpha,power=power,delta=delta)
need<-temp$N-nvec
#for(j in 1:length(x1))print(c(nvec2[j],need[j]))
for(j in 1:length(x1))if(nvec2[j]<need[j]){
print(paste("Warning: For Group", j))
print("The first stage analysis based on bdanova1 reports that a larger")
print(" sample is required than what was found in the argument x2")
}
b<-sqrt(nvec*((nvec+nvec2)*temp$d-svec)/(nvec2*svec))
b<-(b+1)/(nvec+nvec2)
xtil<-TT*(1-nvec2*b)/nvec+b*U
ftil<-sum((xtil-mean(xtil))^2)/temp$d
return(list(test.stat=ftil,crit=temp$crit))
}
}
medr<-function(x,est=median,alpha=.05,nboot=500,grp=NA,op=1,MM=FALSE,cop=3,pr=TRUE,
SEED=TRUE,...){
#
#   Test the hypothesis that the distribution for each pairwise
#   difference has a measure of location = 0
#   By default, the  median is used
#
#   The default number of bootstrap samples is nboot=500
#
#   op controls how depth is measured
#   op=1, Mahalanobis
#   op=2, Mahalanobis based on MCD covariance matrix
#   op=3, Projection distance
#   op=4, Projection distance using FORTRAN version
#
#   for arguments MM and cop, see pdis.
#
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x)){
xx<-list()
for(i in 1:ncol(x)){
xx[[i]]<-x[,i]
}
x<-xx
}
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(grp[1])){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
mvec<-NA
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
x[[j]]<-temp
mvec[j]<-est(temp,...)
}
Jm<-J-1
d<-(J^2-J)/2
data<-list()
bvec<-matrix(NA,ncol=d,nrow=nboot)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
for(it in 1:nboot){
for(j in 1:J)data[[j]]<-sample(x[[j]],size=length(x[[j]]),replace=TRUE)
dval<-0
for(j in 1:J){
for(k in 1:J){
if(j<k){
dval<-dval+1
bvec[it,dval]<-loc2dif(data[[j]],data[[k]],est=est,...)
}}}}
output<-matrix(NA,nrow=d,ncol=3)
dimnames(output)<-list(NULL,c("Group","Group","psihat"))
tvec<-NA
dval<-0
for(j in 1:J){
for(k in 1:J){
if(j<k){
dval<-dval+1
output[dval,1]<-j
output[dval,2]<-k
tvec[dval]<-loc2dif(x[[j]],x[[k]],est=est,...)
output[dval,3]<-tvec[dval]
}}}
tempcen<-apply(bvec,1,mean)
vecz<-rep(0,d)
smat<-var(bvec-tempcen+tvec)
temp<-bvec-tempcen+tvec
bcon<-rbind(bvec,vecz)
if(op==1)dv<-mahalanobis(bcon,tvec,smat)
if(op==2){
smat<-cov.mcd(temp)$cov
dv<-mahalanobis(bcon,tvec,smat)
}
if(op==3){
print("Computing p-value. Might take a while with op=3")
dv<-pdis(bcon,MM=MM,cop=cop,center=tvec)
}
if(op==4)dv<-pdis.for(bcon,MM=MM,cop=cop,pr=FALSE,center=tvec)
bplus<-nboot+1
sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot
if(op==4)print(sig.level)

list(p.value=sig.level,output=output)
}

rm2mcp<-function(J,K,x,est=tmean,alpha=.05,grp=NA,dif=TRUE,nboot=NA,
plotit=FALSE,BA=FALSE,hoch=FALSE,...){
#
# This function performs multiple comparisons for
# dependent groups in a within by within designs.
# It creates the linear contrasts and calls rmmcppb
# assuming that main effects and interactions for a
# two-way design are to be tested.
#
        #   The data are assumed to be stored in x in list mode or in a matrix.
        #  If grp is unspecified, it is assumed x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second factor: level 1,2
        #  x[[j+1]] is the data for level 2,1, etc.
        #  If the data are in wrong order, grp can be used to rearrange the
        #  groups. For example, for a two by two design, grp<-c(2,4,3,1)
        #  indicates that the second group corresponds to level 1,1;
        #  group 4 corresponds to level 1,2; group 3 is level 2,1;
        #  and group 1 is level 2,2.
        #
        #   Missing values are automatically removed.
        #
if(is.data.frame(x))x=as.matrix(x)
        JK <- J * K
        if(is.matrix(x))
                x <- listm(x)
        if(!is.na(grp[1])) {
                yy <- x
                for(j in 1:length(grp))
                        x[[j]] <- yy[[grp[j]]]
        }
        if(!is.list(x))
                stop("Data must be stored in list mode or a matrix.")
        for(j in 1:JK) {
                xx <- x[[j]]
               # xx[[j]] <- xx[!is.na(xx)]
               x[[j]] <- xx[!is.na(xx)]
        }
        #
        # Create the three contrast matrices
        #
temp<-con2way(J,K)
conA<-temp$conA
conB<-temp$conB
conAB<-temp$conAB
        ncon <- max(nrow(conA), nrow(conB), nrow(conAB))
FacA<-rmmcppb(x,con=conA,est=est,plotit=plotit,dif=dif,grp=grp,
nboot=nboot,BA=TRUE,hoch=FALSE,...)
FacB<-rmmcppb(x,con=conB,est=est,plotit=plotit,dif=dif,grp=grp,
nboot=nboot,BA=TRUE,hoch=FALSE,...)
FacAB<-rmmcppb(x,con=conAB,est=est,plotit=plotit,dif=dif,grp=grp,
nboot=nboot,BA=TRUE,hoch=FALSE,...)
list(Factor.A=FacA,Factor.B=FacB,Factor.AB=FacAB)

}

acbinomci<-function(x=sum(y),nn=length(y),y=NULL,n=NA,alpha=.05){
#
#  Compute a 1-alpha confidence interval for p, the probability of
#  success for a binomial distribution, using a generalization of the
#  Agresti-Coull method studied by Brown, Cai and DasGupta
#  (Annals of Statistics, 2002, 30, 160-201).
#
#  y is a vector of 1s and 0s.
#  x is number of successes.
#
if(!is.null(y[1])){
y=elimna(y)
nn=length(y)
}
if(nn==1)stop("Something is wrong: number of observations is only 1")
n<-nn
if(x!=n && x!=0){
cr=qnorm(1-alpha/2)
ntil=n+cr^2
ptil=(x+cr^2/2)/ntil
lower=ptil-cr*sqrt(ptil*(1-ptil)/ntil)
upper=ptil+cr*sqrt(ptil*(1-ptil)/ntil)
}
if(x==0){
lower<-0
upper<-1-alpha^(1/n)
}
if(x==1){
upper<-1-(alpha/2)^(1/n)
lower<-1-(1-alpha/2)^(1/n)
}
if(x==n-1){
lower<-(alpha/2)^(1/n)
upper<-(1-alpha/2)^(1/n)
}
if(x==n){
lower<-alpha^(1/n)
upper<-1
}
phat<-x/n
list(phat=phat,ci=c(lower,upper))
}
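#
#  Example (illustrative):
if(FALSE){
acbinomci(x=15,nn=40)       # CI for p with 15 successes in 40 trials
acbinomci(y=c(1,0,1,1,0,1)) # same interval computed from a vector of 1s and 0s
}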

covmtrim<-function(x,tr=.2,p=length(x),grp=c(1:p)){
#
#  Estimate the covariance matrix for the sample trimmed means corresponding
#  to the data in the R variable x,
#  which is assumed to be stored in list mode or a matrix.
# (x[[1]] contains the data for group 1, x[[2]] the data for group 2, etc.)
#  The function returns a p by p matrix of covariances, the diagonal
#  elements being equal to the squared standard error of the sample
#  trimmed means, where p is the number of groups to be included.
#  By default, all the groups in x are used, but a subset of
#  the groups can be used via grp.  For example, if
#  the goal is to estimate the covariances between the sample trimmed
#  means for groups 1, 2, and 5, use the command grp<-c(1,2,5)
#  before calling this function.
#
#  The default amount of trimming is 20%
#
#  Missing values (values stored as NA) are removed casewise.
#
#  This function uses winvar from chapter 2.
#
if(is.list(x))x=matl(x)
x=elimna(x)
x=listm(x)
if(!is.list(x))stop("The data are not stored in list mode or a matrix.")
p<-length(grp)
pm1<-p-1
for (i in 1:pm1){
ip<-i+1
if(length(x[[grp[ip]]])!=length(x[[grp[i]]]))stop("The number of observations in each group must be equal")
}
n<-length(x[[grp[1]]])
h<-length(x[[grp[1]]])-2*floor(tr*length(x[[grp[1]]]))
covest<-matrix(0,p,p)
covest[1,1]<-(n-1)*winvar(x[[grp[1]]],tr)/(h*(h-1))
for (j in 2:p){
jk<-j-1
covest[j,j]<-(n-1)*winvar(x[[grp[j]]],tr)/(h*(h-1))
for (k in 1:jk){
covest[j,k]<-(n-1)*wincor(x[[grp[j]]],x[[grp[k]]],tr)$cov/(h*(h-1))
covest[k,j]<-covest[j,k]
}
}
covmtrim<-covest
covmtrim
}
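#
#  Example (illustrative, simulated data):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(60),ncol=3)
covmtrim(x) # 3 by 3 covariance matrix of the 20% trimmed means
}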
bwwcovm<-function(J,K,L,x,tr=.2){
#
# compute covariance matrix for a between by within by within design
#
p=J*K*L
idep=K*L
mat=matrix(0,nrow=p,ncol=p)
id=c(1:idep)
for(j in 1:J){
mat[id,id]=covmtrim(x[id],tr=tr)
id=id+idep
}
mat
}
bwwmatna<-function(J,K,L,x){
#
# data are assumed to be stored in a matrix
# for a between by within by within (three-way) anova,
# for the last two factors, eliminate any missing values
# and then store the data in list mode.
#
if(is.data.frame(x))x=as.matrix(x)
y=list()
ad=K*L
ilow=1
iup=ad
ic=0
for(j in 1:J){
z=x[,ilow:iup]
d=elimna(z)
im=0
for(k in 1:K){
for(l in 1:L){
ic=ic+1
im=im+1
y[[ic]]=d[,im]
}}
ilow=ilow+ad
iup=iup+ad
}
y
}
bwwna<-function(J,K,L,x){
#
# data are assumed to be stored in list mode
# for a between by within by within (three-way) anova,
# for the last two factors, eliminate any missing values.
#
if(is.data.frame(x))x=as.matrix(x)
y=list()
ad=K*L
ilow=1
iup=ad
ic=0
for(j in 1:J){
z=x[ilow:iup]
d=elimna(matl(z))
#print(d)
im=0
for(k in 1:K){
for(l in 1:L){
ic=ic+1
im=im+1
y[[ic]]=d[,im]
}}
ilow=ilow+ad
iup=iup+ad
}
y
}
bwwtrim<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L){
#  Perform a between by within by within (three-way) anova
#  on trimmed means where
#
#  J independent groups, KL dependent groups
#
#  The variable data is assumed to contain the raw
#  data stored in list mode. data[[1]] contains the data
#  for the first level of all three factors: level 1,1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first two factors and level 2 of the third factor: level 1,1,2
#  data[[L]] is the data for level 1,1,L
#  data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L.
#  data[[KL+1]] is level 2,1,1, etc.
#
#  The default amount of trimming is tr=.2
#
#  It is assumed that data has length JKL, the total number of
#  groups being tested.
#
if(is.data.frame(data))data=as.matrix(data)
if(is.list(data))data=bwwna(J,K,L,data) # remove missing values
if(is.matrix(data))data=bwwmatna(J,K,L,data) #remove missing values
#                                     and convert to list mode
if(!is.list(data))stop("The data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups in data is")
print(length(data))
print("Warning: These two values are not equal")
}
tmeans<-0
h<-0
v<-0
for (i in 1:p){
tmeans[i]<-mean(data[[grp[i]]],tr)
h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]]))
#    h is the effective sample size
}
v=bwwcovm(J,K,L,data,tr=tr)
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
il<-matrix(c(rep(1,L)),1,L)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
lm1<-L-1
cl<-diag(1,lm1,L)
for (i in 1:lm1)cl[i,i+1]<-0-1
#  Do test for factor A
cmat<-kron(cj,kron(ik,il))  # Contrast matrix for factor A
Qa=bwwtrim.sub(cmat, tmeans, v, h,p)
Qa.siglevel <- 1 - pf(Qa, J - 1, 999)
# Do test for factor B
cmat<-kron(ij,kron(ck,il))  # Contrast matrix for factor B
Qb=bwwtrim.sub(cmat, tmeans, v, h,p)
 Qb.siglevel <- 1 - pf(Qb, K - 1, 999)
# Do test for factor C
cmat<-kron(ij,kron(ik,cl))  # Contrast matrix for factor C
Qc<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qc.siglevel <- 1 - pf(Qc, L - 1, 999)
# Do test for factor A by B interaction
cmat<-kron(cj,kron(ck,il))  # Contrast matrix for factor A by B
Qab<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qab.siglevel <- 1 - pf(Qab, (J - 1) * (K - 1), 999)
# Do test for factor A by C interaction
cmat<-kron(cj,kron(ik,cl))  # Contrast matrix for factor A by C
Qac<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qac.siglevel <- 1 - pf(Qac, (J - 1) * (L - 1), 999)
# Do test for factor B by C interaction
cmat<-kron(ij,kron(ck,cl))  # Contrast matrix for factor B by C
Qbc<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qbc.siglevel <- 1 - pf(Qbc, (K - 1) * (L - 1), 999)
# Do test for factor A by B by C interaction
cmat<-kron(cj,kron(ck,cl))  # Contrast matrix for factor A by B by C
Qabc<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qabc.siglevel <-1-pf(Qabc,(J-1)*(K-1)*(L-1), 999)
list(Qa=Qa,Qa.p.value=Qa.siglevel,Qb=Qb,Qb.p.value=Qb.siglevel,
Qc=Qc,Qc.p.value=Qc.siglevel,Qab=Qab,Qab.p.value=Qab.siglevel,
Qac=Qac,Qac.p.value=Qac.siglevel,Qbc=Qbc,Qbc.p.value=Qbc.siglevel,
Qabc=Qabc,Qabc.p.value=Qabc.siglevel)
}
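#
#  Example (illustrative, simulated data): a 2 by 2 by 2 design, so
#  data must contain JKL=8 groups.
if(FALSE){
set.seed(1)
dat<-list()
for(i in 1:8)dat[[i]]<-rnorm(15)
bwwtrim(2,2,2,dat) # between by within by within ANOVA on trimmed means
}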


bbwcovm<-function(J,K,L,x,tr=.2){
#
# compute covariance matrix for a between by between by within design
#
p=J*K*L
idep=L
mat=matrix(0,nrow=p,ncol=p)
id=c(1:idep)
for(j in 1:J){
for(k in 1:K){
mat[id,id]=covmtrim(x[id],tr=tr)
id=id+idep
}}
mat
}
bbwmatna<-function(J,K,L,x){
#
# data are assumed to be stored in a matrix
# for a between by between by within (three-way) anova.
# For the last factor, eliminate any missing values
# and then store the data in list mode.
#
y=list()
ad=L
ilow=1
iup=ad
ic=0
for(j in 1:J){
for(k in 1:K){
z=x[,ilow:iup]
d=elimna(z)
im=0
for(l in 1:L){
ic=ic+1
im=im+1
y[[ic]]=d[,im]
}
ilow=ilow+ad
iup=iup+ad
}}
y
}
bbwna<-function(J,K,L,x){
#
# x: data are assumed to be stored in list mode
# for a between by between by within (three-way) anova.
# For the last factor, eliminate any missing values.
#
y=list()
ad=L
ilow=1
iup=ad
ic=0
for(j in 1:J){
for(k in 1:K){
z=x[ilow:iup]
d=as.matrix(elimna(matl(z)))
im=0
ilow=ilow+ad
iup=iup+ad
for(l in 1:L){
ic=ic+1
im=im+1
y[[ic]]=d[,im]
}}
}
y
}
bbwtrim<-function(J,K,L,data,tr=.2,alpha=.05,p=J*K*L){
#  Perform a between-between-within (three-way) anova on trimmed means where
#
#  JK independent groups, L dependent groups
#
#  The variable data is assumed to contain the raw
#  data stored in list mode. data[[1]] contains the data
#  for the first level of all three factors: level 1,1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first two factors and level 2 of the third factor: level 1,1,2
#  data[[L]] is the data for level 1,1,L
#  data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L.
#  data[[KL+1]] is level 2,1,1, etc.
#
#  The default amount of trimming is tr=.2
#
#  It is assumed that data has length JKL, the total number of
#  groups being tested.
#
if(is.data.frame(data)) data <- as.matrix(data)
if(is.list(data))data=bbwna(J,K,L,data)
if(is.matrix(data))data=bbwmatna(J,K,L,data)
grp=c(1:p)
if(!is.list(data))stop("Data are not stored in list mode")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups is")
print(length(data))
print("Warning: These two values are not equal")
}
tmeans<-0
h<-0
v<-0
for (i in 1:p){
tmeans[i]<-mean(data[[grp[i]]],tr)
h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]]))
#    h is the effective sample size
}
v=bbwcovm(J,K,L,data,tr=tr)
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
il<-matrix(c(rep(1,L)),1,L)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
lm1<-L-1
cl<-diag(1,lm1,L)
for (i in 1:lm1)cl[i,i+1]<-0-1
#  Do test for factor A
cmat<-kron(cj,kron(ik,il))  # Contrast matrix for factor A
Qa=bwwtrim.sub(cmat, tmeans, v, h,p)
Qa.siglevel <- 1 - pf(Qa, J - 1, 999)
# Do test for factor B
cmat<-kron(ij,kron(ck,il))  # Contrast matrix for factor B
Qb=bwwtrim.sub(cmat, tmeans, v, h,p)
 Qb.siglevel <- 1 - pf(Qb, K - 1, 999)
# Do test for factor C
cmat<-kron(ij,kron(ik,cl))  # Contrast matrix for factor C
Qc<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qc.siglevel <- 1 - pf(Qc, L - 1, 999)
# Do test for factor A by B interaction
cmat<-kron(cj,kron(ck,il))  # Contrast matrix for factor A by B
Qab<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qab.siglevel <- 1 - pf(Qab, (J - 1) * (K - 1), 999)
# Do test for factor A by C interaction
cmat<-kron(cj,kron(ik,cl))  # Contrast matrix for factor A by C
Qac<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qac.siglevel <- 1 - pf(Qac, (J - 1) * (L - 1), 999)
# Do test for factor B by C interaction
cmat<-kron(ij,kron(ck,cl))  # Contrast matrix for factor B by C
Qbc<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qbc.siglevel <- 1 - pf(Qbc, (K - 1) * (L - 1), 999)
# Do test for factor A by B by C interaction
cmat<-kron(cj,kron(ck,cl))  # Contrast matrix for factor A by B by C
Qabc<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qabc.siglevel <-1-pf(Qabc,(J-1)*(K-1)*(L-1), 999)
list(Qa=Qa,Qa.p.value=Qa.siglevel,Qb=Qb,Qb.p.value=Qb.siglevel,
Qc=Qc,Qc.p.value=Qc.siglevel,Qab=Qab,Qab.p.value=Qab.siglevel,
Qac=Qac,Qac.p.value=Qac.siglevel,Qbc=Qbc,Qbc.p.value=Qbc.siglevel,
Qabc=Qabc,Qabc.p.value=Qabc.siglevel)
}


bwwtrim.sub<-function(cmat,vmean,vsqse,h,p){
#
#  The function computes a variation of Johansen's test statistic
#  used to test the hypothesis C mu = 0, where
#  C is a k by p matrix of rank k and mu is a p by 1 vector
#  of unknown trimmed means.
#  The argument cmat contains the matrix C.
#  vmean is a vector of length p containing the p trimmed means.
#  vsqse is the matrix containing the
#  estimated covariances among the trimmed means.
#  h is the vector of effective sample sizes.
#
yvec<-matrix(vmean,length(vmean),1)
test<-cmat%*%vsqse%*%t(cmat)
invc<-solve(test)
test<-t(yvec)%*%t(cmat)%*%invc%*%cmat%*%yvec
temp<-0
mtem<-vsqse%*%t(cmat)%*%invc%*%cmat
temp<-(sum(diag(mtem%*%mtem))+(sum(diag(mtem)))^2)/(h-1)
A<-.5*sum(temp)
cval<-nrow(cmat)+2*A-6*A/(nrow(cmat)+2)
test<-test/cval
test
}

ghmean<-function(g,h){
#
#Compute the mean and variance of a g-and-h distribution
#
if(h<0)stop("h must be greater than or equal to 0")
val=NULL
val2=NULL
if(h<1)
val=(exp(g^2/(2*(1-h)))-1)/(g*sqrt(1-h))
if(h<.5)
val2=(exp(2*g^2/(1-2*h))-2*exp(g^2/(2*(1-2*h)))+1)/(g^2*sqrt(1-2*h))-
(exp(g^2/(2*(1-h)))-1)^2/(g^2*(1-h))
list(mean=val,variance=val2)
}
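#
#  Example (illustrative): g=.5, h=.1 gives a skewed, heavy-tailed distribution.
if(FALSE){
ghmean(g=.5,h=.1) # returns the population mean and variance
}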
skew<-function(x){
#
# Compute skew and kurtosis
#
x=elimna(x)
m1<-mean(x)
m2<-var(x)
m3<-sum((x-m1)^3)/length(x)
m4<-sum((x-m1)^4)/length(x)
sk<-m3/m2^1.5
ku<-m4/m2^2
list(skew=sk,kurtosis=ku)
}

t3pval<-function(cmat,tmeans,v,h){
alph<-c(1:99)/100
for(i in 1:99){
irem<-i
chkit<-johan(cmat,tmeans,v,h,alph[i])
if(chkit$teststat>chkit$crit)break
}
p.value <- irem/100
        if(p.value <= 0.1) {
                iup <- (irem + 1)/100
                alph <- seq(0.001, iup, 0.001)
                for(i in 1:length(alph)) {
                        p.value <- alph[i]
                        chkit<-johan(cmat,tmeans,v,h,alph[i])
if(chkit$teststat>chkit$crit)break
                }
        }
  if(p.value <= 0.001) {
                alph <- seq(0.0001, 0.001, 0.0001)
                for(i in 1:length(alph)) {
                        p.value <- alph[i]
chkit<-johan(cmat,tmeans,v,h,alph[i])
if(chkit$teststat>chkit$crit)break
                }
        }
p.value
}

t1way<-function(x,tr=.2,grp=NA,MAT=FALSE,lev.col=1,var.col=2,IV=NULL,pr=TRUE){
#
#  A heteroscedastic one-way ANOVA for trimmed means
#  using a generalization of Welch's method.
#
#  The data are assumed to be stored in x in a matrix or in list mode.
#
# MAT=F, if x is a matrix, columns correspond to groups.
# if MAT=T, assumes argument
# lev.col
# indicates which column of x denotes the groups. And
#  var.col indicates the column where the data are stored.
#
# if x has list mode:
#  length(x) is assumed to correspond to the total number of groups.
#  By default, the null hypothesis is that all groups have a common mean.
#  To compare a subset of the groups, use grp to indicate which
#  groups are to be compared. For example, if you type the
#  command grp<-c(1,3,4), and then execute this function, groups
#  1, 3, and 4 will be compared with the remaining groups ignored.
#
#  IV, if specified, taken to be the independent variable
#      That is, the group id values
#      and x is assumed to be a vector containing all of the data
#
#  Missing values are automatically removed.
#
if(is.data.frame(x))x=as.matrix(x)
if(tr==.5)print("Warning: Comparing medians should not be done with this function")
if(!is.null(IV[1])){
if(pr)print("Assuming x is a vector containing all of the data, the dependent variable")
xi=elimna(cbind(x,IV))
x=fac2list(xi[,1],xi[,2])
}
if(MAT){
if(!is.matrix(x))stop("With MAT=T, data must be stored in a matrix")
if(length(lev.col)!=1)stop("Argument lev.col should have 1 value")
temp=selby(x,lev.col,var.col)
x=temp$x
grp2=rank(temp$grpn)
x=x[grp2]
}
if(is.matrix(x))x<-listm(x)
nv=lapply(x,length)
if(is.na(sum(grp[1])))grp<-c(1:length(x))
if(!is.list(x))stop("Data are not stored in a matrix or in list mode.")
J<-length(grp)
h<-vector("numeric",J)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]]))
   # h is the number of observations in the jth group after trimming.
w[j]<-h[j]*(h[j]-1)/((length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr))
xbar[j]<-mean(x[[grp[j]]],tr)
}
u<-sum(w)
xtil<-sum(w*xbar)/u
A<-sum(w*(xbar-xtil)^2)/(J-1)
B<-2*(J-2)*sum((1-w/u)^2/(h-1))/(J^2-1)
TEST<-A/(B+1)
nu1<-J-1
nu2<-1./(3*sum((1-w/u)^2/(h-1))/(J^2-1))
sig<-1-pf(TEST,nu1,nu2)
list(TEST=TEST,nu1=nu1,nu2=nu2,n=nv,p.value=sig)
}
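#
#  Example (illustrative, simulated data):
if(FALSE){
set.seed(1)
x<-list(rnorm(20),rnorm(20,mean=.5),rnorm(20))
t1way(x) # heteroscedastic one-way ANOVA on 20% trimmed means
}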

t3wayv2<-function(J,K,L,x,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,MAT=FALSE,
lev.col=c(1:3),var.col=4,pr=TRUE){
#  Perform a J by K by L (three-way) anova on trimmed means where
#  all JKL groups are independent.
#
#  Same as t3way, only computes p-values
#
# if MAT=F (default)
#  The R variable data is assumed to contain the raw
#  data stored in list mode. data[[1]] contains the data
#  for the first level of all three factors: level 1,1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first two factors and level 2 of the third factor: level 1,1,2
#  data[[L]] is the data for level 1,1,L
#  data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L.
#  data[[KL+1]] is level 2,1,1, etc.
#
#  MAT=T, assumes data are stored in matrix with 3 columns indicating
#  levels of the three factors.
#  That is, this function calls selby2 for you.
#
#  The default amount of trimming is tr=.2
#
#  It is assumed that data has length JKL, the total number of
#  groups being tested.
#
if(is.data.frame(x))x=as.matrix(x)
data=x  #Yes, odd code
if(MAT){
if(!is.matrix(data))stop("With MAT=T, data must be a matrix")
if(length(lev.col)!=3)stop("Argument lev.col should have 3 values")
temp=selby2(data,lev.col,var.col)
lev1=length(unique(temp$grpn[,1]))
lev2=length(unique(temp$grpn[,2]))
lev3=length(unique(temp$grpn[,3]))
gv=apply(temp$grpn,2,rank)
gvad=100*gv[,1]+10*gv[,2]+gv[,3]
grp=rank(gvad)
if(pr){
print(paste("Factor 1 has", lev1, "levels"))
print(paste("Factor 2 has", lev2, "levels"))
print(paste("Factor 3 has", lev3, "levels"))
}
if(J!=lev1)warning("J is being reset to the number of levels found")
if(K!=lev2)warning("K is being reset to the number of levels found")
if(L!=lev3)warning("L is being reset to the number of levels found")
J=lev1
K=lev2
L=lev3
data=temp$x
}
if(is.matrix(data))data=listm(data)
if(!is.list(data))stop("Data is not stored in list mode")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups in data is")
print(length(data))
print("Warning: These two values are not equal")
}
tmeans<-0
h<-0
v<-0
for (i in 1:p){
tmeans[i]<-mean(data[[grp[i]]],tr)
h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]]))
#    h is the effective sample size
v[i]<-(length(data[[grp[i]]])-1)*winvar(data[[grp[i]]],tr)/(h[i]*(h[i]-1))
#    v contains the squared standard errors
}
v<-diag(v,p,p)   # Put squared standard errors in a diag matrix.
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
il<-matrix(c(rep(1,L)),1,L)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
lm1<-L-1
cl<-diag(1,lm1,L)
for (i in 1:lm1)cl[i,i+1]<-0-1
#  Do test for factor A
cmat<-kron(cj,kron(ik,il))  # Contrast matrix for factor A
Qa <- johan(cmat, tmeans, v, h, alpha)
Qa.pv=t3pval(cmat, tmeans, v, h)
# Do test for factor B
cmat<-kron(ij,kron(ck,il))  # Contrast matrix for factor B
Qb<-johan(cmat,tmeans,v,h,alpha)
Qb.pv=t3pval(cmat, tmeans, v, h)
# Do test for factor C
cmat<-kron(ij,kron(ik,cl))  # Contrast matrix for factor C
Qc<-johan(cmat,tmeans,v,h,alpha)
Qc.pv=t3pval(cmat, tmeans, v, h)
# Do test for factor A by B interaction
cmat<-kron(cj,kron(ck,il))  # Contrast matrix for factor A by B
Qab<-johan(cmat,tmeans,v,h,alpha)
Qab.pv=t3pval(cmat, tmeans, v, h)
# Do test for factor A by C interaction
cmat<-kron(cj,kron(ik,cl))  # Contrast matrix for factor A by C
Qac<-johan(cmat,tmeans,v,h,alpha)
Qac.pv=t3pval(cmat, tmeans, v, h)
# Do test for factor B by C interaction
cmat<-kron(ij,kron(ck,cl))  # Contrast matrix for factor B by C
Qbc<-johan(cmat,tmeans,v,h,alpha)
Qbc.pv=t3pval(cmat, tmeans, v, h)
# Do test for factor A by B by C interaction
cmat<-kron(cj,kron(ck,cl))  # Contrast matrix for factor A by B by C
Qabc<-johan(cmat,tmeans,v,h,alpha)
Qabc.pv=t3pval(cmat, tmeans, v, h)
list(Qa=Qa$teststat,Qa.crit=Qa$crit,Qa.p.value=Qa.pv,
Qb=Qb$teststat,Qb.crit=Qb$crit,Qb.p.value=Qb.pv,
Qc=Qc$teststat,Qc.crit=Qc$crit,Qc.p.value=Qc.pv,
Qab=Qab$teststat,Qab.crit=Qab$crit,Qab.p.value=Qab.pv,
Qac=Qac$teststat,Qac.crit=Qac$crit,Qac.p.value=Qac.pv,
Qbc=Qbc$teststat,Qbc.crit=Qbc$crit,Qbc.p.value=Qbc.pv,
Qabc=Qabc$teststat,Qabc.crit=Qabc$crit,Qabc.p.value=Qabc.pv)
}
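#
#  Example (illustrative, simulated data): a 2 by 2 by 2 design with
#  JKL=8 independent groups.
if(FALSE){
set.seed(1)
dat<-list()
for(i in 1:8)dat[[i]]<-rnorm(15)
t3wayv2(2,2,2,dat) # three-way ANOVA on trimmed means with p-values
}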


olshc4<-function(x,y,alpha=.05,CN=FALSE,xout=FALSE,outfun=outpro,HC3=FALSE,...){
#
# Compute confidence for least squares
# regression using heteroscedastic method
# recommended by Cribari-Neto (2004).
# CN=F, degrees of freedom are n-p
# CN=T  degrees of freedom are infinite, as done by Cribari-Neto (2004)
# All indications are that CN=F is best for general use.
#
#  HC3=TRUE, will replace the HC4 estimator with the HC3 estimator.
#
x<-as.matrix(x)
if(nrow(x) != length(y))stop("Length of y does not match number of x values")
m<-cbind(x,y)
m<-elimna(m)
y<-m[,ncol(x)+1]
x=m[,1:ncol(x)]
n=length(y)
nrem=n
n.keep=length(y)
x<-as.matrix(x)
if(xout){
flag<-outfun(x,...)$keep
x<-as.matrix(x)
x<-x[flag,]
y<-y[flag]
n.keep=length(y)
x<-as.matrix(x)
}
temp<-lsfit(x,y)
x<-cbind(rep(1,nrow(x)),x)
xtx<-solve(t(x)%*%x)
h<-diag(x%*%xtx%*%t(x))
n<-length(h)
d<-(n*h)/sum(h)
for(i in 1:length(d)){
        d[i]<-min(4, d[i])
}
if(HC3)d=2
hc4<-xtx%*%t(x)%*%diag(temp$res^2/(1-h)^d)%*%x%*%xtx
df<-nrow(x)-ncol(x)
crit<-qt(1-alpha/2,df)
if(CN)crit=qnorm(1-alpha/2)
al<-ncol(x)
p=al-1
ci<-matrix(NA,nrow=al,ncol=6)
lab.out=rep("Slope",p)
dimnames(ci)<-list(c("(Intercept)",lab.out),c("Coef.","Estimates",
"ci.lower","ci.upper","p-value","Std.Error"))
for(j in 1:al){
ci[j,1]<-j-1
ci[j,2]<-temp$coef[j]
ci[j,3]<-temp$coef[j]-crit*sqrt(hc4[j,j])
ci[j,4]<-temp$coef[j]+crit*sqrt(hc4[j,j])
test<-temp$coef[j]/sqrt(hc4[j,j])
ci[j,5]<-2*(1-pt(abs(test),df))
if(CN)ci[j,5]<-2*(1-pnorm(abs(test)))
}
ci[,6]=sqrt(diag(hc4))
list(n=nrem,n.keep=n.keep,ci=ci, cov=hc4)
}
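#
#  Example (illustrative, simulated heteroscedastic data):
if(FALSE){
set.seed(1)
x<-rnorm(50)
y<-x+rnorm(50)*(1+abs(x))
olshc4(x,y)$ci # HC4-based estimates, confidence intervals and p-values
}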

hc4test<-function(x,y,pval=c(1:ncol(x)),xout=FALSE,outfun=outpro,...){
#
# Perform omnibus test using OLS and HC4 estimator
# That is, test the hypothesis that all of the slope parameters
# are equal to 0 in a manner that allows heteroscedasticity.
#
# recommended by Cribari-Neto (2004).
# Seems to work well with p=1 but can be unsatisfactory with p>4 predictors;
# it is unknown how large n must be when p>1.
#
x<-as.matrix(x)
if(ncol(x)>1)print("WARNING: more than 1 predictor, olstest might be better")
if(nrow(x) != length(y))stop("Length of y does not match number of x values")
m<-cbind(x,y)
m<-elimna(m)
p=ncol(x)
p1=p+1
y<-m[,p1]
x=m[,1:p]
nrem=length(y)
n.keep=nrem
x<-as.matrix(x)
if(xout){
flag<-outfun(x,...)$keep
x<-as.matrix(x)
x<-x[flag,]
y<-y[flag]
n.keep=length(y)
x<-as.matrix(x)
}
n=length(y)
pvalp1<-pval+1
temp<-lsfit(x,y) # unrestricted
x<-cbind(rep(1,nrow(x)),x)
hval<-x%*%solve(t(x)%*%x)%*%t(x)
hval<-diag(hval)
hbar<-mean(hval)
delt<-cbind(rep(4,n),hval/hbar)
delt<-apply(delt,1,min)
aval<-(1-hval)^(0-delt)
x2<-x[,pvalp1]
pval<-0-pvalp1
x1<-x[,pval]
df<-length(pval)
x1<-as.matrix(x1)
imat<-diag(1,n)
M1<-imat-x1%*%solve(t(x1)%*%x1)%*%t(x1)
M<-imat-x%*%solve(t(x)%*%x)%*%t(x)
uval<-as.vector(M%*%y)
R2<-M1%*%x2
rtr<-solve(t(R2)%*%R2)
temp2<-aval*uval^2
S<-diag(aval*uval^2)
V<-n*rtr%*%t(R2)%*%S%*%R2%*%rtr
nvec<-as.matrix(temp$coef[pvalp1])
test<-n*t(nvec)%*%solve(V)%*%nvec
test<-test[1,1]
p.value<-1-pchisq(test,df)
list(n=nrem,n.keep=n.keep,test=test,p.value=p.value)
}
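#
#  Example (illustrative, simulated data):
if(FALSE){
set.seed(1)
x<-rnorm(50)
y<-x+rnorm(50)
hc4test(x,y) # omnibus test that the slope is zero
}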

trimpb<-function(x,y,tr=.2,alpha=.05,nboot=2000,WIN=FALSE,win=.1,
plotit=FALSE,pop=1,null.value=0,pr=TRUE,xlab="X"){
#
#   Compute a 1-alpha confidence interval for
#   a trimmed mean.
#
#   The default number of bootstrap samples is nboot=2000
#
#   win is the amount of Winsorizing before bootstrapping
#   when WIN=T.
#
#   Missing values are automatically removed.
#
#  null.value is the hypothesized value; the p-value tests the
#  hypothesis that the trimmed mean equals null.value.
#
#  plotit=TRUE gives a plot of the bootstrap values
#  pop=1 results in the expected frequency curve.
#  pop=2 kernel density estimate
#  pop=3 boxplot
#  pop=4 stem-and-leaf
#  pop=5 histogram
#  pop=6 adaptive kernel density estimate.
#
if(pr){
print("The p-value returned by the this function is based on the")
print("null value specified by the argument null.value, which defaults to 0")
}
x<-x[!is.na(x)]
if(WIN){
if(win > tr)stop("The amount of Winsorizing must be less than or equal to the amount of trimming")
x<-winval(x,win)
}
crit<-alpha/2
icl<-round(crit*nboot)+1
icu<-nboot-icl
bvec<-NA
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,mean,tr) # Bootstrapped trimmed means
bvec<-sort(bvec)
p.value<-sum(bvec<null.value)/nboot
p.value<-2*min(p.value,1-p.value)
ci<-NA
ci[1]<-bvec[icl]
ci[2]<-bvec[icu]
if(plotit){
if(pop==1)rdplot(as.vector(bvec),xlab=xlab)
if(pop==2)kdplot(as.vector(bvec))
if(pop==3)boxplot(as.vector(bvec))
if(pop==4)stem(as.vector(bvec))
if(pop==5)hist(as.vector(bvec))
if(pop==6)akerd(as.vector(bvec),xlab=xlab)
}
list(ci=ci,p.value=p.value)
}
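#
#  Example (illustrative, simulated data):
if(FALSE){
set.seed(1)
trimpb(rnorm(30)) # percentile bootstrap CI for the 20% trimmed mean
}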
standm<-function(x,locfun=lloc,est=mean,scat=var,...){
# standardize a matrix x
#
x=elimna(x)
x=as.matrix(x)
m1=locfun(x,est=est)
v1=apply(x,2,scat)
p=ncol(x)
for(j in 1:p)x[,j]=(x[,j]-m1[j])/sqrt(v1[j])
x
}

t2way<-function(J,K,x,tr=.2,grp=c(1:p),p=J*K,MAT=FALSE,
lev.col=c(1:2),var.col=3,pr=TRUE,IV1=NULL,IV2=NULL){
#  Perform a J by K  (two-way) ANOVA on trimmed means where
#  all groups are independent.
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode, or a matrix with columns
#  corresponding to groups. If stored in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second factor: level 1,2
#
#  The default amount of trimming is tr=.2
#
#  It is assumed that x has length JK, the total number of
#  groups being tested.
#
#  MAT=T, assumes x is stored in a matrix with 3 columns
#  with two of the columns indicated by the argument
#  lev.col
#  specifying the columns of x containing the values of the
#  levels of the two factors.
#  The outcome variable is in column
#  var.col
#  which defaults to column 3
#  That is, this function calls selby2 for you.
#
#  IV1 and IV2: if specified, taken to be the independent variable
#      That is, the group id values
#      and x is assumed to be a vector containing all of the data
#  EXAMPLE: t2way(x=data,IV1=iv1,IV2=iv2)
#  would do a two-way ANOVA based on group id's in iv1 and iv2 and
#  dependent variable data
#
if(is.data.frame(x))x=as.matrix(x)
if(tr==.5){
print("For medians, use med2way if there are no ties")
print("With ties, use linear contrasts in conjunction with medpb")
stop("")
}
if(MAT){
if(!is.matrix(x))stop("With MAT=T, data must be a matrix")
if(length(lev.col)!=2)stop("Argument lev.col should have 2 values")
temp=selby2(x,lev.col,var.col)
lev1=length(unique(temp$grpn[,1]))
lev2=length(unique(temp$grpn[,2]))
gv=apply(temp$grpn,2,rank)
gvad=10*gv[,1]+gv[,2]
grp=rank(gvad)
if(pr){
print(paste("Factor 1 has", lev1, "levels"))
print(paste("Factor 2 has", lev2, "levels"))
}
if(J!=lev1)warning("J is being reset to the number of levels found")
if(K!=lev2)warning("K is being reset to the number of levels found")
J=lev1
K=lev2
x=temp$x
}
if(!is.null(IV1[1])){
if(is.null(IV2[1]))stop("IV2 is NULL")
if(pr)print("Assuming data is a vector containing all of the data; the dependent variable")
xi=elimna(cbind(x,IV1,IV2))
J=length(unique(xi[,2]))
K=length(unique(xi[,3]))
x=fac2list(xi[,1],xi[,2:3])
}
if(is.matrix(x))x=listm(x)
if(!is.list(x))stop("Data are not stored in list mode")
if(p!=length(x)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups is")
print(length(x))
print("Warning: These two values are not equal")
}
tmeans<-0
h<-0
v<-0
for (i in 1:p){
x[[grp[i]]]=elimna(x[[grp[i]]])
tmeans[i]<-mean(x[[grp[i]]],tr)
h[i]<-length(x[[grp[i]]])-2*floor(tr*length(x[[grp[i]]]))
#    h is the effective sample size
v[i]<-(length(x[[grp[i]]])-1)*winvar(x[[grp[i]]],tr)/(h[i]*(h[i]-1))
#    v contains the squared standard errors
}
v<-diag(v,p,p)   # Put squared standard errors in a diag matrix.
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
#  Do test for factor A
cmat<-kron(cj,ik)  # Contrast matrix for factor A
alval<-c(1:999)/1000
for(i in 1:999){
irem<-i
Qa<-johan(cmat,tmeans,v,h,alval[i])
if(Qa$teststat>Qa$crit)break
}
A.p.value=irem/1000
# Do test for factor B
cmat<-kron(ij,ck)  # Contrast matrix for factor B
for(i in 1:999){
irem<-i
Qb<-johan(cmat,tmeans,v,h,alval[i])
if(Qb$teststat>Qb$crit)break
}
B.p.value=irem/1000
# Do test for factor A by B interaction
cmat<-kron(cj,ck)  # Contrast matrix for factor A by B
for(i in 1:999){
irem<-i
Qab<-johan(cmat,tmeans,v,h,alval[i])
if(Qab$teststat>Qab$crit)break
}
AB.p.value=irem/1000
tmeans=matrix(tmeans,J,K,byrow=T)
list(Qa=Qa$teststat,A.p.value=A.p.value,
Qb=Qb$teststat,B.p.value=B.p.value,
Qab=Qab$teststat,AB.p.value=AB.p.value,means=tmeans)
}
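
#  EXAMPLE (an illustrative sketch; simulated data for a 2-by-2 design):
#    set.seed(1)
#    dat<-list(rnorm(10),rnorm(10),rnorm(10),rnorm(10)) # levels 1,1  1,2  2,1  2,2
#    t2way(2,2,dat)  # tests for factors A and B and the A-by-B interaction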

mcskew <- function(z)
{
	# Compute the medcouple, a robust measure of skewness taking values
	# in [-1,1] (apparently a port of the fast algorithm of Brys, Hubert
	# and Struyf; uses the helper functions pull, whimed and calwork below).
	n=length(z)
	y1=0
	y2=0
	left=0
	right=0
	q=0
	p=0
	eps=0.0000000000001
	z=-z
	xmed=pull(z,n,floor(n/2)+1)
	if (n%%2 == 0)
	{
		xmed=(xmed+pull(z,n,floor(n/2)))/2
	}
	z=z-xmed
	y=-sort(z)
	y1=y[y>-eps]
	y2=y[y<=eps]
	h1=length(y1)
	h2=length(y2)	
	left[1:h2]=1
	right[1:h2]=h1
	nl=0
	nr=h1*h2
	knew=floor(nr/2)+1
	IsFound=0
	while ((nr-nl>n) & (IsFound==0))
	{
		weight=0
		work=0
		j=1
		for (i in 1:h2)
		{
			if (left[i]<=right[i])
			{
				weight[j]=right[i]-left[i]+1
				k=left[i]+floor(weight[j]/2)
				work[j]=calwork(y1[k],y2[i],k,i,h1+1,eps)
				j=j+1
			}
		}
		trial=whimed(work,weight,j-1)
		j=1
		for (i in h2:1)
		{
			while ((j<=h1)&(calwork(y1[min(j,h1)],y2[i],j,i,h1+1,eps)>trial))
			{
				j=j+1
			}
			p[i]=j-1
		}
		j=h1
		for (i in 1:h2)
		{
			while ((j>=1)&(calwork(y1[max(j,1)],y2[i],j,i,h1+1,eps)<trial))
			{
				j=j-1
			}
			q[i]=j+1
		}
		sump=sum(p[1:h2])
		sumq=sum(q[1:h2])-h2
		if (knew<=sump)
		{
			right[1:h2]=p[1:h2]
			nr=sump
		}
		else
		{
			if (knew>sumq)
			{
				left[1:h2]=q[1:h2]
				nl=sumq
			}
			else
			{
				medc=trial
				IsFound=1
			}
		}
	}
	if (IsFound==0)
	{work=0
		j=1
		for (i in 1:h2)
		{
			if (left[i]<=right[i])
			{
				for (jj in left[i]:right[i])
				{
					work[j]=0-calwork(y1[jj],y2[i],jj,i,h1+1,eps)
					j=j+1
				}
			}
		}
		medc=0-pull(work,j-1,knew-nl)
	}
	medc
}
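
#  EXAMPLE (an illustrative sketch with simulated data):
#    set.seed(1)
#    mcskew(rexp(100))   # right-skewed data
#    mcskew(rnorm(100))  # roughly symmetric data, estimate near zero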

pull <- function(a,n,k)
{
	# Return the kth smallest of the n values in a
	# (a quickselect-style partial sort; a itself is left unchanged)
	b=0
	b=a
	l=1
	lr=n
	while (l<lr)
	{
		ax=b[k]
		jnc=l
		j=lr
		while (jnc<=j)
		{
			while (b[jnc]<ax)
			{
				jnc=jnc+1
			}
			while (b[j]>ax)
			{
				j=j-1
			}
			if (jnc<=j)
			{
				buffer=b[jnc]
				b[jnc]=b[j]
				b[j]=buffer
				jnc=jnc+1
				j=j-1
			}
		}
		if (j<k)
		{
			l=jnc
		}
		if (k<jnc)
		{
			lr=j
		}
	}
	outp=b[k]
	outp
}
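
#  EXAMPLE (illustration only):
#    pull(c(5,1,4,2,3),5,2)  # returns 2, the 2nd smallest of the 5 values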

whimed <- function(a,iw,n)
{
	# Compute the weighted (high) median of the n values in a,
	# with corresponding positive weights in iw
	acand=0
	iwcand=0
	nn=n
	wtotal=sum(iw[1:nn])
	wrest=0
	IsFound=0
	while (IsFound==0)
	{
		trial=pull(a,nn,floor(nn/2)+1)

		wleft=sum(iw[c(a[1:nn]<trial,rep(F,n-nn))])
		wright=sum(iw[c(a[1:nn]>trial,rep(F,n-nn))])
		wmid=sum(iw[c(a[1:nn]==trial,rep(F,n-nn))])
		
		if ((2*wrest+2*wleft)>wtotal)
		{
			i=c(a[1:nn]<trial,rep(F,n-nn))
			acand=a[i]
			iwcand=iw[i]
#			nn=kcand_length(acand)
			nn=length(acand)
			
		}
		else
		{
			if ((2*wrest+2*wleft+2*wmid)>wtotal)
			{
				whmed=trial
				IsFound=1
			}
			else
			{
				i=c(a[1:nn]>trial,rep(F,n-nn))
				acand=a[i]
				iwcand=iw[i]
	nn=length(acand)
#		nn_kcand_length(acand)
				wrest=wrest+wleft+wmid
			}
		}
		a[1:nn]=acand[1:nn]
		iw[1:nn]=iwcand[1:nn]
	}
	whmed
}
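
#  EXAMPLE (illustration only):
#    whimed(c(1,2,3),c(1,1,4),3)  # returns 3; the weight of 4 dominates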

calwork <- function(a,b,ai,bi,ab,eps)
{
	# Kernel used by mcskew: returns (a+b)/(a-b), with near-ties
	# (|a-b| < 2*eps) resolved through the ranks ai and bi
	if (abs(a-b) < 2.0*eps)
	{
		if (ai+bi==ab)
		{
			cwork=0
		}
		else
		{
			if (ai+bi<ab)
			{
				cwork=1
			}
			else
			{
				cwork=0-1
			}
		}
	}
	else
	{
		cwork=(a+b)/(a-b)
	}
	cwork
}


outtbs<-function(x,SEED=FALSE,plotit=TRUE,xlab="X",ylab="Y",...){
#
# Use the tbs estimator to
# determine which points are outliers
#
if(!is.matrix(x))stop("x should be a matrix")
x<-elimna(x)
temp<-out(x,cov.fun=tbs,plotit=plotit,SEED=SEED,xlab=xlab,ylab=ylab)
outid<-temp$out.id
keep<-temp$keep
list(out.id=outid,keep=keep,distances=temp$dis)
}

erho.bt <- function(p,c1,M)
#   expectation of rho(d) under chi-squared p
    return(chi.int(p,2,M)/2
        +(M^2/2+c1*(5*c1+16*M)/30)*chi.int2(p,0,M+c1)
        +(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4))*(
chi.int(p,0,M+c1)-chi.int(p,0,M))
        +(1/2+M^4/(2*c1^4)-M^2/c1^2)*(chi.int(p,2,M+c1)-chi.int(p,2,M))
        +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*(chi.int(p,3,M+c1)-chi.int(p,3,M))
        +(3*M^2/(2*c1^4)-1/(2*c1^2))*(chi.int(p,4,M+c1)-chi.int(p,4,M))
        -(4*M/(5*c1^4))*(chi.int(p,5,M+c1)-chi.int(p,5,M))
        +(1/(6*c1^4))*(chi.int(p,6,M+c1)-chi.int(p,6,M)))
chi.int <- function(p,a,c1)
#   partial expectation d in (0,c1) of d^a under chi-squared p
  return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*pchisq(c1^2,p+a) )
chi.int2 <- function(p,a,c1)
#   partial expectation d in (c1,\infty) of d^a under chi-squared p
 return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*(1-pchisq(c1^2,p+a)))
cgen.bt <- function(n,p,r,alpha,asymp=FALSE){
#   find constants c1 and M that give a specified breakdown r
#   and rejection point alpha
if (asymp == FALSE){if (r > (n-p)/(2*n) ) r <- (n-p)/(2*n)}
# maximum achievable breakdown
#
#   if rejection is not achievable, use c1=0 and best rejection
#
    limvec <- rejpt.bt.lim(p,r)
    if (1-limvec[2] <= alpha)
    {
        c1 <- 0
        M <- sqrt(qchisq(1-alpha,p))
    }
    else
    {
    c1.plus.M <- sqrt(qchisq(1-alpha,p))
    M <- sqrt(p)
    c1 <- c1.plus.M - M
    iter <- 1
    crit <- 100
    eps <- 1e-5
    while ((crit > eps)&(iter<100))
    {
        deps <- 1e-4
        M.old <- M
        c1.old <- c1
        er <- erho.bt(p,c1,M)
        fc <- er - r*(M^2/2+c1*(5*c1+16*M)/30)
        fcc1 <- (erho.bt(p,c1+deps,M)-er)/deps
        fcM  <- (erho.bt(p,c1,M+deps)-er)/deps
        fcp <- fcM - fcc1 - r*(M-(5*c1+16*M)/30+c1*9/30)
        M <- M - fc/fcp
        if (M >= c1.plus.M ){M <- (M.old + c1.plus.M)/2}
        c1 <- c1.plus.M - M
#        if (M-c1 < 0)  M <- c1.old+(M.old-c1.old)/2
        crit <- abs(fc)
        iter <- iter+1
    }
    }
list(c1=c1,M=M,r1=r)
}
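
#  EXAMPLE (an illustrative sketch; convergence for these inputs is assumed):
#    cgen.bt(n=50,p=2,r=.2,alpha=.05)  # tuning constants c1 and M for the
#                                      # translated biweight
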
erho.bt.lim <- function(p,c1)
#   expectation of rho(d) under chi-squared p
  return(chi.int(p,2,c1)+c1^2*chi.int2(p,0,c1))
erho.bt.lim.p <- function(p,c1)
#   derivative of erho.bt.lim wrt c1
  return(chi.int.p(p,2,c1)+c1^2*chi.int2.p(p,0,c1)+2*c1*chi.int2(p,0,c1))


rejpt.bt.lim <- function(p,r){
#   find p-value of translated biweight limit c
#   that gives a specified breakdown
    c1 <- 2*p
    iter <- 1
    crit <- 100
    eps <- 1e-5
    while ((crit > eps)&(iter<100))
    {
        c1.old <- c1
        fc <- erho.bt.lim(p,c1) - c1^2*r
        fcp <- erho.bt.lim.p(p,c1) - 2*c1*r
        c1 <- c1 - fc/fcp
        if (c1 < 0)  c1 <- c1.old/2
        crit <- abs(fc)
        iter <- iter+1
    }
    return(c(c1,pchisq(c1^2,p),log10(1-pchisq(c1^2,p))))
}
chi.int.p <- function(p,a,c1)
  return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 )
chi.int2.p <- function(p,a,c1)
  return( -exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 )
ksolve.bt <- function(d,p,c1,M,b0){
#       find a constant k which satisfies the s-estimation constraint
#       for modified biweight
    k <- 1
    iter <- 1
    crit <- 100
    eps <- 1e-5
    while ((crit > eps)&(iter<100))
    {
        k.old <- k
        fk <- mean(rho.bt(d/k,c1,M))-b0
        fkp <- -mean(psi.bt(d/k,c1,M)*d/k^2)
        k <- k - fk/fkp
        if (k < k.old/2)  k <- k.old/2
        if (k > k.old*1.5) k <- k.old*1.5
        crit <- abs(fk)
#        print(c(iter,k.old,crit))
        iter <- iter+1
    }
#    print(c(iter,k,crit))
    return(k)
}
rho.bt <- function(x,c1,M)
{
    x1 <- (x-M)/c1
    ivec1 <- (x1 < 0)
    ivec2 <- (x1 >  1)
    return(ivec1*(x^2/2)
        +ivec2*(M^2/2+c1*(5*c1+16*M)/30)
        +(1-ivec1-ivec2)*(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4)
            +(1/2+M^4/(2*c1^4)-M^2/c1^2)*x^2
            +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*x^3
            +(3*M^2/(2*c1^4)-1/(2*c1^2))*x^4
            -4*M*x^5/(5*c1^4)+x^6/(6*c1^4)))
}
psi.bt <- function(x,c1,M)
{
    x1 <- (x-M)/c1
    ivec1 <- (x1 < 0)
    ivec2 <- (x1 >  1)
    return(ivec1*x+(1-ivec1-ivec2)*x*(1-x1^2)^2)
}
psip.bt <- function(x,c1,M)
{
    x1 <- (x-M)/c1
    ivec1 <- (x1 < 0)
    ivec2 <- (x1 >  1)
    return(ivec1+(1-ivec1-ivec2)*((1-x1^2)^2+4*x*x1*(1-x1^2)/c1))
}
wt.bt <- function(x,c1,M)
{
    x1 <- (x-M)/c1
    ivec1 <- (x1 < 0)
    ivec2 <- (x1 >  1)
    return(ivec1+(1-ivec1-ivec2)*(1-x1^2)^2)
}
v.bt <- function(x,c1,M) return(x*psi.bt(x,c1,M))

rung3dlchk<-function(x,y,est=onestep,regfun=tsreg,beta=.2,plotit=FALSE,nmin=0,
fr=NA,tr=.2,...){
#
# running mean using interval method
# Same as runm3d, but empirically determine the span, fr,
# by minimizing a robust measure of variance (pbvar) of the
# residuals from the leave-three-out fit.
#
# x is an n by p matrix of predictors.
#
# fr controls amount of smoothing and is determined by this function.
# If fr is missing, the function first considers fr=.7(.05)1.2. If the
# measure of scale of the residuals is minimized at the smallest span,
# it then considers fr=.2(.05).75.
#
#
if(!is.matrix(x))stop("Data are not stored in a matrix.")
plotit<-as.logical(plotit)
chkcor<-1
frtry<-c(.7,.75,.8,.85,.9,.95,1.,1.05,1.1,1.15,1.2)
if(!is.na(fr[1]))frtry<-fr
fr.given<-!is.na(fr[1]) # remember whether the user supplied a span
chkit<-0
for (it in 1:length(frtry)){
fr<-frtry[it]
rmd<-runm3ds1(x,y,fr,tr,FALSE,nmin)  # Using leave-three-out method.
xm<-y[!is.na(rmd)]
rmd<-rmd[!is.na(rmd)]
dif<-xm-rmd
chkcor[it]<-pbvar(dif,beta)
}
if(sum(is.na(chkcor))== length(chkcor))
{stop("A value for the span cannot be determined with these data.")}
tempc<-sort(chkcor)
chkcor[is.na(chkcor)]<-tempc[length(tempc)]
temp<-order(chkcor)
fr1<-frtry[temp[1]]
fr2<-fr1
val1<-min(chkcor)
chkcor2<-0
if(!fr.given){
if(temp[1] == 1){
frtry<-c(.2,.25,.3,.35,.4,.45,.5,.55,.6,.65,.7,.75)
for (it in 1:length(frtry)){
fr<-frtry[it]
rmd<-runm3ds1(x,y,fr,tr,FALSE,nmin)
xm<-y[!is.na(rmd)]
rmd<-rmd[!is.na(rmd)]
dif<-xm-rmd
chkcor2[it]<-pbvar(dif,beta)
}
tempc<-sort(chkcor2)
chkcor2[is.na(chkcor2)]<-tempc[length(tempc)]
print(chkcor2)
temp2<-order(chkcor2)
fr2<-frtry[temp2[1]]
}
}
sortc<-sort(chkcor2)
chkcor2[is.na(chkcor2)]<-sortc[length(sortc)]
val2<-min(chkcor2)
fr<-fr1
if(val2 < val1)fr<-fr2
rmd<-runm3d(x,y,fr=fr,tr,plotit=FALSE,nmin,pyhat=TRUE,pr=FALSE)
xm<-y[!is.na(rmd)]
rmd<-rmd[!is.na(rmd)]
etasq<-pbcor(rmd,xm)$cor^2
# Next, fit regression line
temp<-y-regfun(x,y)$res
pbc<-pbcor(temp,y)$cor^2
temp<-(etasq-pbc)/(1-pbc)
list(gamma.L=temp,pbcorsq=pbc,etasq=etasq,fr=fr,rmd=rmd,yused=xm,varval=chkcor)
}

near3dl1<-function(x,pt,fr=1,m){
#
# Flag points near pt based on Mahalanobis distance,
# apparently excluding the three closest points
# (used by the leave-three-out running estimators).
#
dis<-mahalanobis(x,pt,m$cov)
sdis<-sqrt(sort(dis))
dflag<-(dis < fr & dis > sdis[3])
dflag
}

listm<-function(x){
#
# Store the data in a matrix or data frame in a new
# R variable having list mode.
# Col 1 will be stored in y[[1]], col 2 in y[[2]], and so on.
#
if(is.null(dim(x)))stop("The argument x must be a matrix or data frame")
y<-list()
for(j in 1:ncol(x))y[[j]]<-x[,j]
y
}

list2matrix=listm
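
#  EXAMPLE (illustration only):
#    m<-matrix(1:6,ncol=2)
#    listm(m)  # list of length 2: [[1]]=1:3 (column 1), [[2]]=4:6 (column 2)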

pbanova<-function(x,tr=.2,alpha=.05,nboot=NA,grp=NA,WIN=FALSE,win=.1){
#
#   Test the hypothesis that J independent groups have
#   equal trimmed means using the percentile bootstrap method.
#
#   The data are assumed to be stored in x
#   which either has list mode or is a matrix.  In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, the columns of the matrix correspond
#   to groups.
#
#   tr is the amount of trimming
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   WIN=T means data are Winsorized before taking bootstraps by the
#   amount win.
#
#   Missing values are allowed.
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
tempn<-0
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
}
Jm<-J-1
if(WIN){
if(tr < .2){print("Warning: When Winsorizing,")
print("the amount of trimming should be at least.2")
}
if(win > tr)stop("Amount of Winsorizing must be <= amount of trimming")
if(min(tempn) < 15){
print("Warning: Winsorizing with sample sizes less than 15")
print("can result in poor control over the probability of a Type I error")
}
for (j in 1:J){
x[[j]]<-winval(x[[j]],win)
}
}
con<-matrix(0,J,J-1)
for (j in 1:Jm){
jp<-j+1
con[j,j]<-1
con[jp,j]<-0-1
}
#  Determine nboot if a value was not specified
if(is.na(nboot)){
nboot<-5000
if(J <= 8)nboot<-4000
if(J <= 3)nboot<-2000
}
# Determine critical values
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(Jm > 10){
avec<-.05/c(11:Jm)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(Jm > 10){
avec<-.01/c(11:Jm)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:Jm)
bvec<-matrix(NA,nrow=J,ncol=nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
paste("Working on group ",j)
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,mean,tr) # Bootstrapped trimmed means for jth group
}
test<-NA
for (d in 1:Jm){
dp<-d+1
test[d]<-sum(bvec[d,]>bvec[dp,])/nboot
if(test[d]> .5)test[d]<-1-test[d]
}
test<-(0-1)*sort(-2*test)
sig<-sum((test<dvec[1:Jm]))
if(sig>0)print("Significant result obtained: Reject")
if(sig==0)print("No significant result obtained: Fail to reject")
list(test.vec=test,crit.vec=dvec[1:Jm])
}
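
#  EXAMPLE (an illustrative sketch with simulated data):
#    set.seed(1)
#    g<-list(rnorm(20),rnorm(20),rnorm(20,.5))
#    pbanova(g)  # percentile bootstrap test of equal 20% trimmed means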

pbanovag<-function(x,alpha=.05,nboot=NA,grp=NA,est=onestep,...){
#
#   Test the hypothesis that J independent groups have
#   equal measures of location using the percentile bootstrap method.
#   (Robust measures of scale can be compared as well.)
#
#   The data are assumed to be stored in x
#   which either has list mode or is a matrix.  In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, the columns of the matrix correspond
#   to groups.
#
#   est is the measure of location and defaults to a M-estimator
#   ... can be used to set optional arguments associated with est
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   Missing values are allowed.
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(sum(grp))){
# Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
tempn<-0
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
}
Jm<-J-1
con<-matrix(0,J,J-1)
for (j in 1:Jm){
jp<-j+1
con[j,j]<-1
con[jp,j]<-0-1
}
#  Determine nboot if a value was not specified
if(is.na(nboot)){
nboot<-5000
if(J <= 8)nboot<-4000
if(J <= 3)nboot<-2000
}
# Determine critical values
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(Jm > 10){
avec<-.05/c(11:Jm)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(Jm > 10){
avec<-.01/c(11:Jm)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:Jm)
bvec<-matrix(NA,nrow=J,ncol=nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
paste("Working on group ",j)
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,est,...) # Bootstrapped estimates for the jth group
}
test<-NA
for (d in 1:Jm){
dp<-d+1
test[d]<-sum(bvec[d,]>bvec[dp,])/nboot
if(test[d]> .5)test[d]<-1-test[d]
}
test<-(0-1)*sort(-2*test)
sig<-sum((test<dvec[1:Jm]))
if(sig>0)print("Significant result obtained: Reject")
if(sig==0)print("No significant result obtained: Fail to reject")
list(test.vec=test,crit.vec=dvec[1:Jm])
}
bootse<-function(x,nboot=1000,est=median,SEED=TRUE,...){
#
#   Compute bootstrap estimate of the standard error of the
#   estimator est
#   The default number of bootstrap samples is nboot=1000
#
if(SEED)set.seed(2) # set seed of random number generator so that
#   results can be duplicated.
data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,est,...)
bootse<-sqrt(var(bvec))
bootse
}
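
#  EXAMPLE (illustration only):
#    set.seed(1)
#    bootse(rnorm(50),nboot=1000,est=median)  # bootstrap SE of the sample median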


rananova<-function(x,tr=.2,grp=NA){
#
#  A heteroscedastic one-way random effects ANOVA for trimmed means.
#
#  The data are assumed to be stored in a matrix or in list mode.
#  If in list mode,
#  Length(x) is assumed to correspond to the total number of groups.
#  If the data are stored in a matrix, groups correspond to columns.
#  By default, the null hypothesis is that all groups have a common trimmed mean.
#  To compare a subset of the groups, use grp to indicate which
#  groups are to be compared. For example, if you type the
#  command grp<-c(1,3,4), and then execute this function, groups
#  1, 3, and 4 will be compared with the remaining groups ignored.
#
if(is.matrix(x))x<-listm(x)
if(is.na(grp[1]))grp<-c(1:length(x))
if(!is.list(x))stop("Data are not stored in a matrix or in list mode")
J<-length(grp)  # The number of groups to be compared
print("The number of groups to be compared is")
print(J)
h<-1
xbar<-1
ybar<-1
wvar<-1
ell<-0
for(j in 1:J){
ell[j]<-length(x[[grp[j]]])/(length(x[[grp[j]]])+1)
h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]]))
   # h is the number of observations in the jth group after trimming.
ybar[j]<-winmean(x[[grp[j]]],tr)
xbar[j]<-mean(x[[grp[j]]],tr)
wvar[j]<-winvar(x[[grp[j]]],tr)
}
q<-NA
bsst<-var(xbar)
for (j in 1:J)q[j]<-(length(x[[grp[j]]])-1)*wvar[j]/(h[j]*(h[j]-1))
wssw<-mean(q)
D<-bsst/wssw
g<-q/J
nu1<-((J-1)*sum(q))^2/((sum(q))^2+(J-2)*J*sum(q^2))
nu2<-(sum(J*q))^2/sum((J*q)^2/(h-1))
sig<-1-pf(D,nu1,nu2)
# Next, estimate the Winsorized intraclass correlation
sighat<-mean(ell*(ybar-(sum(ell*ybar)/sum(ell)))^2)
rho<-sighat/(sighat+winmean(wvar,tr))
list(teststat=D,df=c(nu1,nu2),siglevel=sig,rho=rho)
}
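
#  EXAMPLE (an illustrative sketch with simulated data):
#    set.seed(1)
#    rananova(matrix(rnorm(100),ncol=4))  # 4 groups of 25 observations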


linpbg<-function(x,con=0,alpha=.05,nboot=NA,est=mest,...){
#
#   Compute a 1-alpha confidence interval
#   for a set of d linear contrasts
#   involving robust measures of location (given by the argument est,
#   defaulting to the M-estimator) using the percentile bootstrap method.
#   Independent groups are assumed.
#
#   The data are assumed to be stored in x in list mode or in a matrix.
#   Thus,
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc.
#   If x has list mode, length(x)=the number of groups = J, say.
#
#   Missing values are automatically removed.
#
#   con is a J by d matrix containing the
#   contrast coefficients of interest.
#   If unspecified, all pairwise comparisons are performed.
#   For example, con[,1]=c(1,1,-1,-1,0,0)
#   and con[,2]=c(1,-1,0,0,1,-1)
#   will test two contrasts: (1) the sum of the first
#   two trimmed means is
#   equal to the sum of the second two,
#   and (2) the difference between
#   the first two is equal to the difference
#   between the trimmed means of
#   groups 5 and 6.
#
#
con<-as.matrix(con)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
J<-length(x)
for(j in 1:J){
xx<-x[[j]]
x[[j]]<-xx[!is.na(xx)] # Remove any missing values.
}
Jm<-J-1
d<-(J^2-J)/2
if(sum(con^2)==0){
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1 #If con not specified do all pairwise comparisons
con[k,id]<-0-1
}}}
if(nrow(con)!=length(x)){
stop("The number of groups does not match the number of contrast coefficients.")
}
if(is.na(nboot)){
nboot<-5000
if(ncol(con)<=4)nboot<-2000
}
m1<-matrix(0,nrow=J,ncol=nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
paste("Working on group ",j)
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
m1[j,]<-apply(data,1,est,...)
}
testb<-NA
boot<-matrix(0,ncol(con),nboot)
testvec<-NA
for (d in 1:ncol(con)){
boot[d,]<-apply(m1,2,trimpartt,con[,d])
# A vector of length nboot containing psi hat values
# and corresponding to the dth linear contrast
testb[d]<-sum((boot[d,]>0))/nboot
testvec[d]<-min(testb[d],1-testb[d])
}
#
#  Determine critical value
#
dd<-ncol(con)
crit<-NA
if(alpha==.05){
if(dd==1)crit<-alpha/2
if(dd==2)crit<-.014
if(dd==3)crit<-.0085
if(dd==4)crit<-.007
if(dd==5)crit<-.006
if(dd==6)crit<-.0045
if(dd==10)crit<-.0023
if(dd==15)crit<-.0016
}
if(is.na(crit))crit<-alpha/(2*dd) # approximation when no tabled value applies
icl<-round(crit*nboot)
icu<-round((1-crit)*nboot)
psihat<-matrix(0,ncol(con),4)
test<-matrix(0,ncol(con),3)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
dimnames(test)<-list(NULL,c("con.num","test","crit.val"))
for (d in 1:ncol(con)){
test[d,1]<-d
psihat[d,1]<-d
testit<-lincon(x,con[,d]) # tr is not an argument of linpbg; lincon's default trimming is used
test[d,2]<-testvec[d]
temp<-sort(boot[d,])
psihat[d,3]<-temp[icl]
psihat[d,4]<-temp[icu]
psihat[d,2]<-testit$psihat[1,2]
test[d,3]<-crit
}
list(psihat=psihat,test=test,con=con)
}
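
#  EXAMPLE (an illustrative sketch with simulated data; assumes the
#  lincon function from this collection is available):
#    set.seed(1)
#    g<-list(rnorm(20),rnorm(20),rnorm(20))
#    con<-matrix(c(1,-1,0,0,1,-1),nrow=3)  # group 1 vs 2, group 2 vs 3
#    linpbg(g,con=con)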




lintpb<-function(x,con=0,tr=.2,alpha=.05,nboot=NA){
#
#   Compute a 1-alpha confidence interval
#   for a set of d linear contrasts
#   involving trimmed means using the percentile bootstrap method.
#   Independent groups are assumed.
#
#   The data are assumed to be stored in x in list mode or in a matrix.
#   Thus,
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc.
#   If x has list mode, length(x)=the number of groups = J, say.
#
#   Missing values are automatically removed.
#
#   con is a J by d matrix containing the
#   contrast coefficients of interest.
#   If unspecified, all pairwise comparisons are performed.
#   For example, con[,1]=c(1,1,-1,-1,0,0)
#   and con[,2]=c(1,-1,0,0,1,-1)
#   will test two contrasts: (1) the sum of the first
#   two trimmed means is
#   equal to the sum of the second two,
#   and (2) the difference between
#   the first two is equal to the difference
#   between the trimmed means of
#   groups 5 and 6.
#
#
con<-as.matrix(con)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
J<-length(x)
for(j in 1:J){
xx<-x[[j]]
x[[j]]<-xx[!is.na(xx)] # Remove any missing values.
}
Jm<-J-1
d<-(J^2-J)/2
if(sum(con^2)==0){
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1 #If con not specified do all pairwise comparisons
con[k,id]<-0-1
}}}
if(nrow(con)!=length(x)){
stop("The number of groups does not match the number of contrast coefficients.")
}
if(is.na(nboot)){
nboot<-5000
if(ncol(con)<=4)nboot<-2000
}
m1<-matrix(0,nrow=J,ncol=nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
paste("Working on group ",j)
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
m1[j,]<-apply(data,1,mean,tr)
}
testb<-NA
boot<-matrix(0,ncol(con),nboot)
testvec<-NA
for (d in 1:ncol(con)){
boot[d,]<-apply(m1,2,trimpartt,con[,d])
# A vector of length nboot containing psi hat values
# and corresponding to the dth linear contrast
testb[d]<-sum((boot[d,]>0))/nboot
testvec[d]<-min(testb[d],1-testb[d])
}
#
#  Determine critical value
#
dd<-ncol(con)
crit<-NA
if(alpha==.05){
if(dd==1)crit<-alpha/2
if(dd==2)crit<-.014
if(dd==3)crit<-.0085
if(dd==4)crit<-.007
if(dd==5)crit<-.006
if(dd==6)crit<-.0045
if(dd==10)crit<-.0023
if(dd==15)crit<-.0016
}
if(is.na(crit))crit<-alpha/(2*dd) # approximation when no tabled value applies
icl<-round(crit*nboot)
icu<-round((1-crit)*nboot)
psihat<-matrix(0,ncol(con),4)
test<-matrix(0,ncol(con),3)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
dimnames(test)<-list(NULL,c("con.num","test","crit.val"))
for (d in 1:ncol(con)){
test[d,1]<-d
psihat[d,1]<-d
testit<-lincon(x,con[,d],tr)
test[d,2]<-testvec[d]
temp<-sort(boot[d,])
psihat[d,3]<-temp[icl]
psihat[d,4]<-temp[icu]
psihat[d,2]<-testit$psihat[1,2]
test[d,3]<-crit
}
list(psihat=psihat,test=test,con=con)
}




t2waypb<-function(J,K,x,tr=.2,alpha=.05,nboot=NA,grp=NA){
#
#   Two-way ANOVA for independent groups based on trimmed
#   means and a percentile bootstrap method.

#   The data are assumed to be stored in x in list mode or in a matrix.
#   If grp is unspecified, it is assumed x[[1]] contains the data
#   for the first level of both factors: level 1,1.
#   x[[2]] is assumed to contain the data for level 1 of the
#   first factor and level 2 of the second factor: level 1,2.
#   x[[K+1]] is the data for level 2,1, etc.
#   If the data are in the wrong order, grp can be used to rearrange the
#   groups. For example, for a two by two design, grp<-c(2,4,3,1)
#   indicates that the second group corresponds to level 1,1;
#   group 4 corresponds to level 1,2; group 3 is level 2,1;
#   and group 1 is level 2,2.
#
#   Missing values are automatically removed.
#
if(is.data.frame(x))x=as.matrix(x)
JK<-J*K
if(is.matrix(x))x<-listm(x)
if(!is.na(grp[1])){
yy<-x
for(j in 1:length(grp))
x[[j]]<-yy[[grp[j]]]
}
if(!is.list(x))stop("Data must be stored in list mode or a matrix.")
for(j in 1:JK){
xx<-x[[j]]
x[[j]]<-xx[!is.na(xx)] # Remove any missing values.
}
#
# Create the three contrast matrices
#
       ij <- matrix(c(rep(1, J)), 1, J)
        ik <- matrix(c(rep(1, K)), 1, K)
       jm1 <- J - 1
        cj <- diag(1, jm1, J)
        for(i in 1:jm1)
                cj[i, i + 1] <- 0 - 1
        km1 <- K - 1
        ck <- diag(1, km1, K)
        for(i in 1:km1)
                ck[i, i + 1] <- 0 - 1
conA<-t(kron(cj,ik))
conB<-t(kron(ij,ck))
conAB<-t(kron(cj,ck))
ncon<-max(nrow(conA),nrow(conB),nrow(conAB))
if(JK!=length(x)){
print("Warning: The number of groups does not match")
print(" the number of contrast coefficients.")
}
if(is.na(nboot)){
nboot<-5000
if(ncon<=4)nboot<-2000
}
m1<-matrix(0,nrow=JK,ncol=nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:JK){
paste("Working on group ",j)
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
m1[j,]<-apply(data,1,mean,tr)
}
bootA<-matrix(0,ncol(conA),nboot)
bootB<-matrix(0,ncol(conB),nboot)
bootAB<-matrix(0,ncol(conAB),nboot)
testA<-NA
testB<-NA
testAB<-NA
testvecA<-NA
testvecB<-NA
testvecAB<-NA
for (d in 1:ncol(conA)){
bootA[d,]<-apply(m1,2,trimpartt,conA[,d])
# A vector of length nboot containing psi hat values
# corresponding to the dth linear contrast
testA[d]<-sum((bootA[d,]>0))/nboot
testA[d]<-min(testA[d],1-testA[d])
}
for (d in 1:ncol(conB)){
bootB[d,]<-apply(m1,2,trimpartt,conB[,d])
# A vector of length nboot containing psi hat values
# corresponding to the dth linear contrast
testB[d]<-sum((bootB[d,]>0))/nboot
testB[d]<-min(testB[d],1-testB[d])
}
for (d in 1:ncol(conAB)){
bootAB[d,]<-apply(m1,2,trimpartt,conAB[,d])
# A vector of length nboot containing psi hat values
# corresponding to the dth linear contrast
testAB[d]<-sum((bootAB[d,]>0))/nboot
testAB[d]<-min(testAB[d],1-testAB[d])
}
#
#  Determine critical value
#
Jm<-J-1
Km<-K-1
JKm<-(J-1)*(K-1)
dvecA <- alpha/c(1:Jm)
dvecB <- alpha/c(1:Km)
dvecAB <- alpha/c(1:JKm)
testA<-(0 - 1) * sort(-2 * testA)
testB<-(0 - 1) * sort(-2 * testB)
testAB<-(0 - 1) * sort(-2 * testAB)
sig <- sum((testA < dvecA[1:Jm]))
if(sig > 0)
print("Significant result obtained for Factor A: Reject")
if(sig == 0)
print("No significant result Factor A: Fail to reject")
sig <- sum((testB < dvecB[1:Km]))
if(sig > 0)
print("Significant result obtained for Factor B: Reject")
if(sig == 0)
print("No significant result Factor B: Fail to reject")
sig <- sum((testAB < dvecAB[1:JKm]))
if(sig > 0)
print("Significant Interaction: Reject")
if(sig == 0)
print("No significant Interaction: Fail to reject")
list(testA=testA,crit.vecA=dvecA,testB=testB,crit.vecB=dvecB,testAB=testAB,crit.vecAB=dvecAB)
}


t2waypbg<-function(J,K,x,alpha=.05,nboot=NA,grp=NA,est=onestep,...){
#
#   Two-way ANOVA for independent groups based on
#   robust measures of location
#   and a percentile bootstrap method.

#   The data are assumed to be stored in x in list mode or in a matrix.
#   If grp is unspecified, it is assumed x[[1]] contains the data
#   for the first level of both factors: level 1,1.
#   x[[2]] is assumed to contain the data for level 1 of the
#   first factor and level 2 of the second factor: level 1,2.
#   x[[K+1]] is the data for level 2,1, etc.
#   If the data are in the wrong order, grp can be used to rearrange the
#   groups. For example, for a two by two design, grp<-c(2,4,3,1)
#   indicates that the second group corresponds to level 1,1;
#   group 4 corresponds to level 1,2; group 3 is level 2,1;
#   and group 1 is level 2,2.
#
#   Missing values are automatically removed.
#
if(is.data.frame(x))x=as.matrix(x)
JK<-J*K
if(is.matrix(x))x<-listm(x)
if(!is.na(grp[1])){
yy<-x
for(j in 1:length(grp))
x[[j]]<-yy[[grp[j]]]
}
if(!is.list(x))stop("Data must be stored in list mode or a matrix.")
for(j in 1:JK){
xx<-x[[j]]
x[[j]]<-xx[!is.na(xx)] # Remove any missing values.
}
#
# Create the three contrast matrices
#
       ij <- matrix(c(rep(1, J)), 1, J)
        ik <- matrix(c(rep(1, K)), 1, K)
       jm1 <- J - 1
        cj <- diag(1, jm1, J)
        for(i in 1:jm1)
                cj[i, i + 1] <- 0 - 1
        km1 <- K - 1
        ck <- diag(1, km1, K)
        for(i in 1:km1)
                ck[i, i + 1] <- 0 - 1
conA<-t(kron(cj,ik))
conB<-t(kron(ij,ck))
conAB<-t(kron(cj,ck))
ncon<-max(nrow(conA),nrow(conB),nrow(conAB))
if(JK!=length(x)){
print("Warning: The number of groups does not match")
print("the number of contrast coefficients.")
}
if(is.na(nboot)){
nboot<-5000
if(ncon<=4)nboot<-2000
}
m1<-matrix(0,nrow=JK,ncol=nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:JK){
paste("Working on group ",j)
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
m1[j,]<-apply(data,1,est,...)
}
bootA<-matrix(0,ncol(conA),nboot)
bootB<-matrix(0,ncol(conB),nboot)
bootAB<-matrix(0,ncol(conAB),nboot)
testA<-NA
testB<-NA
testAB<-NA
testvecA<-NA
testvecB<-NA
testvecAB<-NA
for (d in 1:ncol(conA)){
bootA[d,]<-apply(m1,2,trimpartt,conA[,d])
# A vector of length nboot containing psi hat values
# corresponding to the dth linear contrast
testA[d]<-sum((bootA[d,]>0))/nboot
testA[d]<-min(testA[d],1-testA[d])
}
for (d in 1:ncol(conB)){
bootB[d,]<-apply(m1,2,trimpartt,conB[,d])
# A vector of length nboot containing psi hat values
# corresponding to the dth linear contrast
testB[d]<-sum((bootB[d,]>0))/nboot
testB[d]<-min(testB[d],1-testB[d])
}
for (d in 1:ncol(conAB)){
bootAB[d,]<-apply(m1,2,trimpartt,conAB[,d])
# A vector of length nboot containing psi hat values
# corresponding to the dth linear contrast
testAB[d]<-sum((bootAB[d,]>0))/nboot
testAB[d]<-min(testAB[d],1-testAB[d])
}
#
#  Determine critical value
#
Jm<-J-1
Km<-K-1
JKm<-(J-1)*(K-1)
dvecA <- alpha/c(1:Jm)
dvecB <- alpha/c(1:Km)
dvecAB <- alpha/c(1:JKm)
testA<-(0 - 1) * sort(-2 * testA)
testB<-(0 - 1) * sort(-2 * testB)
testAB<-(0 - 1) * sort(-2 * testAB)
sig <- sum((testA < dvecA[1:Jm]))
if(sig > 0)
print("Significant result obtained for Factor A: Reject")
if(sig == 0)
print("No significant result Factor A: Fail to reject")
sig <- sum((testB < dvecB[1:Km]))
if(sig > 0)
print("Significant result obtained for Factor B: Reject")
if(sig == 0)
print("No significant result Factor B: Fail to reject")
sig <- sum((testAB < dvecAB[1:JKm]))
if(sig > 0)
print("Significant Interaction: Reject")
if(sig == 0)
print("No significant Interaction: Fail to reject")
list(testA=testA,crit.vecA=dvecA,testB=testB,crit.vecB=dvecB,testAB=testAB,crit.vecAB=dvecAB)
}

regout<-function(x,y,regest=stsreg,plotit=TRUE,mbox=T){
#
# Check for regression outliers by fitting a
# a line to data using regest and then applying
# a boxplot rule to the residuals.
# mbox=T uses Carling's method
# mbox=F uses ideal fourths with conventional boxplot rules.
#
chk<-regest(x,y)
flag<-outbox(chk$residuals,mbox=mbox)$out.id
if(plotit){
plot(x,y)
points(x[flag],y[flag],pch="o")
abline(chk$coef)
}
list(out.id=flag)
}
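
#  EXAMPLE (an illustrative sketch; simulated data with one planted outlier):
#    set.seed(1)
#    x<-rnorm(30)
#    y<-x+rnorm(30)
#    y[1]<-y[1]+10
#    regout(x,y,plotit=FALSE)$out.id  # rows flagged as regression outliers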

stsregp1<-function(x,y,sc=pbvar,xout=FALSE,outfun=out,plotit=FALSE,...){
#
# Compute the S-type modification of
# the Theil-Sen regression estimator.
# Only a single predictor is allowed in this version
#
xy=elimna(cbind(x,y))
p=ncol(as.matrix(x))
if(p!=1)stop("Current version is limited to one predictor")
p1=p+1
x=xy[,1:p]
y=xy[,p1]
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=plotit,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
ord<-order(x)
xs<-x[ord]
ys<-y[ord]
vec1<-outer(ys,ys,"-")
vec2<-outer(xs,xs,"-")
v1<-vec1[vec2>0]
v2<-vec2[vec2>0]
slope<-v1/v2
allvar<-NA
for(i in 1:length(slope))allvar[i]<-sc(y-slope[i]*x,...)
temp<-order(allvar)
coef<-0
coef[2]<-slope[temp[1]]
coef[1]<-median(y)-coef[2]*median(x)
res<-y-coef[2]*x-coef[1]
list(coef=coef,residuals=res)
}

stsreg<-function(x,y,xout=FALSE,outfun=out,iter=10,sc=pbvar,varfun=pbvar,
corfun=pbcor,plotit=FALSE,...){
#
#  Compute the S-type modification of the Theil-Sen regression estimator
#  (slopes are chosen to minimize the robust measure of scale given by sc)
#
#  A Gauss-Seidel algorithm is used
#  when there is more than one predictor
#
#
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
temp<-NA
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=plotit,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(ncol(x)==1){
temp1<-stsregp1(x,y,sc=sc)
coef<-temp1$coef
res<-temp1$res
}
if(ncol(x)>1){
for(p in 1:ncol(x)){
temp[p]<-tsp1reg(x[,p],y)$coef[2]
}
res<-y-x%*%temp
alpha<-median(res)
r<-matrix(NA,ncol=ncol(x),nrow=nrow(x))
tempold<-temp
for(it in 1:iter){
for(p in 1:ncol(x)){
r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p]
temp[p]<-stsregp1(x[,p],r[,p],sc=sc)$coef[2]
}
alpha<-median(y-x%*%temp)
tempold<-temp
}
coef<-c(alpha,temp)
res<-y-x%*%temp-alpha
}
yhat<-y-res
stre=NULL
e.pow<-varfun(yhat)/varfun(y)
if(!is.na(e.pow)){
if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2
e.pow=as.numeric(e.pow)
stre=sqrt(e.pow)
}
list(coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow)
}
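
#  EXAMPLE (an illustrative sketch with simulated data):
#    set.seed(1)
#    x<-rnorm(40)
#    y<-x+rnorm(40)
#    stsreg(x,y)$coef  # intercept and slope minimizing sc of the residuals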



yuend<-function(x,y,tr=.2,alpha=.05){
#
#  Compare the trimmed means of two dependent random variables
#  using the data in x and y.
#  The default amount of trimming is 20%
#
#  Any pair with a missing value is eliminated
#  The function rm2miss allows missing values.
#
#  A confidence interval for the trimmed mean of x minus
#  the trimmed mean of y is computed and returned in yuend$ci.
#  The significance level is returned in yuend$p.value
#
#  This function uses winvar from chapter 2.
#
if(length(x)!=length(y))stop("The number of observations must be equal")
m<-cbind(x,y)
m<-elimna(m)
x<-m[,1]
y<-m[,2]
h1<-length(x)-2*floor(tr*length(x))
q1<-(length(x)-1)*winvar(x,tr)
q2<-(length(y)-1)*winvar(y,tr)
q3<-(length(x)-1)*wincor(x,y,tr)$cov
df<-h1-1
se<-sqrt((q1+q2-2*q3)/(h1*(h1-1)))
crit<-qt(1-alpha/2,df)
dif<-mean(x,tr)-mean(y,tr)
low<-dif-crit*se
up<-dif+crit*se
test<-dif/se
yuend<-2*(1-pt(abs(test),df))
list(ci=c(low,up),p.value=yuend,est1=mean(x,tr),est2=mean(y,tr),dif=dif,se=se,teststat=test,n=length(x),df=df)
}
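
#  EXAMPLE (an illustrative sketch; simulated paired data):
#    set.seed(1)
#    pre<-rnorm(25)
#    post<-pre+rnorm(25,mean=.3)
#    yuend(pre,post)  # CI and p-value for the difference in 20% trimmed means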

rmmcppbtm<-function(x,alpha=.05,con=0,tr=.2,grp=NA,nboot=NA){
#
#   Using the percentile bootstrap method,
#   compute a .95 confidence interval for all linear contrasts
#   specified by con, a J by C matrix, where  C is the number of
#   contrasts to be tested, and the columns of con are the
#   contrast coefficients.
#
#   The trimmed means of dependent groups are being compared.
#   By default, 20% trimming is used.
#
#   nboot is the bootstrap sample size. If not specified, a value will
#   be chosen depending on the number of contrasts there are.
#
#   x can be an n by J matrix or it can have list mode
#
#   For alpha=.05, some critical values have been
#   determined via simulations and are used by this function;
#   otherwise an approximation is used.
#
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
if(is.matrix(con)){
if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.")
}}
if(is.list(x)){
# put the data in an n by J matrix
mat<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))mat[,j]<-x[[j]]
}
if(is.matrix(x) && is.matrix(con)){
if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.")
mat<-x
}
if(is.matrix(x))mat<-x
if(!is.na(sum(grp)))mat<-mat[,grp]
mat<-elimna(mat) # Remove rows with missing values.
J<-ncol(mat)
Jm<-J-1
if(sum(con^2)==0){
d<-(J^2-J)/2
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
d<-ncol(con)
if(tr != .2){
print("The critical values used by this function assume tr=.2")
stop("The amount of trimming must be .2 when using this function")
}
if(is.na(nboot)){
if(d<=3)nboot<-1000
if(d==6)nboot<-2000
if(d==10)nboot<-4000
if(d==15)nboot<-8000
if(d==21)nboot<-8000
if(d==28)nboot<-10000
}
n<-nrow(mat)
crit<-NA
if(alpha==.05){
if(d==1)crit<-alpha/2
if(d==3){
crit<-.004
if(n>=15)crit<-.006
if(n>=30)crit<-.007
if(n>=40)crit<-.008
if(n>=100)crit<-.009
}
if(d==6){
crit<-.001
if(n>=15)crit<-.002
if(n>=20)crit<-.0025
if(n>=30)crit<-.0035
if(n>=40)crit<-.004
if(n>=60)crit<-.0045
}
if(d==10){
crit<-.00025
if(n>=15)crit<-.00125
if(n>=20)crit<-.0025
}
if(d==15){
crit<-.0005
if(n>=20)crit<-.0010
if(n>=30)crit<-.0011
if(n>=40)crit<-.0016
if(n>=100)crit<-.0019
}
if(d==21){
crit<-.00025
if(n>=20)crit<-.00037
if(n>=30)crit<-.00075
if(n>=40)crit<-.00087
if(n>=60)crit<-.00115
if(n>=100)crit<-.00125
}
if(d==28){
crit<-.0004
if(n>=30)crit<-.0006
if(n>=60)crit<-.0008
if(n>=100)crit<-.001
}
}
if(is.na(crit)){
crit<-alpha/(2*d)
if(n<20)crit<-crit/2
if(n<=10)crit<-crit/2
}
icl<-ceiling(crit*nboot)+1
icu<-ceiling((1-crit)*nboot)
connum<-ncol(con)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
# data is an nboot by n matrix
xbars<-matrix(0,nboot,ncol(mat))
psihat<-matrix(0,connum,nboot)
print("Taking bootstrap samples. Please wait.")
bvec<-bootdep(mat,tr,nboot)
#
# Now have an nboot by J matrix of bootstrap values.
#
test<-1
for (ic in 1:connum){
psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic])
test[ic]<-sum((psihat[ic,]>0))/nboot
test[ic]<-min(test[ic],1-test[ic])
}
print("Reminder: Test statistic must be less than critical value in order to reject.")
output<-matrix(0,connum,5)
dimnames(output)<-list(NULL,c("con.num","psihat","test","ci.lower","ci.upper"))
tmeans<-apply(mat,2,mean,trim=tr)
psi<-1
for (ic in 1:ncol(con)){
output[ic,2]<-sum(con[,ic]*tmeans)
output[ic,1]<-ic
output[ic,3]<-test[ic]
temp<-sort(psihat[ic,])
output[ic,4]<-temp[icl]
output[ic,5]<-temp[icu]
}
list(output=output,crit=crit,con=con)
}

mcppb20<-function(x,crit=NA,con=0,tr=.2,alpha=.05,nboot=2000,grp=NA,WIN=FALSE,
win=.1){
#
#   Compute a 1-alpha confidence interval for a set of d linear contrasts
#   involving trimmed means using the percentile bootstrap method.
#   Independent groups are assumed.
#
#   The data are assumed to be stored in x in list mode.  Thus,
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J, say.
#
#   By default, all pairwise comparisons are performed, but contrasts
#   can be specified with the argument con.
#   The columns of con indicate the contrast coefficients.
#   Con should have J rows, J=number of groups.
#   For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(1,-1,0,0,1,-1)
#   will test two contrasts: (1) the sum of the first two trimmed means is
#   equal to the sum of the second two, and (2) the difference between
#   the first two is equal to the difference between the trimmed means of
#   groups 5 and 6.
#
#   The default number of bootstrap samples is nboot=2000
#
#
con<-as.matrix(con)
if(is.matrix(x)){
xx<-list()
for(i in 1:ncol(x)){
xx[[i]]<-x[,i]
}
x<-xx
}
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
tempn<-0
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
}
Jm<-J-1
d<-ifelse(sum(con^2)==0,(J^2-J)/2,ncol(con))
if(is.na(crit) && tr != .2){
print("A critical value must be specified when")
stop("the amount of trimming differs from .2")
}
if(WIN){
if(tr < .2){
print("Warning: When Winsorizing, the amount")
print("of trimming should be at least .2")
}
if(win > tr)stop("Amount of Winsorizing must <= amount of trimming")
if(min(tempn) < 15){
print("Warning: Winsorizing with sample sizes")
print("less than 15 can result in poor control")
print("over the probability of a Type I error")
}
for (j in 1:J){
x[[j]]<-winval(x[[j]],win)
}
}
if(is.na(crit)){
if(d==1)crit<-alpha/2
if(d==2 && alpha==.05 && nboot==1000)crit<-.014
if(d==2 && alpha==.05 && nboot==2000)crit<-.014
if(d==3 && alpha==.05 && nboot==1000)crit<-.009
if(d==3 && alpha==.05 && nboot==2000)crit<-.0085
if(d==3 && alpha==.025 && nboot==1000)crit<-.004
if(d==3 && alpha==.025 && nboot==2000)crit<-.004
if(d==3 && alpha==.01 && nboot==1000)crit<-.001
if(d==3 && alpha==.01 && nboot==2000)crit<-.001
if(d==4 && alpha==.05 && nboot==2000)crit<-.007
if(d==5 && alpha==.05 && nboot==2000)crit<-.006
if(d==6 && alpha==.05 && nboot==1000)crit<-.004
if(d==6 && alpha==.05 && nboot==2000)crit<-.0045
if(d==6 && alpha==.025 && nboot==1000)crit<-.002
if(d==6 && alpha==.025 && nboot==2000)crit<-.0015
if(d==6 && alpha==.01 && nboot==2000)crit<-.0005
if(d==10 && alpha==.05 && nboot<=2000)crit<-.002
if(d==10 && alpha==.05 && nboot==3000)crit<-.0023
if(d==10 && alpha==.025 && nboot<=2000)crit<-.0005
if(d==10 && alpha==.025 && nboot==3000)crit<-.001
if(d==15 && alpha==.05 && nboot==2000)crit<-.0016
if(d==15 && alpha==.025 && nboot==2000)crit<-.0005
if(d==15 && alpha==.05 && nboot==5000)crit<-.0026
if(d==15 && alpha==.025 && nboot==5000)crit<-.0006
}
if(is.na(crit) && alpha==.05)crit<-0.0268660714*(1/d)-0.0003321429
if(is.na(crit))crit<-alpha/(2*d)
if(d> 10 && nboot <5000){
print("Warning: Suggest using nboot=5000")
print("when the number of contrasts exceeds 10.")
}
icl<-round(crit*nboot)+1
icu<-round((1-crit)*nboot)
if(sum(con^2)==0){
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
psihat<-matrix(0,ncol(con),6)
dimnames(psihat)<-list(NULL,c("con.num","psihat","se","ci.lower",
"ci.upper","p-value"))
if(nrow(con)!=length(x)){
print("The number of groups does not match")
stop("the number of contrast coefficients.")
}
bvec<-matrix(NA,nrow=J,ncol=nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
paste("Working on group ",j)
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,mean,tr) # Bootstrapped trimmed means for jth group
}
test<-NA
for (d in 1:ncol(con)){
top<-0
for (i in 1:J){
top<-top+con[i,d]*bvec[i,]
}
test[d]<-(sum(top>0)+.5*sum(top==0))/nboot
test[d]<-min(test[d],1-test[d])
top<-sort(top)
psihat[d,4]<-top[icl]
psihat[d,5]<-top[icu]
}
for (d in 1:ncol(con)){
psihat[d,1]<-d
testit<-lincon(x,con[,d],tr,pr=FALSE)
psihat[d,6]<-2*test[d]
psihat[d,2]<-testit$psihat[1,2]
psihat[d,3]<-testit$test[1,4]
}
list(psihat=psihat,crit.p.value=2*crit,con=con)
}
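
#  EXAMPLE (an illustrative sketch with simulated data; assumes the
#  lincon function from this collection is available):
#    set.seed(1)
#    g<-list(rnorm(20),rnorm(20),rnorm(20,.5))
#    mcppb20(g)$psihat  # all pairwise comparisons of 20% trimmed means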

comvar2d<-function(x,y,SEED=TRUE){
#
#  Compare the variances of two dependent groups.
#
nboot<-599
m<-cbind(x,y)
m<-elimna(m) # Remove missing values
U<-m[,1]-m[,2]
V<-m[,1]+m[,2]
ci<-pcorb(U,V,SEED=SEED)$ci
list(ci=ci)
}
mom<-function(x,bend=2.24,na.rm=TRUE){
#
#  Compute MOM-estimator of location.
#  The default bending constant is 2.24
#
if(na.rm)x<-x[!is.na(x)] #Remove missing values
flag1<-(x>median(x)+bend*mad(x))
flag2<-(x<median(x)-bend*mad(x))
flag<-rep(T,length(x))
flag[flag1]<-F
flag[flag2]<-F
mom<-mean(x[flag])
mom
}
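
#  EXAMPLE (illustration only):
#    set.seed(1)
#    mom(c(rnorm(30),50))  # the outlying value 50 is flagged and excluded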


momci<-function(x,alpha=.05,nboot=2000,bend=2.24){
#
#   Compute a bootstrap, 1-alpha confidence interval for the
#   MOM-estimator of location.
#   The default number of bootstrap samples is nboot=2000
#
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,mom,bend)
bvec<-sort(bvec)
low<-round((alpha/2)*nboot)
up<-nboot-low
low<-low+1
list(ci=c(bvec[low],bvec[up]))
}


rmanogsub<-function(isub,x,est=onestep,...){
tsub <- est(x[isub],...)
tsub
}

bd1way1<-function(isub,xcen,est,misran,...){
#
#  Compute test statistic for bd1way
#
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  xcen is an n by J matrix containing the input data
#
val<-vector("numeric")
for (j in 1:ncol(xcen))val[j]<-est(xcen[isub,j],na.rm=misran,...)
bd1way1<-(length(val)-1)*var(val)
bd1way1
}


bicovm<-function(x){
#
# compute a biweight midcovariance matrix for the vectors of
# observations in x, where x is assumed to have list mode, or
# x is an n by p matrix
#
if(is.matrix(x)){
mcov<-matrix(0,ncol(x),ncol(x))
mcor<-matrix(0,ncol(x),ncol(x))
for (i in 1:ncol(x)){
for (j in 1:ncol(x))mcov[i,j]<-bicov(x[,i],x[,j])
}
}
if(is.list(x)){
mcov<-matrix(0,length(x),length(x))
mcor<-matrix(0,length(x),length(x))
for (i in 1:length(x)){
for (j in 1:length(x))mcov[i,j]<-bicov(x[[i]],x[[j]])
}
}
for (i in 1:ncol(mcov)){
for (j in 1:ncol(mcov))mcor[i,j]<-mcov[i,j]/sqrt(mcov[i,i]*mcov[j,j])
}
list(mcov=mcov,mcor=mcor)
}


apdis<-function(m,est=sum,...){
#
# For bivariate data,
# compute distance between each pair
# of points and measure depth of a point
# in terms of its  distance to all
# other points
#
#  m is an n by 2 matrix
#  (In this version, ncol(m)=2 only; for the general
#  case, use apgdis.)
#
m<-elimna(m)  # eliminate any missing values
disx<-outer(m[,1],m[,1],"-")
disy<-outer(m[,2],m[,2],"-")
temp<-sqrt(disx^2+disy^2)
dis<-apply(temp,1,est,...)
temp2<-order(dis)
center<-m[temp2[1],]
list(center=center,distance=dis)
}

onesampb<-function(x,est=onestep,alpha=.05,nboot=2000,SEED=TRUE,nv=0,...){
#
#   Compute a bootstrap, .95 confidence interval for the
#   measure of location corresponding to the argument est.
#   By default, a one-step
#   M-estimator of location based on Huber's Psi is used.
#   The default number of bootstrap samples is nboot=2000
#
#    nv=null value when  computing a p-value
#
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples. Please wait.")
x=elimna(x)
data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,est,...)
bvec<-sort(bvec)
low<-round((alpha/2)*nboot)
up<-nboot-low
low<-low+1
pv=mean(bvec>nv)+.5*mean(bvec==nv)
pv=2*min(c(pv,1-pv))
estimate=est(x,...)
list(ci=c(bvec[low],bvec[up]),n=length(x),estimate=estimate,p.value=pv)
}
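
#  EXAMPLE (an illustrative sketch with simulated data):
#    set.seed(1)
#    onesampb(rnorm(40),est=median)  # CI for the median; p-value for H0: median=0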


pdep<-function(x,y,alpha=.05){
#
# For two dependent variables, x and y,
# estimate p=P(X<Y)
#
dif<-(x<y)
temp<-binomci(y=dif,alpha=alpha)
phat<-temp$phat
ci<-temp$ci
list(phat=phat,ci=ci)
}
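
#  EXAMPLE (an illustrative sketch; simulated dependent pairs):
#    set.seed(1)
#    x<-rnorm(30)
#    y<-x+rnorm(30,mean=.5)
#    pdep(x,y)  # estimate of P(X<Y) with a confidence interval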

tsplit<-function(J,K,data,tr=.2,grp=c(1:p),p=J*K){
#  Perform a J by K anova on trimmed means with
#  repeated measures on the second factor. That is, a split-plot design
#  is assumed, with the first factor consisting of independent groups.
#
#  The  variable data is assumed to contain the raw
#  data stored in list mode or a matrix.
#  If in list mode, data[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  data[[K]] is the data for level 1,K
#  data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc.
#
#  The default amount of trimming is tr=.2
#
#  It is assumed that data has length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
if(is.data.frame(data))data=as.matrix(data)
x<-data
if(is.matrix(x))data<-listm(x)
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups in data is")
print(length(data))
print("Warning: These two values are not equal")
}
if(p!=length(grp)){
print("Apparently a subset of the groups was specified")
print("that does not match the total number of groups ")
stop("indicated by the values for J and K.")
}
tmeans<-0
h<-0
v<-matrix(0,p,p)
klow<-1-K
kup<-0
for (i in 1:p)tmeans[i]<-mean(data[[grp[i]]],tr,na.rm=TRUE)
for (j in 1:J){
h[j]<-length(data[[grp[j]]])-2*floor(tr*length(data[[grp[j]]]))
#    h is the effective sample size for the jth level of factor A
#   Use covmtrim to determine blocks of squared standard errors and
#   covariances.
klow<-klow+K
kup<-kup+K
sel<-c(klow:kup)
v[sel,sel]<-covmtrim(data[grp[klow:kup]],tr)
}
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
#  Do test for factor A
cmat<-kron(cj,ik)  # Contrast matrix for factor A
Qa<-johansp(cmat,tmeans,v,h,J,K)
# Do test for factor B
cmat<-kron(ij,ck)  # Contrast matrix for factor B
Qb<-johansp(cmat,tmeans,v,h,J,K)
# Do test for factor A by B interaction
cmat<-kron(cj,ck)  # Contrast matrix for factor A by B
Qab<-johansp(cmat,tmeans,v,h,J,K)
list(Qa=Qa$teststat,Qa.siglevel=Qa$siglevel,
Qb=Qb$teststat,Qb.siglevel=Qb$siglevel,
Qab=Qab$teststat,Qab.siglevel=Qab$siglevel)
}
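
#  EXAMPLE (an illustrative sketch; 2 independent groups, 2 repeated measures.
#  For simplicity the measures are generated independently; in practice the
#  K measures within a level of factor A come from the same subjects):
#    set.seed(1)
#    dat<-list(rnorm(15),rnorm(15),rnorm(15),rnorm(15)) # levels 1,1  1,2  2,1  2,2
#    tsplit(2,2,dat)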


gvar<-function(m){
#
# Compute the generalized variance of a matrix m
#
m<-elimna(m)
temp<-var(m)
gvar<-prod(eigen(temp)$values)
gvar
}
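
#  EXAMPLE (illustration only):
#    set.seed(1)
#    gvar(matrix(rnorm(60),ncol=2))  # equals the determinant of the covariance matrix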

mgvfreg<-function(x,y,regfun=tsreg,outfun=outbox,plotit=TRUE,SEED=TRUE){
#
# Do regression on points not labeled outliers
# Use the faster inward mgv method
#
m<-cbind(x,y)
m<-elimna(m) # eliminate any rows with missing data
flag<-outmgvf(m,outfun=outfun,plotit,SEED=SEED)$out.id
ivec<-rep(T,nrow(m))
ivec[flag]<-F
x<-as.matrix(x)
temp<-regfun(x[ivec,],y[ivec])
coef<-temp$coef
if(plotit && ncol(m)==2)abline(coef)
residuals<-temp$residuals
list(coef=coef,residuals=residuals)
}

indepth<-function(m){
#
# Compute the inward depth of all points in m
# based on the generalized variance.
#
m<-as.matrix(m)
dep<-NA
n<-nrow(m)
flag<-rep(T,n)
for(i in 1:n){
flag[i]<-F
dep[i]<-gvar(m[flag,])
flag[i]<-T
}
dep
}

regbootg<-function(isub,x,y,regfun,...){
#
#  Perform regression using x[isub] to predict y[isub]
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  This function is used by other functions when computing
#  bootstrap estimates.
#
#  regfun is some regression method already stored in R
#  It is assumed that regfun$coef contains the  intercept and slope
#  estimates produced by regfun.  The regression methods written for
#  this  book, plus regression functions in R, have this property.
#
#  x is assumed to be a matrix containing values of the predictors.
#
#xmat<-matrix(x[isub,],nrow(x),ncol(x))
xmat<-x[isub,]
yy<-y[isub]
regboot<-regfun(xmat,y[isub])$coef
#regboot<-regboot$coef
regboot
}

rregci<-function(x,y,regfun=chreg,nboot=599,alpha=.05,...){
#
#   Compute a .95 confidence interval for each of the parameters of
#   a linear regression equation. The default regression method is
#   a bounded influence M-regression with Schweppe weights
#   (the Coakley-Hettmansperger estimator).
#
#   When using the least squares estimator, and when n<250, use
#   lsfitci instead.
#
#   The predictor values are assumed to be in the n by p matrix x.
#   The default number of bootstrap samples is nboot=599
#
#   regfun can be any R function that returns the coefficients in
#   the vector regfun$coef, the first element of which contains the
#   estimated intercept, the second element contains the estimated slope of
#   the first predictor, etc.
#
x<-as.matrix(x)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,regboot,x,y,regfun,...) # A p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
p1<-ncol(x)+1
regci<-matrix(0,p1,2)
ilow<-round((alpha/2) * nboot)
ihi<-nboot - ilow
ilow<-ilow+1
se<-NA
pvec<-NA
for(i in 1:p1){
bsort<-sort(bvec[i,])
pvec[i]<-sum(bvec[i,]<0)/nboot
if(pvec[i]>.5)pvec[i]<-1-pvec[i]
regci[i,1]<-bsort[ilow]
regci[i,2]<-bsort[ihi]
se[i]<-sqrt(var(bvec[i,]))
}
pvec<-2*pvec
list(regci=regci,p.value=pvec,se=se)
}


pbcan<-function(x,nboot=1000,grp=NA,est=onestep,alpha=.05,...){
#
#   Test the hypothesis that J independent groups have
#   equal measures of location using the percentile bootstrap method
#   in conjunction with a partial centering technique.
#
#   The data are assumed to be stored in x
#   which either has list mode or is a matrix.  In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, the columns of the matrix correspond
#   to groups.
#
#   est is the measure of location and defaults to an M-estimator
#   ... can be used to set optional arguments associated with est
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   Missing values are allowed.
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
tempn<-0
vecm<-0
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
vecm[j]<-est(x[[j]],...)
}
xcen<-list()
flag<-rep(T,J)
for(j in 1:J){
flag[j]<-F
temp<-mean(vecm[flag])
xcen[[j]]<-x[[j]]-temp
flag[j]<-T
}
icrit<-round((1-alpha)*nboot)
bvec<-matrix(NA,nrow=J,ncol=nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
paste("Working on group ",j)
data<-matrix(sample(xcen[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group
}
vvec<-NA
for(j in 1:J){
vvec[j]<-sum((bvec[j,]-vecm[j])^2)/(nboot-1)
}
dis<-NA
for(i in 1:nboot){
dis[i]<-sum((bvec[,i]-vecm)^2/vvec)
}
tvec<-sum((0-vecm)^2/vvec)
dis<-sort(dis)
print(tvec)
print(dis[icrit])
print(vecm)
sig<-1-sum((tvec>=dis))/nboot
list(p.value=sig)
}
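
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): comparing three independent groups with pbcan using
#  simulated data, where the third group is shifted by one unit.
pbcan.demo<-function(n=30,nboot=500){
x<-list(rnorm(n),rnorm(n),rnorm(n)+1)
pbcan(x,nboot=nboot)
}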

ddep<-function(x,est=onestep,alpha=.05,grp=NA,nboot=2000,plotit=TRUE,SEED=TRUE,pr=TRUE,...){
#
#   Do ANOVA on dependent groups
#   using the partially centered method plus
#   depth of zero among  bootstrap values.
#
#   Might not be level robust when there is heteroscedasticity
#
#   The data are assumed to be stored in x in list mode
#   or in a matrix. In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, columns correspond to groups.
#
#   grp is used to specify some subset of the groups, if desired.
#   By default, all J groups are used.
#
#   The default number of bootstrap samples is nboot=2000
#
if(pr)print("Warning: Might not be level robust if there is heteroscedasticity and n is small")
if(pr)print("To reduce this problem, use ddepv2")
if(is.list(x)){
nv<-NA
for(j in 1:length(x))nv[j]<-length(x[[j]])
if(var(nv) !=0){
stop("The groups are stored in list mode and appear to have different sample sizes")
}
temp<-matrix(NA,ncol=length(x),nrow=nv[1])
for(j in 1:length(x))temp[,j]<-x[[j]]
x<-temp
}
J<-ncol(x)
if(!is.na(grp[1])){ #Select the groups of interest
J<-length(grp)
temp<-matrix(NA,nrow=nrow(x),ncol=J)
for(j in 1:J)temp[,j]<-x[,grp[j]]
x<-temp
}
x<-elimna(x) # Remove any rows with missing values.
bvec<-matrix(0,ncol=J,nrow=nboot)
hval<-vector("numeric",J)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
n<-nrow(x)
totv<-apply(x,2,est,...)
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot)bvec[ib,]<-apply(x[data[ib,],],2,est,...) #nboot by J matrix
gv<-rep(mean(totv),J) #Grand mean
bplus<-nboot+1
m1<-rbind(bvec,gv)
center<-totv
cmat<-var(bvec)
discen<-mahalanobis(m1,totv,cmat)
print("Bootstrap complete; computing significance level")
if(plotit && ncol(x)==2){
plot(bvec,xlab="Group 1",ylab="Group 2")
temp.dis<-order(discen[1:nboot])
ic<-round((1-alpha)*nboot)
xx<-bvec[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
abline(0,1)
}
sig.level<-sum(discen[bplus]<=discen)/bplus
list(p.value=sig.level,center=totv,grand.mean=gv)
}
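
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): ddep applied to three dependent groups; rows of the
#  simulated matrix play the role of subjects measured under three
#  conditions.
ddep.demo<-function(n=30,nboot=500){
x<-matrix(rnorm(n*3),ncol=3)
x[,3]<-x[,3]+.5  # shift the third condition
ddep(x,nboot=nboot,plotit=FALSE)
}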

rmaseq<-function(x,est=onestep,alpha=.05,grp=NA,nboot=NA,...){
#
#   Using the percentile bootstrap method,
#   test hypothesis that all marginal distributions
#   among J dependent groups
#   have a common measure of location.
#   This is done by using a sequentially rejective method
#   of J-1 pairs of groups.
#   That is, compare group 1 to group 2, group 2 to group 3, etc.
#
#   By default, onestep M-estimator is used.
#
#   nboot is the bootstrap sample size. If not specified, a value will
#   be chosen depending on the number of groups
#
#   x can be an n by J matrix or it can have list mode
#   grp can be used to specify a subset of the groups for analysis
#
#   the argument ... can be used to specify options associated
#   with the argument est.
#
if(!is.list(x) && !is.matrix(x)){
stop("Data must be stored in a matrix or in list mode.")
}
if(is.list(x)){
# put the data in an n by J matrix
mat<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))mat[,j]<-x[[j]]
}
if(is.matrix(x))mat<-x
if(!is.na(grp[1]))mat<-mat[,grp] # Analyze the subset of groups specified by grp.
mat<-elimna(mat) # Remove rows with missing values.
J<-ncol(mat)
Jm<-J-1
con<-matrix(0,ncol=Jm,nrow=J)
for(j in 1:Jm){
jp<-j+1
con[j,j]<-1
con[jp,j]<-0-1
}
rmmcp(mat,est=est,alpha=alpha,con=con,nboot=nboot,...)
}

rmanog<-function(x,alpha=.05,est=onestep,grp=NA,nboot=NA,...){
#
#   Using the percentile bootstrap method,
#   test the hypothesis that all differences among J
#   dependent groups have a
#   measure of location equal to zero.
#   That is, if
#   Dij is the difference between ith observations
#   in groups j and j+1,
#   and Dij has measure of location  muj
#   the goal is to test
#   H0: mu1=mu2=...=0
#
#   By default, an M-estimator is used.
#
#   nboot is the bootstrap sample size. If not specified, a value will
#   be chosen depending on the number of groups
#
#   x can be an n by J matrix or it can have list mode
#   grp can be used to specify a subset of the groups for analysis
#
#   the argument ... can be used to specify options associated
#   with the argument est.
#
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
# put the data in an n by J matrix
mat<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))mat[,j]<-x[[j]]
}
if(is.matrix(x))mat<-x
if(!is.na(grp[1]))mat<-mat[,grp] # Analyze the subset of groups specified by grp.
mat<-elimna(mat) # Remove rows with missing values.
J<-ncol(mat)
Jm<-J-1
jp<-0
dif<-matrix(NA,nrow=nrow(mat),ncol=Jm)
for(j in 1:Jm){
jp<-j+1
dif[,j]<-mat[,j]-mat[,jp]
}
if(is.na(nboot)){
nboot<-5000
if(Jm <= 4)nboot<-1000
}
print("Taking bootstrap samples. Please wait.")
data <- matrix(sample(nrow(mat), size = nrow(mat) * nboot, replace = T),
                nrow = nboot)
bvec <- matrix(NA, ncol = ncol(dif), nrow = nboot)
        for(j in 1:ncol(dif)) {
                temp <- dif[, j]
                bvec[, j] <- apply(data, 1., rmanogsub, temp, est)
        }  #bvec is an nboot by Jm matrix
testvec<-NA
for(j in 1:Jm){
testvec[j]<-sum(bvec[,j]>0)/nboot
if(testvec[j] > .5)testvec[j]<-1-testvec[j]
}
critvec<-alpha/c(1:Jm)
#testvec<-2*testvec[order(-1*testvec)]
test<-2*testvec
test.sort<-order(-1*test)
chk<-sum((test[test.sort] <= critvec))
if(chk > 0)print("Significant difference found")
output<-matrix(0,Jm,6)
dimnames(output)<-list(NULL,c("con.num","psihat","sig","crit.sig","ci.lower","ci.upper"))
tmeans<-apply(dif,2,est,...)
psi<-1
output[,2]<-tmeans
for (ic in 1:Jm){
output[ic,1]<-ic
output[ic,3]<-test[ic]
crit<-critvec[ic]
output[test.sort[ic],4]<-crit
}
for(ic in 1:Jm){
icrit<-output[ic,4]
icl<-round(icrit*nboot/2)+1
icu<-round((1-icrit/2)*nboot)
temp<-sort(bvec[,ic])
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
list(output=output)
}
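
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): rmanog tests whether successive difference scores among
#  three dependent groups have a typical value of zero; simulated data
#  are used for illustration only.
rmanog.demo<-function(n=25){
x<-matrix(rnorm(n*3),ncol=3)
rmanog(x)$output
}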

ecor<-function(x,y,pcor=FALSE,regfun=tsreg,corfun=pbcor,outkeep=FALSE,outfun=outmgvf){
#
# Estimate the explanatory correlation between x and y
#
# It is assumed that x is a vector or a matrix having one column only
xx<-elimna(cbind(x,y)) # Remove rows with missing values
x<-xx[,1]
y<-xx[,2]
x<-as.matrix(x)
if(ncol(x) > 1)stop("x must be a vector or matrix with one column")
flag<-rep(T,nrow(x))
if(!outkeep){
temp<-outfun(cbind(x,y))$out.id
flag[temp]<-F
}
coef<-regfun(x,y)$coef
ip<-ncol(x)+1
yhat<-x %*% coef[2:ip] + coef[1]
if(pcor)epow2<-cor(yhat[flag],y[flag])^2
if(!pcor)epow2<-corfun(yhat[flag],y[flag])$cor^2
ecor<-sqrt(epow2)*sign(coef[2])
ecor
}
ocor<-function(x,y,corfun=pbcor,outfun=outmgvf,pcor=FALSE,plotit=FALSE){
#
#  Compute a correlation when outliers are ignored.
#
xx<-elimna(cbind(x,y)) # Remove rows with missing values
x<-xx[,1]
y<-xx[,2]
flag<-rep(T,length(x))
temp<-outfun(cbind(x,y),plotit=plotit)$out.id
flag[temp]<-F
if(pcor)ocor<-cor(x[flag],y[flag])
if(!pcor)ocor<-corfun(x[flag],y[flag])$cor
list(cor=ocor)
}
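
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): ecor and ocor applied to a simulated linear relation.
#  The defaults (tsreg, pbcor, outmgvf) are assumed to be available
#  once the full file is sourced.
ecor.demo<-function(n=50){
x<-rnorm(n)
y<-x+rnorm(n)
list(explanatory.cor=ecor(x,y),outlier.free.cor=ocor(x,y)$cor)
}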


rmdzero<-function(x,est=onestep,grp=NA,nboot=500,SEED=TRUE,...){
#
#   Do ANOVA on dependent groups
#   using the depth of zero among bootstrap values
#   based on difference scores.
#
#   The data are assumed to be stored in x in list mode
#   or in a matrix. In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, columns correspond to groups.
#
#   grp is used to specify some subset of the groups, if desired.
#   By default, all J groups are used.
#
#   The default number of bootstrap samples is nboot=500
#
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
# put the data in an n by J matrix
mat<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))mat[,j]<-x[[j]]
}
if(is.matrix(x))mat<-x
if(!is.na(grp[1])){
mat<-mat[,grp]
}
mat<-elimna(mat) # Remove rows with missing values.
J<-ncol(mat)
jp<-0
Jall<-(J^2-J)/2
dif<-matrix(NA,nrow=nrow(mat),ncol=Jall)
ic<-0
for(j in 1:J){
for(k in 1:J){
if(j<k){
ic<-ic+1
dif[,ic]<-mat[,j]-mat[,k]
}}}
dif<-as.matrix(dif)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data <- matrix(sample(nrow(mat), size = nrow(mat) * nboot, replace = T),
                nrow = nboot)
bvec <- matrix(NA, ncol = ncol(dif), nrow = nboot)
        for(j in 1:ncol(dif)) {
paste("Working on contrast ",j, "of ",ncol(dif))
                temp <- dif[, j]
                bvec[, j] <- apply(data, 1., rmanogsub, temp, est)
        }  #bvec is an nboot by (J^2-J)/2 matrix
center<-apply(dif,2,est,...)
bcen<-apply(bvec,2,mean)
cmat<-var(bvec-bcen+center)
zvec<-rep(0,Jall)
m1<-rbind(bvec,zvec)
bplus<-nboot+1
discen<-mahalanobis(m1,center,cmat)
sig.level<-sum(discen[bplus]<=discen)/bplus
list(p.value=sig.level,center=center)
}
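
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): rmdzero applied to three dependent groups; all pairwise
#  difference scores are tested for a typical value of zero.
rmdzero.demo<-function(n=30){
x<-matrix(rnorm(n*3),ncol=3)
rmdzero(x,nboot=500)
}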

con2way<-function(J,K){
#
# For a  J by K ANOVA design
# create the contrast coefficients for
# doing all pairwise comparisons of
# main effects for Factor A and B and all interactions
#
JK<-J*K
Ja<-(J^2-J)/2
Ka<-(K^2-K)/2
conA<-matrix(0,nrow=JK,ncol=Ja)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j < jj){
ic<-ic+1
mat<-matrix(0,nrow=J,ncol=K)
mat[j,]<-1
mat[jj,]<-0-1
conA[,ic]<-t(mat)
}}}
conB<-matrix(0,nrow=JK,ncol=Ka)
ic<-0
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
ic<-ic+1
mat<-matrix(0,nrow=J,ncol=K)
mat[,k]<-1
mat[,kk]<-0-1
conB[,ic]<-t(mat)
}}}
conAB<-matrix(0,nrow=JK,ncol=Ka*Ja)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j < jj){
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
ic<-ic+1
mat<-matrix(0,nrow=J,ncol=K)
mat[j,k]<-1
mat[j,kk]<-0-1
mat[jj,k]<-0-1
mat[jj,kk]<-1
conAB[,ic]<-t(mat)
}}}}}}
list(conA=conA,conB=conB,conAB=conAB)
}
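
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): for a 2 by 3 design, con2way returns a 6 by 1 matrix conA
#  (one pairwise comparison for Factor A), a 6 by 3 matrix conB, and a
#  6 by 3 matrix conAB.
con2way.demo<-function(){
con2way(2,3)
}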

outmve<-function(x,mve.flag=TRUE,plotit=TRUE,SEED=TRUE){
#
#  Search for outliers using the minimum volume ellipsoid method.
#
#  x is an n by p matrix
#
#  The function returns the number of the rows in x that are identified
#  as outliers. (The row numbers are stored in outliers.)
#  It also returns the distance of the points identified as outliers
#  in the variable dis.
#
#  If mve.flag=T, use the mve estimator, otherwise use the mcd
#
#  If plotit=TRUE, plot points and circle outliers.
#
library(MASS)
if(SEED)set.seed(12)
if(!is.matrix(x)){
x<-x[!is.na(x)]
dis<-mahalanobis(x,median(x),mad(x)^2)
crit<-sqrt(qchisq(.975,1))
vec<-c(1:length(x))
}
if(is.matrix(x)){
x<-elimna(x) # remove any missing values
if(mve.flag)mve<-cov.mve(x)
if(!mve.flag)mve<-cov.mcd(x)
dis<-mahalanobis(x,mve$center,mve$cov)
crit<-sqrt(qchisq(.975,ncol(x)))
vec<-c(1:nrow(x))
}
dis<-sqrt(dis)
chk<-ifelse(dis>crit,1,0)
id<-vec[chk==1]
keep<-vec[chk==0]
x<-as.matrix(x)
if(plotit && ncol(x)==2){
plot(x[,1],x[,2],xlab="X",ylab="Y",type="n")
flag<-rep(T,nrow(x))
flag[id]<-F
points(x[flag,1],x[flag,2])
if(sum(chk)!=0)points(x[!flag,1],x[!flag,2],pch="o")
}
list(out.id=id,keep.id=keep,dis=dis,crit=crit)
}
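
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): planting one obvious outlier in simulated bivariate data
#  and checking that outmve flags it.
outmve.demo<-function(n=50){
x<-cbind(rnorm(n),rnorm(n))
x[1,]<-c(6,6)  # plant an outlier
outmve(x,plotit=FALSE)$out.id
}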

rundis<-function(x,y,est=onestep,plotit=TRUE,pyhat=FALSE,...){
#
# Do a smooth where x is discrete with a
# relatively small number of values.
#
temp<-sort(unique(x))
yhat<-NA
for(i in 1:length(temp)){
flag<-(temp[i]==x)
yhat[i]<-est(y[flag],...)
}
if(plotit){
plot(x,y)
lines(temp,yhat)
}
output<-"Done"
if(pyhat)output<-yhat
output
}

bdm<-function(x,grp=NA){
#
# Perform the Brunner, Dette, Munk rank-based ANOVA
# (JASA, 1997, 92, 1494--1502)
#
# x can be a matrix with columns corresponding to groups
# or it can have list mode.
#
if(is.matrix(x))x<-listm(x)
J<-length(x)
xx<-list()
if(is.na(grp[1]))grp<-c(1:J)
for(j in 1:J)xx[[j]]<-x[[grp[j]]]
Ja<-matrix(1,J,J)
Ia<-diag(1,J)
Pa<-Ia-Ja/J
cona<-Pa
outA<-bdms1(xx,cona)
outA
}
cori<-function(x,y,z,pt=median(z),fr=.8,est=onestep,corfun=pbcor,testit=FALSE,
nboot=599,sm=FALSE,xlab="X",ylab="Y",...){
#
# Split the data according to whether z is < or > pt, then
# use runmean2g to plot a smooth of the regression
# lines corresponding to these two groups.
#
# If testit=T, the hypothesis of equal correlations is tested using
#  the R function twocor
#
m<-cbind(x,y,z)
m<-elimna(m)
x<-m[,1]
y<-m[,2]
z<-m[,3]
flag<-(z<pt)
runmean2g(x[flag],y[flag],x[!flag],y[!flag],fr=fr,est=est,sm=sm,
xlab=xlab,ylab=ylab,...)
output<-"Done"
if(testit){
output<-twocor(x[flag],y[flag],x[!flag],y[!flag],corfun=corfun,nboot=nboot,plotit=FALSE)
}
output
}


wsp1reg<-function(x,y,plotit=FALSE){
#
# Compute the Wilcoxon R estimate of the slope
# Only a single predictor is allowed in this version
#
temp<-matrix(c(x,y),ncol=2)
temp<-elimna(temp)     # Remove any pairs with missing values
x<-temp[,1]
y<-temp[,2]
ord<-order(x)
xs<-x[ord]
ys<-y[ord]
vec1<-outer(ys,ys,"-")
vec2<-outer(xs,xs,"-")
v1<-vec1[vec2>0]
v2<-vec2[vec2>0]
slope<-v1/v2
tmin<-wrregfun(slope[1],x,y)
ikeep<-1
for(i in 2:length(slope)){
tryit<-wrregfun(slope[i],x,y)
if(tryit<tmin){
tmin<-tryit
ikeep<-i
}}
coef<-NA
coef[2]<-slope[ikeep]
coef[1]<-median(y-coef[2]*x)
if(plotit){
plot(x,y,xlab="X",ylab="Y")
abline(coef)
}
res<-y-coef[2]*x-coef[1]
list(coef=coef,residuals=res)
}

wreg<-function(x,y,iter=10){
#
#  Compute Wilcoxon R estimate
#
#  Use Gauss-Seidel algorithm
#  when there is more than one predictor
#
#  Argument iter is used when there is more than one
#  predictor and indicates maximum number
#  of iterations used by the Gauss-Seidel algorithm.
#
temp<-NA
x<-as.matrix(x)
if(ncol(x)==1){
temp1<-wsp1reg(x,y)
coef<-temp1$coef
res<-temp1$res
}
if(ncol(x)>1){
for(p in 1:ncol(x)){
temp[p]<-wsp1reg(x[,p],y)$coef[2]
}
res<-y-x%*%temp
alpha<-median(res)
r<-matrix(NA,ncol=ncol(x),nrow=nrow(x))
tempold<-temp
for(it in 1:iter){
for(p in 1:ncol(x)){
r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p]
temp[p]<-wsp1reg(x[,p],r[,p],plotit=FALSE)$coef[2]
}
alpha<-median(y-x%*%temp)
if(max(abs(tempold-temp))<.0001)break
tempold<-temp
}
coef<-c(alpha,temp)
res<-y-x%*%temp-alpha
}
list(coef=coef,residuals=res)
}
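
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): the Wilcoxon R estimate with two predictors; the true
#  slopes in the simulated model are 1 and -1.
wreg.demo<-function(n=40){
x<-cbind(rnorm(n),rnorm(n))
y<-x[,1]-x[,2]+rnorm(n)
wreg(x,y)$coef
}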

mgvar<-function(m,se=FALSE,op=0,cov.fun=covmve,SEED=TRUE){
#
# Find the center of a scatterplot, add point that
# increases the generalized variance by smallest amount
# continue for all points
# return the generalized variance
#  values corresponding to each point.
# The central values and point(s) closest to it get NA
#
# op=0 find central points using pairwise differences
# op!=0 find central points using measure of location
# used by cov.fun
#
# choices for cov.fun include
# covmve
# covmcd
# tbs (Rocke's measure of location)
# rmba (Olive's median ball algorithm)
#
if(op==0)temp<-apgdis(m,se=se)$distance
if(op!=0)temp<-out(m,cov.fun=cov.fun,plotit=FALSE,SEED=SEED)$dis
flag<-(temp!=min(temp))
temp2<-temp
temp2[!flag]<-max(temp)
flag2<-(temp2!=min(temp2))
flag[!flag2]<-F
varvec<-NA
while(sum(flag)>0){
ic<-0
chk<-NA
remi<-NA
for(i in 1:nrow(m)){
if(flag[i]){
ic<-ic+1
chk[ic]<-gvar(rbind(m[!flag,],m[i,]))
remi[ic]<-i
}}
sor<-order(chk)
k<-remi[sor[1]]
varvec[k]<-chk[sor[1]]
flag[k]<-F
}
varvec
}

outmgv<-function(x,y=NULL,plotit=TRUE,outfun=outbox,se=TRUE,op=1,
cov.fun=rmba,xlab="X",ylab="Y",SEED=TRUE,STAND=FALSE,...){
#
# Check for outliers using mgv method
#
# NOTE: if columns of the input matrix are reordered, this can
# have an effect on the results due to rounding error when calling
# the R function eigen.
#
#  (Argument STAND is included simply to avoid programming issues when outmgv is called by other functions.)
#
if(is.null(y[1]))m<-x
if(!is.null(y[1]))m<-cbind(x,y)
m=elimna(m)
m=as.matrix(m)
nv=nrow(m)
temp<-mgvar(m,se=se,op=op,cov.fun=cov.fun,SEED=SEED)
#if(fast)temp<-mgvdep.for(m,se=se)$distance
temp[is.na(temp)]<-0
if(ncol(m)==1){
temp2=outpro(m)
nout=temp2$n.out
keep=temp2$keep
temp2=temp2$out.id
}
if(ncol(m)>1){
if(ncol(m)==2)temp2<-outfun(temp,...)$out.id
if(ncol(m)>2)temp2<-outbox(temp,mbox=TRUE,gval=sqrt(qchisq(.975,ncol(m))))$out.id
vec<-c(1:nrow(m))
flag<-rep(T,nrow(m))
flag[temp2]<-F
vec<-vec[flag]
vals<-c(1:nrow(m))
keep<-vals[flag]
if(plotit && ncol(m)==2){
x<-m[,1]
y<-m[,2]
plot(x,y,type="n",xlab=xlab,ylab=ylab)
flag<-rep(T,length(y))
flag[temp2]<-F
points(x[flag],y[flag],pch="*")
points(x[temp2],y[temp2],pch="o")
}
nout=0
if(!is.na(temp2[1]))nout=length(temp2)
}
list(n=nv,n.out=nout,out.id=temp2,keep=keep)
}

outmgvf<-function(x,y=NA,plotit=TRUE,outfun=outbox,se=TRUE,...){
#
# Check for outliers using inward mgv method
# This method is faster than outmgv.
#
if(is.na(y[1]))m<-x
if(!is.na(y[1]))m<-cbind(x,y)
m<-elimna(m) # eliminate any rows with missing data
if(se){
for(i in 1:ncol(m))m[,i]<-(m[,i]-median(m[,i]))/mad(m[,i])
}
iflag<-rep(T,nrow(m))
dval<-0
for(i in 1:nrow(m)){
iflag[i]<-F
dval[i]<-gvar(m[iflag,])
iflag[i]<-T
}
temp2<-outfun(dval,...)$out.id
vals<-c(1:nrow(m))
flag3<-rep(T,nrow(m))
flag3[temp2]<-F
keep<-vals[flag3]
if(plotit && ncol(m)==2){
x<-m[,1]
y<-m[,2]
plot(x,y,type="n",xlab="X",ylab="Y")
flag<-rep(T,length(y))
flag[temp2]<-F
points(x[flag],y[flag])
points(x[temp2],y[temp2],pch="o")
}
list(out.id=temp2,keep=keep,out.val=m[temp2,],depth.values=dval)
}

epow<-function(x,y,pcor=FALSE,regfun=tsreg,corfun=pbcor,outkeep=FALSE,outfun=outmgvf,varfun=pbvar,op=TRUE){
#
# Estimate the explanatory power between x and y
#
xx<-elimna(cbind(x,y))
pval<-1
if(is.matrix(x))pval<-ncol(x)
pp<-pval+1
x<-xx[,1:pval]
y<-xx[,pp]
x<-as.matrix(x)
flag<-rep(T,nrow(x))
temp<-regfun(x,y)
ip<-ncol(x)+1
yhat<-y-temp$res
if(!outkeep){
temp<-outfun(cbind(x,y),plotit=FALSE)$out.id
flag[temp]<-F
}
epow1<-varfun(yhat[flag])/varfun(y[flag])
if(pcor)epow2<-cor(yhat[flag],y[flag])^2
if(!pcor)epow2<-corfun(yhat[flag],y[flag])$cor^2
if(op)est<-epow2
if(!op)est<-epow1
est
}

cmanova<-function(J,K,x,grp=c(1:JK),JK=J*K){
#
# Perform the Choi and Marden
# multivariate one-way rank-based ANOVA
# (Choi and Marden, JASA, 1997, 92, 1581-1590)
#
# x can be a matrix with columns corresponding to groups
# or it can have list mode.
#
# Have a J by K design with J independent levels and K dependent
# measures
#
#
x=elimna(x)
if(is.matrix(x))x<-listm(x)
xx<-list()
nvec<-NA
jk<-0
for(j in 1:J){
for(k in 1:K){
jk<-jk+1
xx[[jk]]<-x[[grp[jk]]]
if(k==1)nvec[j]<-length(xx[[jk]])
}}
N<-sum(nvec)
RVALL<-matrix(0,nrow=N,ncol=K)
x<-xx
jk<-0
rmean<-matrix(NA,nrow=J,ncol=K)
for(j in 1:J){
RV<-matrix(0,nrow=nvec[j],ncol=K)
jk<-jk+1
temp1<-matrix(x[[jk]],ncol=1)
for(k in 2:K){
jk<-jk+1
temp1<-cbind(temp1,x[[jk]])
}
X<-temp1
if(j==1)XALL<-X
if(j>1)XALL<-rbind(XALL,X)
n<-nvec[j]
for(i in 1:n){
for (ii in 1:n){
temp3<-sqrt(sum((X[i,]-X[ii,])^2))
if(temp3 != 0)RV[i,]<-RV[i,]+(X[i,]-X[ii,])/temp3
}
RV[i,]<-RV[i,]/nvec[j]
if(j==1 && i==1)sighat<-RV[i,]%*%t(RV[i,])
if(j>1 || i>1)sighat<-sighat+RV[i,]%*%t(RV[i,])
}
}
# Assign ranks to pooled data and compute R bar for each group
for(i in 1:N){
for (ii in 1:N){
temp3<-sqrt(sum((XALL[i,]-XALL[ii,])^2))
if(temp3 != 0)RVALL[i,]<-RVALL[i,]+(XALL[i,]-XALL[ii,])/temp3
}
RVALL[i,]<-RVALL[i,]/N
}
bot<-1-nvec[1]
top<-0
for(j in 1:J){
bot<-bot+nvec[j]
top<-top+nvec[j]
flag<-c(bot:top)
rmean[j,]<-apply(RVALL[flag,],2,mean)
}
sighat<-sighat/(N-J)
shatinv<-solve(sighat)
KW<-0
for(j in 1:J){
KW<-KW+nvec[j]*t(rmean[j,])%*%shatinv%*%rmean[j,]
}
df<-K*(J-1)
sig.level<-1-pchisq(KW,df)
list(test.stat=KW[1,1],df=df,p.value=sig.level)
}



rimul<-function(J,K,x,alpha=.05,p=J*K,grp=c(1:p)){
#
#  Rank-based multiple comparisons for all interactions
#  in J by K design. The method is based on an
#  extension of Cliff's heteroscedastic technique for
#  handling tied values.
#
#  The familywise type I error probability is controlled by using
#  a critical value from the Studentized maximum modulus distribution.
#
#  It is assumed all groups are independent.
#
#  Missing values are automatically removed.
#
#  The default value for alpha is .05. Any other value results in using
#  alpha=.01.
#
#  Argument grp can be used to rearrange the order of the data.
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
CCJ<-(J^2-J)/2
CCK<-(K^2-K)/2
CC<-CCJ*CCK
test<-matrix(NA,CC,7)
test.p<-matrix(NA,CC,7)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
}
mat<-matrix(grp,ncol=K,byrow=T)
dimnames(test)<-list(NULL,c("Factor A","Factor A","Factor B","Factor B","delta","ci.lower","ci.upper"))
jcom<-0
crit<-smmcrit(200,CC)
if(alpha!=.05)crit<-smmcrit01(200,CC)
alpha<-1-pnorm(crit)
for (j in 1:J){
for (jj in 1:J){
if (j < jj){
for (k in 1:K){
for (kk in 1:K){
if (k < kk){
jcom<-jcom+1
test[jcom,1]<-j
test[jcom,2]<-jj
test[jcom,3]<-k
test[jcom,4]<-kk
temp1<-cid(x[[mat[j,k]]],x[[mat[j,kk]]])
temp2<-cid(x[[mat[jj,k]]],x[[mat[jj,kk]]])
delta<-temp2$d-temp1$d
sqse<-temp1$sqse.d+temp2$sqse.d
test[jcom,5]<-delta/2
test[jcom,6]<-delta/2-crit*sqrt(sqse/4)
test[jcom,7]<-delta/2+crit*sqrt(sqse/4)
}}}}}}
list(test=test)
}

signt<-function(x,y=NULL,alpha=.05,AC=FALSE){
#
#  Do a sign test on data in x and y
#  If y is not supplied (y=NULL), assume x is a matrix with
#  two columns or has list mode.
#
#  Returns n, the original sample size
#  N, number of paired observations that are not equal to one another.
#  phat, an estimate of p, the probability that x<y.
#  ci, a confidence interval for p
#
#  Pratt's method is used by default.
#  If AC=T, the Agresti-Coull method is used instead.
#
if(is.null(y[1])){
if(ncol(as.matrix(x))!=2)stop('y is null so x should be a matrix or data frame with two columns')
if(is.matrix(x)||is.data.frame(x))dif<-x[,1]-x[,2]
if(is.list(x))dif<-x[[1]]-x[[2]]
}
if(!is.null(y[1]))dif<-x-y
dif=elimna(dif)
n<-length(dif)
dif<-dif[dif!=0]  # Remove any zero values.
flag<-(dif<0)
if(!AC)temp<-binomci(y=flag,alpha=alpha)
if(AC)temp<-acbinomciv2(y=flag,alpha=alpha)
list(Prob_x_less_than_y=temp$phat,ci=temp$ci,n=n,N=length(flag))
}
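
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): a sign test for paired data where the second measure tends
#  to be larger than the first.
signt.demo<-function(n=40){
x<-rnorm(n)
y<-x+rnorm(n,mean=.3)
signt(x,y)
}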

signtpv<-function(x,y,nullval=.5,alpha=.05,AC=FALSE){
#
#  Sign test for two dependent groups.
# Same as the R function signt, only a p-value is returned.
#
ci<-signt(x,y,alpha=alpha,AC=AC)
alph<-c(1:99)/100
for(i in 1:99){
irem<-i
chkit<-signt(x,y,alpha=alph[i],AC=AC)$ci
if(chkit[1]>nullval || chkit[2]<nullval)break
}
p.value<-irem/100
if(p.value<=.1){
iup<-(irem+1)/100
alph<-seq(.001,iup,.001)
for(i in 1:length(alph)){
p.value<-alph[i]
chkit<-signt(x,y,alpha=alph[i],AC=AC)$ci
if(chkit[1]>nullval || chkit[2]<nullval)break
}}
if(p.value<=.001){
alph<-seq(.0001,.001,.0001)
for(i in 1:length(alph)){
p.value<-alph[i]
chkit<-signt(x,y,alpha=alph[i],AC=AC)$ci
if(chkit[1]>nullval || chkit[2]<nullval)break
}}
list(output=ci,p.value=p.value)
}


sisplit<-function(J,K,x){
#
# Check for interactions by comparing binomials
# Here, have J by K (between by within) design
# Only alpha=.05 is allowed.
#
p<-J*K
connum<-(J^2-J)*(K^2-K)/4
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode")
imap<-matrix(c(1:p),J,K,byrow=T)
outm<-matrix(NA,ncol=8,nrow=connum)
dimnames(outm)<-list(NULL,c("Fac.A","Fac.A","Fac.B","Fac.B","p1","p2","ci.low","ci.up"))
ic<-0
if(connum <= 28)qval<-smmcrit(999,connum)
if(connum > 28)qval<-2.383904*connum^.1-.202
aval<-4*(1-pnorm(qval))
if(J==2 && K==2)aval<-.05
if(J==5 && K==2)aval<-2*(1-pnorm(qval))
if(J==3 && K==2)aval<-3*(1-pnorm(qval))
if(J==4 && K==2)aval<-3*(1-pnorm(qval))
if(J==2 && K==3)aval<-3*(1-pnorm(qval))
for (j in 1:J){
for (jj in 1:J){
if(j<jj){
for (k in 1:K){
for (kk in 1:K){
if(k<kk){
dif<-x[[imap[j,k]]]-x[[imap[j,kk]]]
dif<-dif[dif!=0]
dif1<-(dif<0)
dif<-(x[[imap[jj,k]]]-x[[imap[jj,kk]]])
dif<-dif[dif!=0]
dif2<-(dif<0)
ic<-ic+1
outm[ic,1]<-j
outm[ic,2]<-jj
outm[ic,3]<-k
outm[ic,4]<-kk
temp<-twobici(x=dif1,y=dif2,alpha=aval)
outm[ic,5]<-temp$p1
outm[ic,6]<-temp$p2
outm[ic,7]<-temp$ci[1]
outm[ic,8]<-temp$ci[2]
}}}}}}
outm
}


rmmcppbd<-function(x,y=NULL,alpha=.05,con=0,est=onestep,plotit=TRUE,grp=NA,nboot=NA,
hoch=TRUE,SEED=TRUE,...){
#
#   Use a percentile bootstrap method to  compare dependent groups
#   based on difference scores.
#   By default,
#   compute a .95 confidence interval for all linear contrasts
#   specified by con, a J by C matrix, where  C is the number of
#   contrasts to be tested, and the columns of con are the
#   contrast coefficients.
#   If con is not specified, all pairwise comparisons are done.
#
#   By default, one-step M-estimator is used
#    and a sequentially rejective method
#   is used to control the probability of at least one Type I error.
#
#   nboot is the bootstrap sample size. If not specified, a value will
#   be chosen depending on the number of contrasts there are.
#
#   x can be an n by J matrix or it can have list mode
#   for two groups, data for second group can be put in y
#   otherwise, assume x is a matrix (n by J) or has list mode.
#
#   A sequentially rejective method is used to control alpha.
#   If n>=80, Hochberg's method is used.
#
if(!is.null(y[1]))x<-cbind(x,y)
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
if(is.matrix(con)){
if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.")
}}
if(is.list(x)){
# put the data in an n by J matrix
mat<-matl(x)
}
if(is.matrix(x) && is.matrix(con)){
if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.")
mat<-x
}
if(is.matrix(x))mat<-x
if(!is.na(sum(grp)))mat<-mat[,grp]
x<-mat
mat<-elimna(mat) # Remove rows with missing values.
x<-mat
J<-ncol(mat)
n=nrow(mat)
if(n>=80)hoch=T
Jm<-J-1
if(sum(con^2)==0){
d<-(J^2-J)/2
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
d<-ncol(con)
if(is.na(nboot)){
nboot<-5000
if(d<=10)nboot<-3000
if(d<=6)nboot<-2000
if(d<=4)nboot<-1000
}
n<-nrow(mat)
crit.vec<-alpha/c(1:d)
connum<-ncol(con)
# Create set of differences based on contrast coefficients
xx<-x%*%con
xx<-as.matrix(xx)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
psihat<-matrix(0,connum,nboot)
bvec<-matrix(NA,ncol=connum,nrow=nboot)
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
# data is an nboot by n matrix
if(ncol(xx)==1){
for(ib in 1:nboot)psihat[1,ib]<-est(xx[data[ib,]],...)
}
if(ncol(xx)>1){
for(ib in 1:nboot)psihat[,ib]<-apply(xx[data[ib,],],2,est,...)
}
#
# Now have an nboot by connum matrix of bootstrap values.
#
test<-1
for (ic in 1:connum){
test[ic]<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot
test[ic]<-min(test[ic],1-test[ic])
}
test<-2*test
ncon<-ncol(con)
if(alpha==.05){
dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
dvec[2]<-alpha/2
}
if(hoch)dvec<-alpha/(2*c(1:ncon))
dvec<-2*dvec
if(plotit && connum==1){
plot(c(psihat[1,],0),xlab="",ylab="Est. Difference")
points(psihat[1,])
abline(0,0)
}
temp2<-order(0-test)
ncon<-ncol(con)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output<-matrix(0,connum,6)
dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper"))
tmeans<-apply(xx,2,est,...)
psi<-1
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
for (ic in 1:ncol(con)){
output[ic,2]<-tmeans[ic]
output[ic,1]<-ic
output[ic,3]<-test[ic]
output[temp2,4]<-zvec
temp<-sort(psihat[ic,])
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}
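
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): all pairwise comparisons among three dependent groups based
#  on difference scores, using simulated data.
rmmcppbd.demo<-function(n=30){
x<-matrix(rnorm(n*3),ncol=3)
rmmcppbd(x,plotit=FALSE)$output
}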


bdms1<-function(x,con){
# This function is used by bdm
#
# Pool all data and rank
pool<-x[[1]]
JK<-length(x)
for (j in 2:JK)pool<-c(pool,x[[j]])
N<-length(pool)
rval<-rank(pool)
rvec<-list()
up<-length(x[[1]])
rvec[[1]]<-rval[1:up]
rbar<-mean(rvec[[1]])
nvec<-length(rvec[[1]])
for(j in 2:JK){
down<-up+1
up<-down+length(x[[j]])-1
rvec[[j]]<-rval[down:up]
nvec[j]<-length(rvec[[j]])
rbar[j]<-mean(rvec[[j]])
}
phat<-(rbar-.5)/N
phat<-as.matrix(phat)
svec<-NA
for(j in 1:JK)svec[j]<-sum((rvec[[j]]-rbar[j])^2)/(nvec[j]-1)
svec<-svec/N^2
VN<-N*diag(svec/nvec)
top<-con[1,1]*sum(diag(VN))
Ftest<-N*(t(phat)%*%con%*%phat)/top
nu1<-con[1,1]^2*sum(diag(VN))^2/sum(diag(con%*%VN%*%con%*%VN))
lam<-diag(1/(nvec-1))
nu2<-sum(diag(VN))^2/sum(diag(VN%*%VN%*%lam))
sig<-1-pf(Ftest,nu1,nu2)
list(F=Ftest,nu1=nu1,nu2=nu2,q.hat=phat,p.value=sig)
}

r1mcp<-function(x,alpha=.05,bhop=F){
#
# Do all pairwise comparisons using a modification of
# the Brunner, Dette and Munk (1997) rank-based method.
# FWE is controlled using Rom's technique.
#
#  With bhop=T, FWE is controlled using the
#  Benjamini-Hochberg method.
#
#  The data are assumed to be stored in x in list mode or in a matrix.
#
#   Missing values are automatically removed.
#
        if(is.matrix(x))x <- listm(x)
        if(!is.list(x))
                stop("Data must be stored in list mode or a matrix.")
J<-length(x)
        for(j in 1:J) {
                xx <- x[[j]]
                x[[j]] <- xx[!is.na(xx)] # Remove missing values
        }
#
CC<-(J^2-J)/2
# Determine critical values
ncon<-CC
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon)
}
if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon
output<-matrix(0,CC,5)
dimnames(output)<-list(NULL,c("Level","Level","test.stat","p.value","p.crit"))
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j < jj){
ic<-ic+1
output[ic,1]<-j
output[ic,2]<-jj
temp<-bdm(x[c(j,jj)])
output[ic,3]<-temp$F
output[ic,4]<-temp$p.value
}}}
temp2<-order(0-output[,4])
output[temp2,5]<-dvec[1:length(temp2)]
list(output=output)
}


tamhane<-function(x,x2=NA,cil=NA,crit=NA){
#
# First stage of Tamhane's method
#
# x contains first stage data
# x2 contains second stage data
#
# cil is the desired length of the confidence intervals.
# That is, cil is the distance between the upper and lower
# ends of the confidence intervals.
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
J<-length(x)
tempn<-0
svec<-NA
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
svec[j]<-var(temp)
}
A<-sum(1/(tempn-1))
df<-J/A
paste("The degrees of freedom are:",df)
if(is.na(crit))stop("Enter a critical value and reexecute this function")
if(is.na(cil))stop("To proceed, you must specify the length of the confidence intervals.")
d<-(cil/(2*crit))^2
n.vec<-NA
for(j in 1:J){
n.vec[j]<-max(tempn[j]+1,floor(svec[j]/d)+1)
}
ci.mat<-NA
if(!is.na(x2[1])){
if(is.matrix(x2))x2<-listm(x2)
if(!is.list(x2))stop("Data must be stored in list mode or in matrix mode.")
TT<-NA
U<-NA
J<-length(x)
nvec2<-NA
for(j in 1:length(x)){
nvec2[j]<-length(x2[[j]])
if(nvec2[j] < n.vec[j]-tempn[j]){
stop(paste("The required number of observations for group",j,"in the second stage is",
n.vec[j]-tempn[j],"but only",nvec2[j],"are available"))
}
TT[j]<-sum(x[[j]])
U[j]<-sum(x2[[j]])
print(c(TT[j],U[j],nvec2[j]))
}
b<-sqrt(tempn*((tempn+nvec2)*d-svec)/(nvec2*svec))
b<-(b+1)/(tempn+nvec2)
print(c(b,svec))
xtil<-TT*(1-nvec2*b)/tempn+b*U
print(xtil)
jall<-(J^2-J)/2
ci.mat<-matrix(0,ncol=4,nrow=jall)
dimnames(ci.mat)<-list(NULL,c("Group","Group","ci.low","ci.high"))
ic<-0
for(j in 1:J){
for(k in 1:J){
if(j<k){
ic<-ic+1
ci.mat[ic,1]<-j
ci.mat[ic,2]<-k
ci.mat[ic,3]<-xtil[j]-xtil[k]-cil/2
ci.mat[ic,4]<-xtil[j]-xtil[k]+cil/2
}}}}
list(n.vec=n.vec,ci.mat=ci.mat)
}


r2mcp<-function(J,K,x,grp=NA,alpha=.05,bhop=F){
#
# Do all pairwise comparisons of
# main effects for Factor A and B and all interactions
# using a rank-based method that tests for equal distributions.
#
#  The data are assumed to be stored in x in list mode or in a matrix.
#  If grp is unspecified, it is assumed x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second factor: level 1,2
#  x[[j+1]] is the data for level 2,1, etc.
#  If the data are in the wrong order, grp can be used to rearrange the
#  groups. For example, for a two by two design, grp<-c(2,4,3,1)
#  indicates that the second group corresponds to level 1,1;
#  group 4 corresponds to level 1,2; group 3 is level 2,1;
#  and group 1 is level 2,2.
#
#   Missing values are automatically removed.
#
        JK <- J * K
        if(is.matrix(x))
                x <- listm(x)
        if(!is.na(grp[1])) {
                yy <- x
                x<-list()
                for(j in 1:length(grp))
                        x[[j]] <- yy[[grp[j]]]
        }
        if(!is.list(x))
                stop("Data must be stored in list mode or a matrix.")
        for(j in 1:JK) {
                xx <- x[[j]]
                x[[j]] <- xx[!is.na(xx)] # Remove missing values
        }
        #
if(JK != length(x)){
print("Warning: The number of groups does not match")
print("the number of contrast coefficients.")
}
#
CC<-(J^2-J)/2
# Determine critical values
ncon<-CC*(K^2-K)/2
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon)
}
if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon
Fac.A<-matrix(0,CC,5)
dimnames(Fac.A)<-list(NULL,c("Level","Level","test.stat","p.value","p.crit"))
mat<-matrix(c(1:JK),ncol=K,byrow=T)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j < jj){
ic<-ic+1
Fac.A[ic,1]<-j
Fac.A[ic,2]<-jj
temp<-bdm2way(2,K,x[c(mat[j,],mat[jj,])])
Fac.A[ic,3]<-temp$outputA$F
Fac.A[ic,4]<-temp$outputA$sig
}}}
temp2<-order(0-Fac.A[,4])
Fac.A[temp2,5]<-dvec[1:length(temp2)]
CCB<-(K^2-K)/2
ic<-0
Fac.B<-matrix(0,CCB,5)
dimnames(Fac.B)<-list(NULL,c("Level","Level","test.stat","p.value","p.crit"))
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
ic<-ic+1
Fac.B[ic,1]<-k
Fac.B[ic,2]<-kk
mat1<-cbind(mat[,k],mat[,kk])
temp<-bdm2way(J,2,x[c(mat1[,1],mat1[,2])])
Fac.B[ic,3]<-temp$outputB$F
Fac.B[ic,4]<-temp$outputB$sig
}}}
temp2<-order(0-Fac.B[,4])
Fac.B[temp2,5]<-dvec[1:length(temp2)]
CCI<-CC*CCB
Fac.AB<-matrix(0,CCI,7)
dimnames(Fac.AB)<-list(NULL,c("Lev.A","Lev.A","Lev.B","Lev.B","test.stat",
"p.value","p.crit"))
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j < jj){
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
ic<-ic+1
Fac.AB[ic,1]<-j
Fac.AB[ic,2]<-jj
Fac.AB[ic,3]<-k
Fac.AB[ic,4]<-kk
val<-c(mat[j,k],mat[j,kk],mat[jj,k],mat[jj,kk])
temp<-bdm2way(2,2,x[val])
Fac.AB[ic,5]<-temp$outputAB$F
Fac.AB[ic,6]<-temp$outputAB$sig
}}}}}}
temp2<-order(0-Fac.AB[,6])
Fac.AB[temp2,7]<-dvec[1:length(temp2)]
list(Factor.A=Fac.A,Factor.B=Fac.B,Factor.AB=Fac.AB)
}


spmcpa<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),con=0,avg=FALSE,alpha=.05,
nboot=NA,pr=TRUE,...){
#
# A percentile bootstrap for multiple comparisons among
# all main effects for independent groups in a split-plot design
# The analysis is done by generating bootstrap samples and
# using an appropriate linear contrast.
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                x <- y
}
if(pr)print("As of Sept. 2005, est defaults to tmean")
JK<-J*K
data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data
jp<-1-K
kv<-0
kv2<-0
for(j in 1:J){
jp<-jp+K
xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]]))
for(k in 1:K){
kv<-kv+1
xmat[,k]<-x[[kv]]
}
xmat<-elimna(xmat)
for(k in 1:K){
kv2<-kv2+1
x[[kv2]]<-xmat[,k]
}}
xx<-x
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
# Next determine the n_j values
nvec<-NA
jp<-1-K
for(j in 1:J){
jp<-jp+K
nvec[j]<-length(x[[jp]])
}
if(avg){
d<-(J^2-J)/2
con<-matrix(0,J,d)
id<-0
Jm<-J-1
for (j in 1:Jm){
jp<-j+1
for(k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
if(!avg){
MJK<-K*(J^2-J)/2 # NUMBER OF COMPARISONS
JK<-J*K
MJ<-(J^2-J)/2
cont<-matrix(0,nrow=J,ncol=MJ)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j<jj){
ic<-ic+1
cont[j,ic]<-1
cont[jj,ic]<-0-1
}}}
tempv<-matrix(0,nrow=K-1,ncol=MJ)
con1<-rbind(cont[1,],tempv)
for(j in 2:J){
con2<-rbind(cont[j,],tempv)
con1<-rbind(con1,con2)
}
con<-con1
if(K>1){
for(k in 2:K){
con1<-push(con1)
con<-cbind(con,con1)
}}}
d<-ncol(con)
if(is.na(nboot)){
if(d<=4)nboot<-1000
if(d>4)nboot<-5000
}
#
# Now take bootstrap samples from jth level
# of Factor A and average K  corresponding estimates
# of location.
#
bloc<-matrix(NA,nrow=J,ncol=nboot)
print("Taking bootstrap samples. Please wait.")
mvec<-NA
ik<-0
for(j in 1:J){
paste("Working on level ",j," of Factor A")
x<-matrix(NA,nrow=nvec[j],ncol=K)
#
for(k in 1:K){
ik<-ik+1
x[,k]<-xx[[ik]]
if(!avg)mvec[ik]<-est(xx[[ik]],...)
}
tempv<-apply(x,2,est,...)
data<-matrix(sample(nvec[j],size=nvec[j]*nboot,replace=TRUE),nrow=nboot)
bvec<-matrix(NA,ncol=K,nrow=nboot)
mat<-listm(x)
for(k in 1:K){
temp<-x[,k]
bvec[,k]<-apply(data,1,rmanogsub,temp,est,...) # An nboot by K matrix
}
if(avg){
mvec[j]<-mean(tempv)
bloc[j,]<-apply(bvec,1,mean)
}
if(!avg){
if(j==1)bloc<-bvec
if(j>1)bloc<-cbind(bloc,bvec)
}
}
if(avg)bloc<-t(bloc)
connum<-d
psihat<-matrix(0,connum,nboot)
test<-1
for (ic in 1:connum){
psihat[ic,]<-apply(bloc,1,bptdpsi,con[,ic])
#test[ic]<-sum((psihat[ic,]>0))/nboot
test[ic]<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot
test[ic]<-min(test[ic],1-test[ic])
}
ncon<-ncol(con)
if(alpha==.05){
dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
dvec[1]<-alpha/2
}
temp2<-order(0-test)
ncon<-ncol(con)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output<-matrix(0,connum,6)
dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.sig","ci.lower","ci.upper"))
tmeans<-mvec
psi<-1
output[temp2,4]<-zvec
for (ic in 1:ncol(con)){
output[ic,2]<-sum(con[,ic]*tmeans)
output[ic,1]<-ic
output[ic,3]<-test[ic]
temp<-sort(psihat[ic,])
temp3<-round(output[ic,4]*nboot)+1
icl<-round(dvec[ncon]*nboot)+1
icu<-nboot-(icl-1)
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
output[,3]<-2*output[,3]
output[,4]<-2*output[,4]
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}

spmcpi<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),alpha=.05,nboot=NA,
SEED=TRUE,pr=TRUE,...){
#
# Multiple comparisons for interactions
# in a split-plot design.
# The analysis is done by taking difference scores
# among all pairs of dependent groups and
# determining which of
# these differences differ across levels of Factor A.
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                x <- y
}
if(pr)print("As of Sept. 2005, est defaults to tmean")
JK<-J*K
if(JK!=length(x)){
stop(paste("Something is wrong. Expected",JK,"groups, but x contains",length(x),"groups."))
}
MJ<-(J^2-J)/2
MK<-(K^2-K)/2
JMK<-J*MK
Jm<-J-1
data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data
jp<-1-K
kv<-0
kv2<-0
for(j in 1:J){
jp<-jp+K
xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]]))
for(k in 1:K){
kv<-kv+1
xmat[,k]<-x[[kv]]
}
xmat<-elimna(xmat)
for(k in 1:K){
kv2<-kv2+1
x[[kv2]]<-xmat[,k]
}}
xx<-x
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
# Next determine the n_j values
nvec<-NA
jp<-1-K
for(j in 1:J){
jp<-jp+K
nvec[j]<-length(x[[jp]])
}
#
MJMK<-MJ*MK
con<-matrix(0,nrow=JMK,ncol=MJMK)
cont<-matrix(0,nrow=J,ncol=MJ)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j<jj){
ic<-ic+1
cont[j,ic]<-1
cont[jj,ic]<-0-1
}}}
tempv<-matrix(0,nrow=MK-1,ncol=MJ)
con1<-rbind(cont[1,],tempv)
for(j in 2:J){
con2<-rbind(cont[j,],tempv)
con1<-rbind(con1,con2)
}
con<-con1
if(MK>1){
for(k in 2:MK){
con1<-push(con1)
con<-cbind(con,con1)
}}
d<-ncol(con)
if(is.na(nboot)){
if(d<=4)nboot<-1000
if(d>4)nboot<-5000
}
connum<-d
psihat<-matrix(0,connum,nboot)
#
# Now take bootstrap samples from jth level
# of Factor A and average K  corresponding estimates
# of location.
#
bloc<-matrix(NA,ncol=J,nrow=nboot)
print("Taking bootstrap samples. Please wait.")
mvec<-NA
it<-0
for(j in 1:J){
paste("Working on level ",j," of Factor A")
x<-matrix(NA,nrow=nvec[j],ncol=MK)
#
im<-0
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
im<-im+1
kp<-j*K+k-K
kpp<-j*K+kk-K
x[,im]<-xx[[kp]]-xx[[kpp]]
it<-it+1
mvec[it]<-est(x[,im],...)
}}}
data<-matrix(sample(nvec[j],size=nvec[j]*nboot,replace=TRUE),nrow=nboot)
bvec<-matrix(NA,ncol=MK,nrow=nboot)
mat<-listm(x)
for(k in 1:MK){
temp<-x[,k]
bvec[,k]<-apply(data,1,rmanogsub,temp,est,...) # An nboot by MK matrix
}
if(j==1)bloc<-bvec
if(j>1)bloc<-cbind(bloc,bvec)
}
test<-1
for (ic in 1:connum){
psihat[ic,]<-apply(bloc,1,bptdpsi,con[,ic])
#test[ic]<-sum((psihat[ic,]>0))/nboot
test[ic]<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot
test[ic]<-min(test[ic],1-test[ic])
}
ncon<-ncol(con)
if(alpha==.05){
dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
dvec[1]<-alpha/2
}
temp2<-order(0-test)
ncon<-ncol(con)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output<-matrix(0,connum,6)
dimnames(output)<-list(NULL,c("con.num","psihat","sig","crit.sig","ci.lower","ci.upper"))
tmeans<-mvec
psi<-1
for (ic in 1:ncol(con)){
output[ic,2]<-sum(con[,ic]*tmeans)
output[ic,1]<-ic
output[ic,3]<-test[ic]
output[temp2,4]<-zvec
temp<-sort(psihat[ic,])
icl<-round(dvec[ncon]*nboot)+1
icu<-nboot-(icl-1)
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
output[,3]<-2*output[,3]
output[,4]<-2*output[,4]
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}

sppbb<-function(J,K,x,est=onestep,JK=J*K,grp=c(1:JK),nboot=500,SEED=TRUE,...){
#
# A percentile bootstrap for main effects
# among dependent groups in a split-plot design
# The analysis is done based on all pairs
# of difference scores. The null hypothesis is that
# all such differences have a typical value of zero.
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                x <- y
}

JK<-J*K
data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data
jp<-1-K
kv<-0
kv2<-0
for(j in 1:J){
jp<-jp+K
xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]]))
for(k in 1:K){
kv<-kv+1
xmat[,k]<-x[[kv]]
}
xmat<-elimna(xmat)
for(k in 1:K){
kv2<-kv2+1
x[[kv2]]<-xmat[,k]
}}
xx<-x
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
# Next determine the n_j values
nvec<-NA
jp<-1-K
for(j in 1:J){
jp<-jp+K
nvec[j]<-length(x[[jp]])
}
#
# Now stack the data in an N by K matrix
#
x<-matrix(NA,nrow=nvec[1],ncol=K)
#
for(k in 1:K)x[,k]<-xx[[k]]
kc<-K
for(j in 2:J){
temp<-matrix(NA,nrow=nvec[j],ncol=K)
for(k in 1:K){
kc<-kc+1
temp[,k]<-xx[[kc]]
}
x<-rbind(x,temp)
}
# Now call function rmdzero to do the analysis
temp<-rmdzero(x,est=est,nboot=nboot,...)
list(p.value=temp$p.value,center=temp$center)
}

spmcpb<-function(J,K,x,est=tmean,JK=J*K,grp=c(1:JK),dif=TRUE,alpha=.05,
nboot=NA,pr=TRUE,...){
#
# A percentile bootstrap for all pairwise
# multiple comparisons
# among dependent groups in a split-plot design
#
# If dif=T, the analysis is done based on all pairs
# of difference scores.
# Otherwise, marginal measures of location are used.
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                x <- y
}
if(pr)print("As of Sept. 2005, est defaults to tmean")
JK<-J*K
data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data
jp<-1-K
kv<-0
kv2<-0
for(j in 1:J){
jp<-jp+K
xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]]))
for(k in 1:K){
kv<-kv+1
xmat[,k]<-x[[kv]]
}
xmat<-elimna(xmat)
for(k in 1:K){
kv2<-kv2+1
x[[kv2]]<-xmat[,k]
}}
xx<-x
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
# Next determine the n_j values
nvec<-NA
jp<-1-K
for(j in 1:J){
jp<-jp+K
nvec[j]<-length(x[[jp]])
}
#
# Now stack the data in an N by K matrix
#
x<-matrix(NA,nrow=nvec[1],ncol=K)
#
for(k in 1:K)x[,k]<-xx[[k]]
kc<-K
for(j in 2:J){
temp<-matrix(NA,nrow=nvec[j],ncol=K)
for(k in 1:K){
kc<-kc+1
temp[,k]<-xx[[kc]]
}
x<-rbind(x,temp)
}
# Now call function rmmcppb to do the analysis
temp<-rmmcppb(x,est=est,nboot=nboot,dif=dif,alpha=alpha,plotit=FALSE,...)
list(output=temp$output,con=temp$con,num.sig=temp$num.sig)
}

bwamcp<-function(J,K,x,tr=.2,JK=J*K,grp=c(1:JK),alpha=.05,op=T){
#
# All pairwise comparisons among levels of Factor A
# in a split-plot design using trimmed means.
#
# Data among dependent groups are pooled for each level
# of Factor A.
# Then this function calls lincon.
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                x <- y
}

JK<-J*K
if(!op){
data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data
data<-list()
jp<-1-K
kv<-0
for(j in 1:J){
jp<-jp+K
for(k in 1:K){
kv<-kv+1
if(k==1)temp<-x[[jp]]
if(k>1)temp<-c(temp,x[[kv]])
}
data[[j]]<-temp
}
print("Group numbers refer to levels of Factor A")
temp<-lincon(data,tr=tr,alpha=alpha)
}
if(op){
MJK<-K*(J^2-J)/2 # NUMBER OF COMPARISONS
JK<-J*K
MJ<-(J^2-J)/2
cont<-matrix(0,nrow=J,ncol=MJ)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j<jj){
ic<-ic+1
cont[j,ic]<-1
cont[jj,ic]<-0-1
}}}
tempv<-matrix(0,nrow=K-1,ncol=MJ)
con1<-rbind(cont[1,],tempv)
for(j in 2:J){
con2<-rbind(cont[j,],tempv)
con1<-rbind(con1,con2)
}
con<-con1
if(K>1){
for(k in 2:K){
con1<-push(con1)
con<-cbind(con,con1)
}}
print("Contrast Matrix Used:")
print(con)
temp<-lincon(x,con=con,tr=tr,alpha=alpha)
}
temp
}

pcor<-function(x,y=NA){
if(!is.na(y[1]))temp<-wincor(x,y,tr=0)
if(is.na(y[1]))temp<-winall(x,tr=0)
list(cor=temp$cor,siglevel=temp$siglevel)
}

apgdis<-function(m,est=sum,se=TRUE,...){
#
# For multivariate data,
# compute distance between each pair
# of points and measure depth of a point
# in terms of its  distance to all
# other points
#
#  Using se=T ensures that ordering of distance
# will not change with a change in scale.
#
#  m is an n by p matrix
#
m<-elimna(m)  # eliminate any missing values
temp<-0
if(se){
for(j in 1:ncol(m))m[,j]<-(m[,j]-median(m[,j]))/mad(m[,j])
}
for(j in 1:ncol(m)){
disx<-outer(m[,j],m[,j],"-")
temp<-temp+disx^2
}
temp<-sqrt(temp)
dis<-apply(temp,1,est,...)
temp2<-order(dis)
center<-m[temp2[1],]
list(center=center,distance=dis)
}
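
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): apgdis on simulated bivariate data; the returned center is
#  the point with the smallest total distance to all other points.
apgdis.demo<-function(n=30){
m<-cbind(rnorm(n),rnorm(n))
apgdis(m)$center
}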


rd2plot<-function(x,y,fr=.8,xlab="",ylab=""){
#
# Expected frequency curve
# for two groups.
#
# fr controls amount of smoothing
x<-elimna(x)
y<-elimna(y)
rmdx<-NA
rmdy<-NA
for(i in 1:length(x)){
rmdx[i]<-sum(near(x,x[i],fr))
}
for(i in 1:length(y)){
rmdy[i]<-sum(near(y,y[i],fr))
}
rmdx<-rmdx/length(x)
rmdy<-rmdy/length(y)
plot(c(x,y),c(rmdx,rmdy),type="n",ylab=ylab,xlab=xlab)
sx<-sort(x)
xorder<-order(x)
sysm<-rmdx[xorder]
lines(sx,sysm)
sy<-sort(y)
yorder<-order(y)
sysm<-rmdy[yorder]
lines(sy,sysm,lty=2)
}

depth2<-function(x,pts=NA,plotit=TRUE,xlab="VAR 1",ylab="VAR 2"){
#
#   Compute exact depths for bivariate data
if(ncol(x)!=2)stop("x must be a matrix with 2 columns")
x<-elimna(x)
if(is.na(pts[1]))pts<-x
if(ncol(pts)!=2)stop("Argument pts must be stored as a matrix with 2 columns")
pts<-as.matrix(pts)
ndepth<-NA
for(i in 1:nrow(pts)){
ndepth[i]<-depth(pts[i,1],pts[i,2],x)
}
if(plotit){
m<-x
plot(m,xlab=xlab,ylab=ylab)
flag<-(ndepth==max(ndepth))
if(sum(flag)==1)center<-m[flag,]
if(sum(flag)>1)center<-apply(m[flag,],2,mean)
points(center[1],center[2],pch="+")
temp<-ndepth
flag<-(temp>=median(temp))
xx<-x[flag,]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}
ndepth
}

fdepth<-function(m,pts=NA,plotit=TRUE,cop=2,center=NA,xlab="VAR 1",
ylab="VAR 2"){
#
# Determine depth of points in pts,  relative to
# points in m. If pts is not specified,
# depth of all points in m are determined.
#
# m and pts can be vectors or matrices with
# p columns (the number of variables).
#
# Determine center, for each point, draw a line
# connecting it with center, project points onto this line
# and determine depth of the projected points.
# The final depth of a point is its minimum depth
# among all projections.
#
# plotit=TRUE creates a scatterplot when working with
# bivariate data and pts=NA
#
#  There are three options for computing the center of the
#  cloud of points when computing projections, assuming center=NA:
#
#  cop=2 uses MCD center
#  cop=3 uses median of the marginal distributions.
#  cop=4 uses MVE center
#
#  If a value for center is passed to this function,
#  this value is used to determine depths.
#
#  When plotting,
#  center is marked with a cross, +.
#
library(MASS)
if(cop!=2 && cop!=3 && cop!=4)stop("Only cop=2, 3 or 4 is allowed")
if(is.list(m))stop("Store data in a matrix; might use function listm")
m<-as.matrix(m)
pts<-as.matrix(pts)
if(!is.na(pts[1]))remm<-m
nm<-nrow(m)
nm1<-nm+1
if(!is.na(pts[1])){
if(ncol(m)!=ncol(pts))stop("Number of columns of m is not equal to number of columns for pts")
}
m<-elimna(m) # Remove missing values
m<-as.matrix(m)
if(ncol(m)==1)dep<-unidepth(as.vector(m[,1]),pts=pts)
if(ncol(m)>1){
if(is.na(center[1])){
if(cop==2){
center<-cov.mcd(m)$center
}
if(cop==4){
center<-cov.mve(m)$center
}
if(cop==3){
center<-apply(m,2,median)
}}
if(is.na(pts[1])){
mdep <- matrix(NA,nrow=nrow(m),ncol=nrow(m))
}
if(!is.na(pts[1])){
mdep <- matrix(NA,nrow=nrow(m),ncol=nrow(pts))
}
for (i in 1:nrow(m)){
B<-m[i,]-center
dis<-NA
BB<-B^2
bot<-sum(BB)
if(bot!=0){
if(is.na(pts[1])){
for (j in 1:nrow(m)){
A<-m[j,]-center
temp<-sum(A*B)*B/bot
dis[j]<-sign(sum(A*B))*sqrt(sum(temp^2))
}}
if(!is.na(pts[1])){
m<-rbind(remm,pts)
for (j in 1:nrow(m)){
A<-m[j,]-center
temp<-sum(A*B)*B/bot
dis[j]<-sign(sum(A*B))*sqrt(sum(temp^2))
}}
#
# For ith projection, store depths of
# points in mdep[i,]
#
if(is.na(pts[1]))mdep[i,]<-unidepth(dis)
if(!is.na(pts[1])){
mdep[i,]<-unidepth(dis[1:nm],dis[nm1:nrow(m)])
}}
if(bot==0)mdep[i,]<-rep(0,ncol(mdep))
}
dep<-apply(mdep,2,min)
if(ncol(m)==2 && is.na(pts[1])){
flag<-chull(m)
dep[flag]<-min(dep)
}
}
if(ncol(m)==2){
if(is.na(pts[1]) && plotit){
plot(m,xlab=xlab,ylab=ylab)
points(center[1],center[2],pch="+")
x<-m
temp<-dep
flag<-(temp>=median(temp))
xx<-x[flag,]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}}
dep<-round(dep*nrow(m))/nrow(m)
dep
}

unidepth<-function(x,pts=NA){
#
# Determine depth of points in the vector x
#
if(!is.vector(x))stop("x should be a vector")
if(is.na(pts[1]))pts<-x
pup<-apply(outer(pts,x,FUN="<="),1,sum)/length(x)
pdown<-apply(outer(pts,x,FUN="<"),1,sum)/length(x)
pdown<-1-pdown
m<-matrix(c(pup,pdown),nrow=2,byrow=T)
dep<-apply(m,2,min)
dep
}
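
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): for the vector 1:5, unidepth returns .2,.4,.6,.4,.2, so
#  the middle value is deepest.
unidepth.demo<-function(){
unidepth(c(1,2,3,4,5))
}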

opreg<-function(x,y,regfun=tsreg,cop=3,MC=FALSE,varfun=pbvar,corfun=pbcor,STAND=FALSE){
#
# Do regression on points not labled outliers
# using projection-type outlier detection method
#
if(MC)library(parallel)
x<-as.matrix(x)
m<-cbind(x,y)
m<-elimna(m) # eliminate any rows with missing data
if(!MC)ivec<-outpro(m,plotit=FALSE,cop=cop,STAND=STAND)$keep
if(MC)ivec<-outproMC(m,plotit=FALSE,cop=cop,STAND=STAND)$keep
np1<-ncol(x)+1
coef<-regfun(m[ivec,1:ncol(x)],m[ivec,np1])$coef
vec<-rep(1,length(y))
residuals<-y-cbind(vec,x)%*%coef
stre=NULL
yhat<-y-residuals
e.pow<-varfun(yhat)/varfun(y)
if(!is.na(e.pow)){
if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2
stre=sqrt(e.pow)
}
list(coef=coef,residuals=residuals,Strength.Assoc=stre,Explanatory.Power=e.pow)
}
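
#  Illustrative sketch (a hypothetical demo, not part of the original
#  library): opreg with one predictor; outpro (defined elsewhere in this
#  file) is assumed to be available, as are the defaults tsreg, pbvar
#  and pbcor.
opreg.demo<-function(n=40){
x<-rnorm(n)
y<-x+rnorm(n)
opreg(x,y)$coef
}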

mgvdep<-function(m,se=F){
#
# Find the center of a scatterplot, add point that
# increases the generalized variance by smallest amount
# continue for all points
# return the MGV depths.
#
# Essentially the same as mgvar, which
# determines MGV distances, except that here
# the convention is that the deepest points
# have the largest numerical value, with the
# depth of the deepest values equal to one.
#
temp<-apgdis(m,se=se)$distance
icen<-ncol(m)
temp3<-order(temp)
chkit<-sum(duplicated(temp[temp3[1:icen]]))
icen<-icen+chkit
flag<-rep(T,length(temp))
flag[temp3[1:icen]]<-F
# set duplicated central values to F
varvec<-0
varvec[!flag]<-NA
while(sum(flag)>0){
ic<-0
chk<-NA
remi<-NA
for(i in 1:nrow(m)){
if(flag[i]){
ic<-ic+1
chk[ic]<-gvar(rbind(m[!flag,],m[i,]))
remi[ic]<-i
}}
sor<-order(chk)
k<-remi[sor[1]]
varvec[k]<-chk[sor[1]]
flag[k]<-F
}
varvec[is.na(varvec)]<-0
varvec<-1/(1+varvec)
varvec
}


fdepthv2<-function(m,pts=NA,plotit=TRUE){
#
# Determine depth of points in pts relative to
# points in m
#
# Draw a line between each pair of distinct points
# and determine depth of the projected points.
# The final depth of a point is its minimum depth
# among all projections.
#
# This function is slower than fdepth and requires
# space for an nc by n matrix, nc=(n^2-n)/2.
# But it allows the data to have a singular
# covariance matrix, and it provides a more accurate
# approximation of halfspace depth.
#
# plotit=TRUE creates a scatterplot when working with
# bivariate data and pts=NA
#
#  When plotting,
#  center is marked with a cross, +.
#
m<-elimna(m) # Remove missing values
if(!is.na(pts[1]))remm<-m
if(!is.matrix(m))dep<-unidepth(m)
if(is.matrix(m)){
nm<-nrow(m)
nt<-nm
nm1<-nm+1
if(!is.na(pts[1])){
if(ncol(m)!=ncol(pts))stop("Number of columns of m is not equal to number of columns for pts")
nt<-nm+nrow(pts)
}}
if(ncol(m)==1)dep<-unidepth(as.vector(m[,1]))
if(ncol(m)>1){
m<-elimna(m) # Remove missing values
nc<-(nrow(m)^2-nrow(m))/2
if(is.na(pts[1]))mdep <- matrix(0,nrow=nc,ncol=nrow(m))
if(!is.na(pts[1])){
mdep <- matrix(0,nrow=nc,ncol=nrow(pts))
}
ic<-0
for (iall in 1:nm){
for (i in 1:nm){
if(iall < i){
ic<-ic+1
B<-m[i,]-m[iall,]
dis<-NA
BB<-B^2
bot<-sum(BB)
if(bot!=0){
if(is.na(pts[1])){
for (j in 1:nrow(m)){
A<-m[j,]-m[iall,]
temp<-sum(A*B)*B/bot
dis[j]<-sign(sum(A*B))*sqrt(sum(temp^2))
}}
if(!is.na(pts[1])){
m<-rbind(remm,pts)
for (j in 1:nrow(m)){
A<-m[j,]-m[iall,]
temp<-sum(A*B)*B/bot
dis[j]<-sign(sum(A*B))*sqrt(sum(temp^2))
}}
#
# For ic_th projection, store depths of
# points in mdep[ic,]
#
if(is.na(pts[1]))mdep[ic,]<-unidepth(dis)
if(!is.na(pts[1])){
mdep[ic,]<-unidepth(dis[1:nm],dis[nm1:nrow(m)])
}}
if(bot==0)mdep[ic,]<-rep(0,ncol(mdep))
}}}
dep<-apply(mdep,2,min)
}
if(ncol(m)==2 &&is.na(pts[1])){
flag<-chull(m)
dep[flag]<-min(dep)
}
if(ncol(m)==2){
if(is.na(pts[1]) && plotit){
plot(m)
x<-m
temp<-dep
flag<-(temp>=median(temp))
xx<-x[flag,]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}}
dep
}
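#
# Illustrative usage of fdepthv2 (a sketch, not part of the original
# file). All (n^2-n)/2 pairwise projections are used, so keep n modest:
if(FALSE){
set.seed(3)
m<-matrix(rnorm(40),ncol=2)
fdepthv2(m,plotit=FALSE)   # one approximate halfspace depth per row
}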

g2plot<-function(x1,x2,op=4,rval=15,fr=.8,aval=.5,xlab="X",ylab=""){
#
# plot estimates of the density functions for two groups.
#
# op=1: Use Rosenblatt shifted histogram
#
# op=2: Use a kernel density estimate based on the
# built-in function density (Epanechnikov kernel).
#
# op=3: Use expected frequency curve.
#
# op=4: Use adaptive kernel estimator
#
x1<-elimna(x1)
x2<-elimna(x2)
if(op==3){
rd2plot(x1,x2,fr=fr,xlab=xlab,ylab=ylab)
print("Might consider using op=4 if graph is ragged")
}
if(op==2){
#tempx<-density(x1,na.rm=TRUE,width=bandwidth.sj(x1,method="dpi"),n=256)
tempx<-density(x1,na.rm=TRUE,kernel="epanechnikov")
#tempy<-density(x2,na.rm=TRUE,width=bandwidth.sj(x2,method="dpi"),n=256)
tempy<-density(x2,na.rm=TRUE,kernel="epanechnikov")
plot(c(tempx$x,tempy$x),c(tempx$y,tempy$y),type="n",xlab=xlab,ylab=ylab)
lines(tempx$x,tempx$y)
lines(tempy$x,tempy$y,lty=2)
}
if(op==1){
        y1 <- sort(x1)
        z1 <- 1
        z2 <- 1
        par(yaxt = "n")
        temp <- floor(0.01 * length(x1))
        if(temp == 0)
                temp <- 5
        ibot <- y1[temp]
        itop <- y1[floor(0.99 * length(x1))]
        xaxis1 <- seq(ibot, itop, length = rval)
        for(i in 1:rval)
                z1[i] <- kerden(x1, 0, xaxis1[i])
        y2 <- sort(x2)
         temp <- floor(0.01 * length(x2))
        if(temp == 0)
                temp <- 5
        ibot <- y2[temp]
        itop <- y2[floor(0.99 * length(x2))]
        xaxis2 <- seq(ibot, itop, length = rval)
        for(i in 1:rval)
                z2[i] <- kerden(x2, 0, xaxis2[i])
plot(c(xaxis1,xaxis2),c(z1,z2), xlab =xlab, ylab =ylab, type = "n")
lines(xaxis1,z1)
lines(xaxis2,z2,lty=2)
}
if(op==4){
x1<-sort(x1)
x2<-sort(x2)
z1<-akerd(x1,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE)
z2<-akerd(x2,aval=aval,fr=fr,pyhat=TRUE,plotit=FALSE)
plot(c(x1,x2),c(z1,z2), xlab =xlab, ylab =ylab, type = "n")
lines(x1,z1)
lines(x2,z2,lty=2)
}
}
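#
# Illustrative usage of g2plot (a sketch, not part of the original
# file): compare density estimates for two shifted samples.
if(FALSE){
set.seed(7)
g2plot(rnorm(100),rnorm(100,mean=1))        # adaptive kernel (op=4)
g2plot(rnorm(100),rnorm(100,mean=1),op=2)   # built-in density
}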

mulwmw<-function(m1,m2,plotit=TRUE,cop=3,alpha=.05,nboot=1000,pop=4,fr=.8,pr=FALSE){
#
#
# Determine the centers corresponding to two
# independent groups, project all points onto the line
# connecting the centers, and then, based on the
# projected distances, estimate p, the probability that
# a randomly sampled point from group 1 is less than a
# randomly sampled point from group 2.
#
# plotit=TRUE creates a plot of the projected data
# pop=1 plot two dotplots based on projected distances
# pop=2 boxplots
# pop=3 expected frequency curve.
# pop=4 adaptive kernel density
#
#  There are three options for computing the center of the
#  cloud of points when computing projections:
#  cop=1 uses Donoho-Gasko median
#  cop=2 uses MCD center
#  cop=3 uses median of the marginal distributions.
#
#  When using cop=2 or 3, default critical value for outliers
#  is square root of the .975 quantile of a
#  chi-squared distribution with p degrees
#  of freedom.
#
#  Donoho-Gasko (Tukey) median is marked with a cross, +.
#
if(is.null(dim(m1))||dim(m1)[2]<2){print("Data are assumed to be stored in")
print(" a matrix or data frame having two or more columns.")
stop(" For univariate data, use the function outbox or out")
}
m1<-elimna(m1) # Remove missing values
m2<-elimna(m2)
n1=nrow(m1)
n2=nrow(m2)
if(cop==1){
if(ncol(m1)>2){
center1<-dmean(m1,tr=.5)
center2<-dmean(m2,tr=.5)
}
if(ncol(m1)==2){
tempd<-NA
for(i in 1:nrow(m1))
tempd[i]<-depth(m1[i,1],m1[i,2],m1)
mdep<-max(tempd)
flag<-(tempd==mdep)
if(sum(flag)==1)center1<-m1[flag,]
if(sum(flag)>1)center1<-apply(m1[flag,],2,mean)
for(i in 1:nrow(m2))
tempd[i]<-depth(m2[i,1],m2[i,2],m2)
mdep<-max(tempd)
flag<-(tempd==mdep)
if(sum(flag)==1)center2<-m2[flag,]
if(sum(flag)>1)center2<-apply(m2[flag,],2,mean)
}}
if(cop==2){
center1<-cov.mcd(m1)$center
center2<-cov.mcd(m2)$center
}
if(cop==3){
center1<-apply(m1,2,median)
center2<-apply(m2,2,median)
}
if(cop==4){
center1<-smean(m1)
center2<-smean(m2)
}
center<-(center1+center2)/2
B<-center1-center2
if(sum(center1^2)<sum(center2^2))B<-(0-1)*B
BB<-B^2
bot<-sum(BB)
disx<-NA
disy<-NA
if(bot!=0){
for (j in 1:nrow(m1)){
AX<-m1[j,]-center
tempx<-sum(AX*B)*B/bot
disx[j]<-sign(sum(AX*B))*sqrt(sum(tempx^2))
}
for (j in 1:nrow(m2)){
AY<-m2[j,]-center
tempy<-sum(AY*B)*B/bot
disy[j]<-sign(sum(AY*B))*sqrt(sum(tempy^2))
}
}
if(plotit){
if(pop==1){
par(yaxt="n")
xv<-rep(2,length(disx))
yv<-rep(1,length(disy))
plot(c(disx,disy),c(xv,yv),type="n",xlab="",ylab="")
xv<-rep(1.6,length(disx))
yv<-rep(1.4,length(disy))
points(disx,xv)
points(disy,yv)
}
if(pop==2)boxplot(disx,disy)
if(pop==3)rd2plot(disx,disy,fr=fr)
if(pop==4)g2plot(disx,disy,fr=fr)
}
m<-outer(disx,disy,FUN="-")
m<-sign(m)
phat<-(1-mean(m))/2
if(bot==0)phat<-.5
print("Computing critical values")
m1<-t(t(m1)-center1)
m2<-t(t(m2)-center2)
v1<-mulwmwcrit(m1,m2,cop=cop,alpha=alpha,iter=nboot,pr=pr)
list(phat=phat,lower.crit=v1[1],upper.crit=v1[2],n1=n1,n2=n2)
}
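#
# Illustrative usage of mulwmw (a sketch, not part of the original
# file; the bootstrap critical values make this relatively slow):
if(FALSE){
set.seed(9)
m1<-matrix(rnorm(60),ncol=2)
m2<-matrix(rnorm(60,mean=.5),ncol=2)
mulwmw(m1,m2,plotit=FALSE,nboot=100)
}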

mulwmwcrit<-function(mm1,mm2,plotit=TRUE,cop=3,iter=1000,alpha=.05,SEED=NA,pr=FALSE){
#
#
# Determine critical value for the function mulwmw
#
if(!is.matrix(mm1))stop("Data are assumed to be stored in a matrix having two or more columns. For univariate data, use the function outbox or out")
if(is.na(SEED))set.seed(2)
if(!is.na(SEED))set.seed(SEED)
val<-NA
n1<-nrow(mm1)
n2<-nrow(mm2)
for(it in 1:iter){
ivec1<-sample(c(1:n1),replace=TRUE)
ivec2<-sample(c(1:n2),replace=TRUE)
m1<-mm1[ivec1,]
m2<-mm2[ivec2,]
if(cop==1){
if(ncol(m1)>2){
center1<-dmean(m1,tr=.5)
center2<-dmean(m2,tr=.5)
}
if(ncol(m1)==2){
tempd<-NA
for(i in 1:nrow(m1))
tempd[i]<-depth(m1[i,1],m1[i,2],m1)
mdep<-max(tempd)
flag<-(tempd==mdep)
if(sum(flag)==1)center1<-m1[flag,]
if(sum(flag)>1)center1<-apply(m1[flag,],2,mean)
for(i in 1:nrow(m2))
tempd[i]<-depth(m2[i,1],m2[i,2],m2)
mdep<-max(tempd)
flag<-(tempd==mdep)
if(sum(flag)==1)center2<-m2[flag,]
if(sum(flag)>1)center2<-apply(m2[flag,],2,mean)
}}
if(cop==2){
center1<-cov.mcd(m1)$center
center2<-cov.mcd(m2)$center
}
if(cop==3){
center1<-apply(m1,2,median)
center2<-apply(m2,2,median)
}
center<-(center1+center2)/2
B<-center1-center2
if(sum(center1^2)>sum(center2^2))B<-(0-1)*B
BB<-B^2
bot<-sum(BB)
disx<-NA
disy<-NA
if(bot!=0){
for (j in 1:nrow(m1)){
AX<-m1[j,]-center
tempx<-sum(AX*B)*B/bot
disx[j]<-sign(sum(AX*B))*sqrt(sum(tempx^2))
}
for (j in 1:nrow(m2)){
AY<-m2[j,]-center
tempy<-sum(AY*B)*B/bot
disy[j]<-sign(sum(AY*B))*sqrt(sum(tempy^2))
}}
m<-outer(disx,disy,FUN="-")
m<-sign(m)
val[it]<-(1-mean(m))/2
if(bot==0)val[it]<-.5
if(pr)print(paste("Iteration ",it," of ",iter," complete"))
}
val<-sort(val)
low<-round(alpha*iter/2)+1
up<-iter-low
crit<-NA
crit[1]<-val[low]
crit[2]<-val[up]
crit
}


dmean<-function(m,tr=.2,dop=1,cop=2){
#
# Compute multivariate measure of location
# using Donoho-Gasko method.
#
# dop=1, use fdepth to compute depths
# dop=2, use fdepthv2  to compute depths
#
# cop=1, Tukey median; can't be used here.
# cop=2, use MCD in fdepth
# cop=3, use marginal medians in fdepth
# cop=4, use MVE in fdepth
#
if(is.list(m))m<-matl(m)
if(!is.matrix(m))stop("Data must be stored in a matrix or in list mode.")
if(ncol(m)==1){
if(tr==.5)val<-median(m)
if(tr>.5)stop("Amount of trimming must be at most .5")
if(tr<.5)val<-mean(m,tr)
}
if(ncol(m)>1){
temp<-NA
if(ncol(m)!=2){
# Use approximate depth
if(dop==1)temp<-fdepth(m,plotit=FALSE,cop=cop)
if(dop==2)temp<-fdepthv2(m)
}
#  Use exact depth if ncol=2
if(ncol(m)==2){
for(i in 1:nrow(m))
temp[i]<-depth(m[i,1],m[i,2],m)
}
mdep<-max(temp)
flag<-(temp==mdep)
if(tr==.5){
if(sum(flag)==1)val<-m[flag,]
if(sum(flag)>1)val<-apply(m[flag,],2,mean)
}
if(tr<.5){
flag2<-(temp>=tr)
if(sum(flag2)==0 && sum(flag)>1)val<-apply(as.matrix(m[flag,]),2,mean)
if(sum(flag2)==0 && sum(flag)==1)val=m[flag,]
if(sum(flag2)==1)val<-m[flag2,]
if(sum(flag2)>1)val<-apply(m[flag2,],2,mean)
}}
val
}
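#
# Illustrative usage of dmean (a sketch, not part of the original
# file). With bivariate data, exact depths are used, and tr=.5 gives
# the Tukey (Donoho-Gasko) median:
if(FALSE){
set.seed(11)
m<-matrix(rnorm(60),ncol=2)
dmean(m,tr=.5)   # Tukey median
dmean(m,tr=.2)   # depth-based trimmed mean
}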

lsqs2<-function(x,y,MD=FALSE,tr=.05,plotit=TRUE){
#  cf Liu and Singh, JASA 1993, 252-260
#
if(is.list(x))x<-matl(x)
if(is.list(y))y<-matl(y)
disyx<-NA # depth of y in x
disxy<-NA # depth of x in y
if(!is.matrix(x) && !is.matrix(y)){
x<-x[!is.na(x)]
y<-y[!is.na(y)]
#
tempxx<-NA
for(i in 1:length(x)){
tempxx[i]<-sum(x[i]<=x)/length(x)
if(tempxx[i]>.5)tempxx[i]<-1-tempxx[i]
}
for(i in 1:length(x)){
temp<-sum(x[i]<=y)/length(y)
if(temp>.5)temp<-1-temp
disxy[i]<-mean(temp>tempxx)
}
tempyy<-NA
for(i in 1:length(y)){
tempyy[i]<-sum(y[i]<=y)/length(y)
if(tempyy[i]>.5)tempyy[i]<-1-tempyy[i]
}
for(i in 1:length(y)){
temp<-sum(y[i]<=x)/length(x)
if(temp>.5)temp<-1-temp # depth of y_i in x
disyx[i]<-mean(temp>tempyy)
}
qhatxy<-mean(disyx)
qhatyx<-mean(disxy)
qhat<-(qhatxy+qhatyx)/2
}
if(is.matrix(x) && is.matrix(y)){
if(!MD){
if(ncol(x)!=2 || ncol(y)!=2){
# Use approximate depth
tempyy<-fdepth(y)
temp<-fdepth(y,x)
for(i in 1:nrow(x)){
disxy[i]<-mean(temp[i]>tempyy)
}
tempxx<-NA
tempxx<-fdepth(x)
temp<-fdepth(x,pts=y)
for(i in 1:nrow(y)){
disyx[i]<-mean(temp[i]>tempxx)
}}
if(ncol(x)==2 && ncol(y)==2){
if(plotit){
plot(rbind(x,y),type="n",xlab="VAR 1",ylab="VAR 2")
points(x)
points(y,pch="o")
temp<-NA
for(i in 1:nrow(x)){
temp[i]<-depth(x[i,1],x[i,2],x)
}
flag<-(temp>=median(temp))
xx<-x[flag,]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
temp<-NA
for(i in 1:nrow(y)){
temp[i]<-depth(y[i,1],y[i,2],y)
}
flag<-(temp>=median(temp))
xx<-y[flag,]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
flag<-(temp>=median(temp))
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,],lty=2)
lines(xx[c(temp[1],temp[length(temp)]),],lty=2)
}
tempyy<-NA
for(i in 1:nrow(y))tempyy[i]<-depth(y[i,1],y[i,2],y)
for(i in 1:nrow(x)){
temp<-depth(x[i,1],x[i,2],y)
disxy[i]<-mean(temp>tempyy)
}
tempxx<-NA
for(i in 1:nrow(x))tempxx[i]<-depth(x[i,1],x[i,2],x)
for(i in 1:nrow(y)){
temp<-depth(y[i,1],y[i,2],x)
disyx[i]<-mean(temp>tempxx)
}
}}
if(MD){
mx<-apply(x,2,median)
my<-apply(y,2,median)
vx<-apply(x,2,winval,tr=tr)-apply(x,2,mean,trim=tr)+mx
vx<-var(vx)
vy<-apply(y,2,winval,tr=tr)-apply(y,2,mean,trim=tr)+my
vy<-var(vy)
tempxx<-1/(1+mahalanobis(x,mx,vx))
tempyx<-1/(1+mahalanobis(y,mx,vx))
for(i in 1:nrow(y)){
disyx[i]<-mean(tempyx[i]>tempxx)
}
tempyy<-1/(1+mahalanobis(y,my,vy))
tempxy<-1/(1+mahalanobis(x,my,vy))
for(i in 1:nrow(x)){
disxy[i]<-mean(tempxy[i]>tempyy)
}
}
qhatxy<-sum(disxy)
qhatyx<-sum(disyx)
qhat<-(qhatxy+qhatyx)/(length(disxy)+length(disyx))
}
qhatyx<-mean(disyx)
qhatxy<-mean(disxy)
list(qhatxy,qhatyx,qhat)
}

depthg2<-function(x,y,alpha=.05,nboot=500,MD=FALSE,plotit=TRUE,op=FALSE,fast=FALSE,SEED=TRUE,
xlab="VAR 1",ylab="VAR 2"){
#
#   Compare two independent groups based on p measures
#   for each group.
#
#   The method is based on Tukey's depth if MD=F;
#   otherwise the Mahalanobis depth is used.
#   If p>2, then Mahalanobis depth is used automatically
#
#   The method is designed to be sensitive to differences in scale
#
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
x=elimna(x)
y=elimna(y)
x=as.matrix(x)
y=as.matrix(y)
if(is.matrix(x) && is.matrix(y)){  # YES, code is odd.
nv1<-nrow(x)
nv2<-nrow(y)
if(ncol(x)!=ncol(y))stop("Number of columns of x is not equal to number for y")
if(ncol(x) >2)MD<-T
if(ncol(x)==2 && plotit){
plot(rbind(x,y),type="n",xlab=xlab,ylab=ylab)
points(x,pch="*")
points(y,pch="o")
temp<-NA
for(i in 1:nrow(x)){
temp[i]<-depth(x[i,1],x[i,2],x)
}
flag<-(temp>=median(temp))
xx<-x[flag,]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
temp<-NA
for(i in 1:nrow(y)){
temp[i]<-depth(y[i,1],y[i,2],y)
}
flag<-(temp>=median(temp))
xx<-y[flag,]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
flag<-(temp>=median(temp))
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,],lty=2)
lines(xx[c(temp[1],temp[length(temp)]),],lty=2)
}
print("Taking bootstrap samples. Please wait.")
data1<-matrix(sample(nv1,size=nv1*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(nv2,size=nv2*nboot,replace=TRUE),nrow=nboot)
qhatd<-NA
dhatb<-NA
for(ib in 1:nboot){
if(op)print(paste("Bootstrap sample ",ib," of ",nboot, "is complete."))
if(!fast)temp<-lsqs2(x[data1[ib,],],y[data2[ib,],],plotit=FALSE,MD=MD)
if(fast)temp<-lsqs2.for(x[data1[ib,],],y[data2[ib,],],plotit=FALSE,MD=MD)
qhatd[ib]<-temp[[1]]-temp[[2]]
}
temp<-sort(qhatd)
lv<-round(alpha*nboot/2)
uv<-nboot-lv
difci<-c(temp[lv+1],temp[uv])
}
#
if(!is.matrix(x) && !is.matrix(y)){
nv1<-length(x)
nv2<-length(y)
print("Taking bootstrap samples. Please wait.")
data1<-matrix(sample(nv1,size=nv1*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(nv2,size=nv2*nboot,replace=TRUE),nrow=nboot)
qhatd<-NA
dhatb<-NA
for(ib in 1:nboot){
if(!fast)temp<-lsqs2(x[data1[ib,]],y[data2[ib,]],plotit=FALSE,MD=MD)
if(fast)temp<-lsqs2.for(x[data1[ib,]],y[data2[ib,]],plotit=FALSE,MD=MD)
qhatd[ib]<-temp[[1]]-temp[[2]]
dhatb[ib]<-(temp[[1]]+temp[[2]])/2
#print(paste("Bootstrap sample ",ib," of ",nboot, "is complete."))
}}
temp<-sort(qhatd)
temp2<-sort(dhatb)
lv<-round(alpha*nboot/2)
uv<-nboot-lv
difci<-c(temp[lv+1],temp[uv])
list(difci=difci)
}


hochberg<-function(x,x2=NA,cil=NA,crit=NA,con=0,tr=.2,alpha=.05,iter=10000,SEED=TRUE){
#
# A generalization of Hochberg's two-stage
# method to trimmed means.
#
# x contains first stage data
# x2 contains second stage data
#
# cil is the desired length of the confidence intervals.
# That is, cil is the distance between the upper and lower
# ends of the confidence intervals.
#
x3<-x2
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
J<-length(x)
tempn<-0
svec<-NA
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
svec[j]<-winvar(temp,tr=tr)/(1-2*tr)^2
}
tempt<-floor((1-2*tr)*tempn)
A<-sum(1/(tempt-1))
df<-J/A
print(paste("If using the tables of Studentized range distribution,"))
print(paste("the degrees of freedom are:",df))
if(!is.list(x2) && !is.matrix(x2)){
x2<-list()
for(j in 1:J)x2[[j]]<-NA
}
if(is.na(cil))stop("To proceed, you must specify the maximum length of the confidence intervals.")
if(is.na(crit)){
print("Approximating critical value")
crit<-trange(tempn-1,alpha=alpha,iter=iter,SEED=SEED)
print(paste("The critical value is ",crit))
}
#
if(con[1] == 0){
               Jm<-J-1
               ncon <- (J^2 - J)/2
                con <- matrix(0, J, ncon)
                id <- 0
                for(j in 1:Jm) {
                        jp <- j + 1
                        for(k in jp:J) {
                                id <- id + 1
                                con[j, id] <- 1
                                con[k, id] <- 0 - 1
                        }
                }
        }
        ncon <- ncol(con)
avec<-NA
for(i in 1:ncon){
temp<-con[,i]
avec[i]<-sum(temp[temp>0])
}
dvec<-(cil/(2*crit*avec))^2
d<-max(dvec)
n.vec<-NA
for(j in 1:J){
n.vec[j]<-max(tempn[j],floor(svec[j]/d)+1)
print(paste("Need an additional ", n.vec[j]-tempn[j],
" observations for group", j))
}
#
# Do second stage if data are supplied
#
if(is.matrix(x2))x2<-listm(x2)
temp2<-n.vec-tempn
if(!is.list(x3) && !is.matrix(x3) && sum(temp2)>0)stop("No second stage data supplied; this function is terminating")
if(length(x) != length(x2))warning("Number of groups in first stage data does not match the number in the second stage.")
ci.mat<-NA
if(!is.na(x2[1]) || sum(temp2)==0){
xtil<-NA
nvec2<-NA
for(j in 1:J){
nvec2[j]<-0
temp<-x2[[j]]
if(!is.na(temp[1]))nvec2[j]<-length(x2[[j]])
if(nvec2[j] <n.vec[j]-tempn[j])warning(paste("The required number of observations for group",j," in the second stage is ",n.vec[j]-tempn[j]," but only ",nvec2[j]," are available"))
xtil[j]<-mean(c(x[[j]],x2[[j]]),tr=tr,na.rm=TRUE)
}
ci.mat<-matrix(0,ncol=3,nrow=ncon)
dimnames(ci.mat)<-list(NULL,c("con.num","ci.low","ci.high"))
for(ic in 1:ncon){
ci.mat[ic,1]<-ic
bvec<-con[,ic]*sqrt(svec/(nvec2+tempn))
A<-sum(bvec[bvec>0])
C<-0-sum(bvec[bvec<0])
D<-max(A,C)
ci.mat[ic,2]<-sum(con[,ic]*xtil)-crit*D
ci.mat[ic,3]<-sum(con[,ic]*xtil)+crit*D
}}
list(ci.mat=ci.mat,con=con)
}

trange<-function(dfvec,iter=10000,alpha=.05,SEED=TRUE){
if(SEED)set.seed(1)
dfv<-length(dfvec)/sum(1/dfvec)
vals<-NA
tvals<-NA
J<-length(dfvec)
for(i in 1:iter){
for(j in 1:J){
tvals[j]<-rt(1,dfvec[j])
}
vals[i]<-max(tvals)-min(tvals)
}
vals<-sort(vals)
ival<-round((1-alpha)*iter)
qval<-vals[ival]
qval
}
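#
# Illustrative usage of trange (a sketch, not part of the original
# file): approximate the .95 quantile of the range of three
# independent Student t variates having 10, 12 and 15 df.
if(FALSE){
trange(c(10,12,15),iter=2000)
}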


lsqs3<-function(x,y,plotit=TRUE,cop=2,ap.dep=FALSE,v2=FALSE,pv=FALSE,SEED=TRUE,nboot=1000,ypch="o",xpch="+"){
#
#  Compute the typical depth of x in y,
#  Compute the typical depth of y in x,
#  use the maximum of the two typical depths
#  as a test statistic.
#  This method is designed to be sensitive to
#  shifts in location.
#
# Use Tukey's depth; bivariate case only.
#
# cop=2 use MCD location estimator when
# computing depth with function fdepth
# cop=3 uses medians
# cop=4 uses MVE
#
#  xpch="+" means when plotting the data, data from the first
#  group are indicated by a +
#  ypch="o" are data from the second group
#
if(is.list(x))x<-matl(x)
if(is.list(y))y<-matl(y)
x<-elimna(x)
y<-elimna(y)
x<-as.matrix(x)
y<-as.matrix(y)
nx=nrow(x)
ny=nrow(y)
if(ncol(x) != ncol(y))stop("Number of variables not equal")
disyx<-NA # depth of y in x
disxy<-NA # depth of x in y
#
if(ncol(x)==2){
if(plotit){
plot(rbind(x,y),type="n",xlab="VAR 1",ylab="VAR 2")
points(x,pch=xpch)
points(y,pch=ypch)
if(nrow(x)>50 && !ap.dep)print("If execution time is high, might use ap.dep=TRUE")
if(!ap.dep)temp<-depth2(x,plotit=FALSE)
if(ap.dep)temp<-fdepth(x,plotit=FALSE,cop=cop)
flag<-(temp>=median(temp))
xx<-x[flag,]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
if(ap.dep)temp<-fdepth(y,plotit=FALSE,cop=cop)
if(!ap.dep)temp<-depth2(y,plotit=FALSE)
flag<-(temp>=median(temp))
xx<-y[flag,]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
flag<-(temp>=median(temp))
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,],lty=2)
lines(xx[c(temp[1],temp[length(temp)]),],lty=2)
}
tempyx<-NA
tempxy<-NA
if(ap.dep)tempyx<-fdepth(x,y,plotit=FALSE,cop=cop)
if(!ap.dep)tempyx<-depth2(x,y,plotit=FALSE)
if(ap.dep)tempxy<-fdepth(y,x,plotit=FALSE,cop=cop)
if(!ap.dep)tempxy<-depth2(y,x,plotit=FALSE)
}
if(ncol(x)==1){
tempyx<-unidepth(as.vector(x),as.vector(y))
tempxy<-unidepth(as.vector(y),as.vector(x))
}
if(ncol(x)>2){
if(!v2){
tempxy<-fdepth(y,x,plotit=FALSE,cop=cop)
tempyx<-fdepth(x,y,plotit=FALSE,cop=cop)
}
if(v2){
tempxy<-fdepthv2(y,x,plotit=FALSE,cop=cop)
tempyx<-fdepthv2(x,y,plotit=FALSE,cop=cop)
}}
qhatxy<-mean(tempxy)
qhatyx<-mean(tempyx)
qhat<-max(c(qhatxy,qhatyx))
n1<-nrow(x)
n2<-nrow(y)
nv<-(3*min(c(n1,n2))+max(c(n1,n2)))/4
if(ncol(x)==1)crit<-.2536-.4578/sqrt(nv)
if(ncol(x)==2)crit<-.1569-.3/sqrt(nv)
if(ncol(x)==3)crit<-.0861-.269/sqrt(nv)
if(ncol(x)==4)crit<-.054-.1568/sqrt(nv)
if(ncol(x)==5)crit<-.0367-.0968/sqrt(nv)
if(ncol(x)==6)crit<-.0262-.0565/sqrt(nv)
if(ncol(x)==7)crit<-.0174-.0916/sqrt(nv)
if(ncol(x)>7)crit<-.013
rej<-"Fail to reject"
if(qhat<=crit)rej<-"Reject"
testv=NULL
pval=NULL
if(pv){
if(SEED)set.seed(2)
rej="NULL"
for(i in 1:nboot)testv[i]=lsqs3.sub(rmul(n1,ncol(x)),rmul(n2,ncol(x)),cop=cop,ap.dep=ap.dep,v2=v2,)$test
pval=mean(qhat>=testv)
}
list(n1=nx,n2=ny,avg.depth.of.x.in.y=qhatxy,avg.depth.of.y.in.x=qhatyx,test=qhat,crit=crit,Decision=rej,p.value=pval)
}

kercon<-function(x,y,pyhat=FALSE,cval=NA,plotit=TRUE,eout=FALSE,xout=FALSE,
outfun=out,iran=.05,xlab="X",ylab="Y"){
#
# Compute conditional local weighted regression with Epanechnikov kernel
#
# cf. Fan, Annals of Statistics, 1993, 21, 196-217.
#
d<-ncol(x)
if(d!=2)stop("Argument x should have two columns only")
np1<-d+1
m<-elimna(cbind(x,y))
x<-m[,1:d]
y<-m[,np1]
yhat1<-NA
if(eout && xout)stop("Not allowed to have both eout=TRUE and xout=TRUE")
if(eout){
flag<-outfun(m)$keep
m<-m[flag,]
}
if(xout){
flag<-outfun(x)$keep
m<-m[flag,]
}
x<-m[,1:d]
y<-m[,np1]
if(is.na(cval[1])){temp<-idealf(x[,2])
cval<-c(temp$ql,median(x[,2]),temp$qu)
}
xrem<-x
x2<-x[,2]
n<-nrow(x)
sig<-sqrt(var(x2))
temp<-idealf(x2)
iqr<-(temp$qu-temp$ql)/1.34
A1<-min(c(sig,iqr))
A<-1.77
hval<-A*(1/n)^(1/6)  # Silverman, 1986, p. 86
svec<-NA
for(j in 1:d){
sig<-sqrt(var(x[,j]))
temp<-idealf(x[,j])
iqr<-(temp$qu-temp$ql)/1.34
A<-min(c(sig,iqr))
svec[j]<-A
x[,j]<-x[,j]/A
}
hval<-hval*sqrt(mean(svec^2))
ilow<-round(iran*length(y))
iup<-round((1-iran)*length(y))
for(il in 1:length(cval)){
temp4<-NA
for(j in 1:nrow(x)){
temp4[j]<-((x2[j]-cval[il])/A1)^2
}
yhat<-NA
epan1<-ifelse(temp4<1,.75*(1-temp4),0) # Epanechnikov kernel for x2
for(j in 1:n){
yhat[j]<-NA
temp1<-cbind(x[,1]-x[j,1],x[,2]-cval[il]/A)/hval
temp1<-temp1^2
temp1<-apply(temp1,1,FUN="sum")
temp<-.5*(d+2)*(1-temp1)/gamma(.5)^2
epan<-ifelse(temp1<1,temp,0) # Epanechnikov kernel, for both x1 and x2
if(epan1[j]>0)epan[j]<-epan[j]/epan1[j]
if(epan1[j]==0)epan[j]<-0
chkit<-sum(epan!=0)
if(chkit >= np1){
vals<-lsfit(x[,1],y,wt=epan)$coef
yhat[j]<-x[j,1]*vals[2]+vals[1]
}}
if(plotit){
xorder<-order(xrem[,1])
if(il==1)plot(xrem[,1],y,xlab=xlab,ylab=ylab)
lines(xrem[xorder[ilow:iup],1],yhat[xorder[ilow:iup]],lty=il)
}}
m<-"Done"
if(pyhat)m<-yhat
m
}

mscor<-function(m,corfun=spear,cop=3,MM=FALSE,gval=NA,ap=TRUE,pw=TRUE,STAND=FALSE){
#
# m is an n by p matrix
#
# Compute a skipped correlation matrix
#
#  corfun indicates the correlation to be used
#  corfun=pcor uses Pearson's correlation
#  corfun=spear uses Spearman's correlation
#
#  When calling outpro,
#  STAND=T means marginals are first standardized.
# This function returns the p by p matrix of correlations
#
# Method: Eliminate outliers using a projection technique.
# That is, compute Donoho-Gasko median, for each point
# consider the line between it and the median,
# project all points onto this line, and
# check for outliers using a boxplot rule.
# Repeat this for all points. A point is declared
# an outlier if for any projection it is an outlier
# using a modification of the usual boxplot rule.
#
# cop determines how center of the scatterplot is
# estimated; see the function outpro.
# cop=1 Donoho-Gasko halfspace median
# cop=2 MCD measure of location
# cop=3 marginal medians
# cop=4 MVE measure of location
#
# Eliminate any outliers and compute
# correlations using remaining data.
#
# gval is critical value for determining whether a point
# is an outlier. It is determined automatically if not specified,
# assuming that Spearman's correlation is used. Critical
# values when using some other correlation have not been
# determined.
#
# Hypothesis of zero correlations tested with FWE=.05
#
# ARGUMENTS:
# MM; see function outpro
# ap=T all pairwise comparisons are tested
# ap=F first variable is tested versus all others
# (for a total of p-1 tests).
# pw=T, print message about high execution time
# pw=F, suppress the message.
#
m<-elimna(m)
p<-ncol(m)
pm<-p-1
n<-nrow(m)
if(p<2)stop("Something wrong; number of variables is < 2")
if(pw && cop==1){
print("If execution time is too high,")
print("use cop=2 or 4 rather than the default value of 1")
}
if(ap){
inter<-c(2.374,2.780,3.030,3.208,3.372,3.502,3.722,3.825,3.943)
slope<-c(5.333,8.8,25.67,32.83,51.53,75.02,111.34,123.16,126.72)
expo<-c(-1,-1,-1.2,-1.2,-1.3,-1.4,-1.5,-1.5,-1.5)
if(p>10){
qvec<-NA
for(i in 1:9)qvec[i]<-inter[i]+slope[i]*n^expo[i]
pval<-c(2:10)
temp<-lsfit(pval,qvec)$coef
}
}
if(!ap){
inter<-c(2.374,2.54,2.666,2.92,2.999,3.097,3.414,3.286,3.258)
slope<-c(5.333,8.811,14.89,20.59,51.01,52.15,58.498,64.934,59.127)
expo<-c(-1,-1,-1.2,-1.2,-1.5,-1.5,-1.5,-1.5,-1.5)
if(p>10){
qvec<-NA
for(i in 1:9)qvec[i]<-inter[i]+slope[i]*n^expo[i]
pval<-c(1:9)
temp<-lsfit(pval,qvec)$coef
}
}
if(p<=10)crit<-inter[pm]+slope[pm]*n^expo[pm]
if(p>10)crit<-temp[2]*p+temp[1]
if(cop!=1 && is.na(gval))gval<-sqrt(qchisq(.975,ncol(m)))
temp<-outpro(m,plotit=FALSE,MM=MM,gval=gval,cop=cop,STAND=STAND)$keep
mcor<-corfun(m[temp,])$cor
test<-abs(mcor*sqrt((nrow(m)-2)/(1-mcor^2)))
diag(test) <- NA
if(!ap){
test<-as.matrix(test[1,])
}
list(cor=mcor,crit.val=crit,test.stat=test)
}
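#
# Illustrative usage of mscor (a sketch, not part of the original
# file; assumes outpro and spear from this file are available):
if(FALSE){
set.seed(13)
m<-matrix(rnorm(150),ncol=3)
mscor(m)   # skipped Spearman correlations plus test statistics
}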

dfried<-function(m,plotit=TRUE,pop=0,fr=.8,v2=FALSE,op=FALSE){
#
# Compare dependent groups using halfspace depth of
# 0 relative to distribution of differences.
#
# When plotting differences scores:
# pop=1 Plot expected frequency curve
# pop=2 kernel density estimate
# pop=3 S+ kernel density estimate
# pop=4 boxplot
#
if(is.list(m))m<-matl(m)
if(!is.matrix(m))stop("m should be a matrix having at least 2 columns.")
m<-elimna(m)
library(MASS)
K<-ncol(m)
n<-nrow(m)
if(n<=10 && !op)print("With n<=10, might want to use op=T")
J<-(K^2-K)/2
dcen<-cov.mcd(m)$center
center<-NA
pval<-matrix(NA,ncol=J,nrow=nrow(m))
zvec<-rep(0,J)
ic<-0
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
ic<-ic+1
pval[,ic]<-m[,k]-m[,kk]
center[ic]<-dcen[k]-dcen[kk]
}}}
pval0<-rbind(pval,zvec)
if(ncol(pval)==1)temp<-unidepth(as.vector(pval0))
if(!v2){
if(ncol(pval)>1)temp<-fdepth(pval0,center=center)
}
if(v2){
if(ncol(pval)>1)temp<-fdepthv2(pval0)
}
big.dep<-max(temp)
if(op){
v3<-dmean(pval,tr=.5,dop=2)
v3<-t(as.matrix(v3))
big.dep<-max(max(temp),fdepthv2(pval0,v3))
}
phat<-temp[nrow(m)+1]/big.dep
# Determine critical value
if(K==2)crit<-0.95-1.46/n^.5
if(K==3)crit<-1.00-1.71/n^.5
if(K==4)crit<-1.06-1.77/n^.5
if(K==5)crit<-1.11-1.76/n^.5
if(K==6)crit<-1.41-1.62/n^.3
if(K==7)crit<-1.49-1.71/n^.3
if(K>=8)crit<-1.39-1.38/n^.3
crit<-min(c(crit,1))
if(plotit && ncol(pval)==1){
if(pop==0)akerd(pval,fr=fr)
if(pop==1)rdplot(pval,fr=fr)
if(pop==2)kdplot(pval)
if(pop==3)skerd(pval)
if(pop==4)boxplot(pval)
}
list(phat=phat,crit.val=crit)
}

wrregfun<-function(slope,x=x,y=y){
#
# Rank-based (Wilcoxon) regression objective function:
# the residuals are weighted by their Wilcoxon scores
# (a Jaeckel-type dispersion), to be minimized over the slope.
#
x<-as.matrix(x)
res<-y-x%*%slope
v1<-rank(res)
v2<-sqrt(12)*(v1/(length(y)+1)-.5)
wrregfun<-sum(v2*res)
wrregfun
}

spat.sub<-function(x,theta){
#
# Mean Euclidean distance between the rows of x and the point theta;
# spat minimizes this over theta to obtain the spatial median.
#
xx<-x
for(i in 1:ncol(x))xx[,i]<-x[,i]-theta[i]
xx<-xx^2
temp<-sqrt(apply(xx,1,sum))
val<-mean(temp)
val
}
spat<-function(x){
#
# compute spatial median
# x is an n by p matrix
#
if(!is.matrix(x))stop("x must be a matrix")
x<-elimna(x)
START<-apply(x,2,median)
#val<-nelder(x,ncol(x),spat.sub,START=START)
val=optim(START,spat.sub,x=x,method='BFGS')$par             
val
}
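#
# Illustrative usage of spat (a sketch, not part of the original
# file): the spatial median minimizes the mean Euclidean distance
# to the rows of x.
if(FALSE){
set.seed(15)
m<-matrix(rnorm(100),ncol=2)
spat(m)
}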

rungen<-function(x,y,est=onestep,fr=1,plotit=TRUE,scat=TRUE,pyhat=FALSE,
eout=FALSE,xout=FALSE,xlab="x",ylab="y",outfun=out,LP=TRUE,...){
#
# Running interval smoother that can be used with any measure
# of location or scale. By default, an M-estimator is used.
#
# LP=TRUE: the plot is further smoothed via lowess (using lplot)
#
# fr controls the amount of smoothing
plotit<-as.logical(plotit)
scat<-as.logical(scat)
m<-cbind(x,y)
m<-elimna(m)
if(eout && xout)stop("Not allowed to have eout=xout=T")
if(eout){
flag<-outfun(m,plotit=FALSE)$keep
m<-m[flag,]
}
if(xout){
flag<-outfun(m[,1])$keep
m<-m[flag,]
}
x=m[,1]
y=m[,2]
rmd<-c(1:length(x))
for(i in 1:length(x))rmd[i]<-est(y[near(x,x[i],fr)],...)
if(LP){
ord=order(x)
x=x[ord]
rmd=rmd[ord]
rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
}
if(plotit){
#ord=order(x)
#x=x[ord]
#rmd=rmd[ord]
#if(LP)rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
if(scat){
plot(c(x,x),c(y,rmd),xlab=xlab,ylab=ylab,type="n")
points(x,y)
}
if(!scat)plot(c(x,x),c(y,rmd),type="n",ylab=ylab,xlab=xlab)
points(x,rmd,type="n")
sx<-sort(x)
xorder<-order(x)
sysm<-rmd[xorder]
lines(sx,sysm)
}
if(pyhat)output<-rmd
if(!pyhat)output<-"Done"
list(output=output)
}
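#
# Illustrative usage of rungen (a sketch, not part of the original
# file): a running-interval smooth of y on x using the median.
if(FALSE){
set.seed(17)
x<-rnorm(100)
y<-x^2+rnorm(100)
rungen(x,y,est=median,pyhat=TRUE)$output
}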

pmodchk<-function(x,y,regfun=tsreg,gfun=runm3d,op=1,eout=FALSE,xout=FALSE,fr=.8,...){
#
# Compare  regression fit to smooth
#
fit1<-y-regfun(x,y)$res
fit2<-gfun(x,y,pyhat=TRUE,plotit=FALSE,...)
if(op==0)plot(fit1,fit2,xlab="Reg. Fit",ylab="Gen. Fit")
if(op==1)lplot(fit1,fit2,eout=eout,xout=xout)
if(op==2)runmean(fit1,fit2,eout=eout,xout=xout,fr=fr)
abline(0,1)
}

adpchk<-function(x,y,adfun=adrun,gfun=runm3d,xlab="First Fit",
ylab="Second Fit",...){
#
# Compare adfun, usually an additive fit, to fit
# based on gfun.
#
fit1<-adfun(x,y,pyhat=TRUE,plotit=FALSE)
if(is.list(fit1))fit1=fit1$yhat
fit2<-gfun(x,y,pyhat=TRUE,plotit=FALSE)
if(is.list(fit2))fit2=fit2$yhat
plot(fit1,fit2,xlab=xlab,ylab=ylab)
abline(0,1)
}


adrun<-function(x,y,est=tmean,iter=10,pyhat=FALSE,plotit=TRUE,fr=1,xlab="X",
ylab="Y",zlab="",
theta=50,phi=25,expand=.5,scale=FALSE,zscale=TRUE,xout=FALSE,eout=xout,outfun=out,ticktype=
"simple",...){
#
# additive model based on running interval smoother
# and backfitting algorithm
#
m<-elimna(cbind(x,y))
if(xout){
flag<-outfun(x,plotit=FALSE)$keep
x=x[flag,]
y=y[flag]
}
x<-as.matrix(x)
p<-ncol(x)
if(p==1)val<-rungen(x[,1],y,est=est,pyhat=TRUE,plotit=plotit,fr=fr,
xlab=xlab,ylab=ylab,...)$output
if(p>1){
library(MASS)
library(akima)
np<-p+1
x<-m[,1:p]
y<-m[,np]
fhat<-matrix(NA,ncol=p,nrow=length(y))
fhat.old<-matrix(NA,ncol=p,nrow=length(y))
res<-matrix(NA,ncol=np,nrow=length(y))
dif<-1
for(i in 1:p)
fhat.old[,i]<-rungen(x[,i],y,est=est,pyhat=TRUE,plotit=FALSE,fr=fr,...)$output
eval<-NA
for(it in 1:iter){
for(ip in 1:p){
res[,ip]<-y
for(ip2 in 1:p){
if(ip2 != ip)res[,ip]<-res[,ip]-fhat.old[,ip2]
}
fhat[,ip]<-rungen(x[,ip],res[,ip],est=est,pyhat=TRUE,plotit=FALSE,fr=fr,...)$output
}
eval[it]<-sum(abs(fhat/sqrt(sum(fhat^2))-fhat.old/sqrt(sum(fhat.old^2))))
if(it > 1){
itm<-it-1
dif<-abs(eval[it]-eval[itm])
}
fhat.old<-fhat
if(dif<.01)break
}
val<-apply(fhat,1,sum)
aval<-est(y-val,...)
val<-val+aval
if(plotit && p==2){
fitr<-val
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fitr<-interp(mkeep[,1],mkeep[,2],fitr)
persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand,
scale=scale,ticktype=ticktype)
}}
if(!pyhat)val<-"Done"
val
}

riplot<-function(x,y,adfun=adrun,plotfun=lplot,eout=FALSE,xout=TRUE,scale=FALSE){
#
# Plot used to investigate regression interaction
# (the extent a generalized additive model does not fit data).
# Compute additive fit, plot residuals
# versus x, an n by 2 matrix.
#
if(!is.matrix(x))stop(" x must be a matrix")
if(ncol(x)!=2)stop(" x must have two columns only")
yhat<-adfun(x,y,pyhat=TRUE,eout=eout,xout=xout,plotit=FALSE)
plotfun(x,y-yhat,eout=eout,xout=xout,scale=scale)
}

adtestv2<-function(x,y,est=tmean,nboot=500,alpha=.05,fr=NA,xout=TRUE,outfun=outpro,com.pval=FALSE,SEED=TRUE,qval=.5,...){
#
# For two predictors, test the hypothesis that the regression model is additive. That is, there is no interaction.
#  In essence, for the model Y=g_1(X_1)+g_2(X_2)+g_3(X_1X_2), test H_0: g_3(X_1X_2)=0
#
# The method fits an additive model using running interval smoother and the backfitting
# algorithm and then tests the hypothesis that the median of X_1X_2, given the residuals,
# is a straight horizontal line.
#
if(ncol(x)!=2)stop("There should be two predictors")
temp<-cbind(x,y)
p<-ncol(x)
p1<-p+1
temp<-elimna(temp)
x<-temp[,1:p]
x<-as.matrix(x)
y<-temp[,p1]
if(xout){
keepit<-rep(T,nrow(x))
flag<-outfun(x,plotit=FALSE,...)$out.id
keepit[flag]<-FALSE
x<-x[keepit,]
y<-y[keepit]
}
if(alpha<.05 && nboot<=100)warning("You used alpha<.05 and nboot<=100")
if(is.na(fr)){
fr<-.8
if(ncol(x)==2){
nval<-c(20,30,50,80,100,200,300,400)
fval<-c(0.40,0.36,0.3,0.25,0.23,.12,.08,.015)
if(length(y)<=400)fr<-approx(nval,fval,length(y))$y
if(length(y)>400)fr<-.01
}
}
if(SEED)set.seed(2)
x<-as.matrix(x)
mflag<-matrix(NA,nrow=length(y),ncol=length(y))
for (j in 1:length(y)){
for (k in 1:length(y)){
mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x))
}
}
yhat<-adrun(x,y,est=est,plotit=FALSE,fr=fr,pyhat=T)
regres<-y-yhat
test2=medind(regres,x[,1]*x[,2],qval=qval,nboot=nboot,com.pval=com.pval,SEED=SEED,alpha=alpha,
pr=TRUE,xout=xout,outfun=outfun,...)
test2
}


adtests1<-function(vstar,yhat,res,mflag,x,fr){
ystar<-yhat+res*vstar
bres<-adrun(x,ystar,fr=fr,pyhat=TRUE,plotit=FALSE)
bres<-ystar-bres
rval<-0
for (i in 1:nrow(x)){
rval[i]<-sum(bres[mflag[,i]])
}
rval
}
runsm2g<-function(x1,y1,x2,val=median(x2),est=tmean,sm=FALSE,fr=.8,xlab="X",
ylab="Y",...){
#
# Plot of running interval smoother for two groups
# Groups are defined according to whether x2<val,
# then a smooth of x1 versus y1 for both groups is created.
#
# fr controls the amount of smoothing
# est is the measure of location, which defaults to tmean
# (a 20% trimmed mean)
#
# Missing values are automatically removed.
#
m<-cbind(x1,y1,x2)
m<-elimna(m)
x2<-m[,3]
flag<-(x2<val)
x1<-m[flag,1]
y1<-m[flag,2]
x2<-m[!flag,1]
y2<-m[!flag,2]
runmean2g(x1, y1, x2, y2, fr = fr, est = est,sm=sm,xlab=xlab,
ylab=ylab,...)
}
rung3hat<-function(x,y,est=onestep,pts,fr=1,...){
#
# Compute y hat for each row of data in the matrix pts
# using a running  interval method
#
# fr controls amount of smoothing
# tr is the amount of trimming
# x is an n by p matrix of predictors.
# pts is an m by p matrix, m>=1.
#
if(!is.matrix(x))stop("Predictors are not stored in a matrix.")
if(!is.matrix(pts))stop("The third argument, pts, must be a matrix.")
library(MASS)
m<-cov.mve(x)
rmd<-1 # Initialize rmd
nval<-1
for(i in 1:nrow(pts)){
rmd[i]<-est(y[near3d(x,pts[i,],fr,m)],...)
nval[i]<-length(y[near3d(x,pts[i,],fr,m)])
}
list(rmd=rmd,nval=nval)
}


lta.sub<-function(X,theta,h){
np<-ncol(X)
p<-np-1
x<-X[,1:p]
y<-X[,np]
temp<-t(t(x)*theta[2:np])
yhat<-apply(temp,1,sum)+theta[1]
res<-abs(y-yhat)
res<-sort(res)
val<-sum(res[1:h])
val
}
 ltareg<-function(x, y, tr = 0.2, h = NA,op=2)
{
        #
        # Compute the least trimmed absolute value regression estimator.
        # The default amount of trimming is .2
# op=1,  use ltsreg as initial estimate
# op!=1, use tsreg
#
# If h is specified, use the h smallest residuals and ignore tr
#
x<-as.matrix(x)
library(MASS)
if(is.na(h)) h <- length(y) - floor(tr * length(y))
X<-cbind(x,y)
X<-elimna(X)
np<-ncol(X)
p<-np-1
x<-X[,1:p]
x<-as.matrix(x)
y<-X[,np]
if(op==1)temp<-ltsreg(x,y)$coef
if(op!=1)temp<-tsreg(x,y)$coef
START<-temp
coef<-nelderv2(X,np,FN=lta.sub,START=START,h=h)
        res <- y - x%*%coef[2:np] - coef[1]
        list(coef = coef, residuals = res)
}
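#
# Illustrative usage of ltareg (a sketch, not part of the original
# file; assumes tsreg and elimna from this file are available):
if(FALSE){
set.seed(19)
x<-rnorm(50)
y<-2*x+rnorm(50)
y[1:3]<-y[1:3]+20   # a few outliers
ltareg(x,y)$coef
}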



nelderv2<-function(x,N,FN,START=c(rep(1,N)),STEP=c(rep(1,N)),
XMIN=c(rep(0,N)),XSEC=c(rep(0,N)),...){
#     NELDER-MEAD method for minimizing a function
#
#     TAKEN FROM OLSSON, J QUALITY TECHNOLOGY, 1974, 6, 56.
#
#     x= n by p matrix containing data; it is used by
#        function to be minimized.
#     N= number of parameters
#
#     FN=the function to be minimized
#     FORM: FN(x,theta), theta is vector containing
#     values for N parameters.
#
#     START = starting values.
#     STEP=initial step.
#     This function returns the N values for theta that minimize FN
#
      ICOUNT<-500
      REQMIN<-.0000001
      NN<-N+1
      P<-matrix(NA,nrow=N,ncol=NN)
      P[,NN]<-START
      PBAR<-NA
      RCOEFF<-1
      ECOEFF<-2
      CCOEFF<-.5
      KCOUNT<-ICOUNT
      ICOUNT<-0
      DABIT<-2.04067e-35
      BIGNUM<-1.e38
      KONVGE<-5
      XN<-N
      DN<-N
      Y<-rep(0,NN)
      Y[NN]<-FN(x,START,...)
      ICOUNT<-ICOUNT+1
      for(J in 1:N){
      DCHK<-START[J]
      START[J]<-DCHK+STEP[J]
      for(I in 1:N){
      P[I,J]<-START[I]
}
      Y[J]<-FN(x,START,...)
      ICOUNT<-ICOUNT+1
      START[J]<-DCHK
}
      I1000<-T
       while(I1000){
      YLO<-Y[1]
      YNEWLO<-YLO
      ILO<-1
      IHI<-1
      for(I in 2:NN){
      if(Y[I] <  YLO){
      YLO<-Y[I]
      ILO<-I}
      if(Y[I] > YNEWLO){
      YNEWLO<-Y[I]
      IHI<-I}
}
      DCHK<-(YNEWLO+DABIT)/(YLO+DABIT)-1
      if(abs(DCHK) < REQMIN){
      I1000<-F
      next
}
      KONVGE<-KONVGE-1
      if(KONVGE == 0){
      KONVGE<-5
      for(I in 1:N){
      COORD1<-P[I,1]
      COORD2<-COORD1
      for(J in 2:NN){
      if(P[I,J] < COORD1)COORD1<-P[I,J]
      if(P[I,J] > COORD2)COORD2<-P[I,J]
}     # 2010 CONTINUE
      DCHK<-(COORD2+DABIT)/(COORD1+DABIT)-1
      if(abs(DCHK) > REQMIN)break
}
}
     if(ICOUNT >= KCOUNT){
      I1000<-F
      next
}
      for(I in 1:N){
      Z<-0.0
      Z<-sum(P[I,1:NN]) # 6
      Z<-Z-P[I,IHI]
  PBAR[I]<-Z/DN
}
    PSTAR<-(1.+RCOEFF)*PBAR-RCOEFF*P[,IHI]
      YSTAR<-FN(x,PSTAR,...)
      ICOUNT<-ICOUNT+1
      if(YSTAR < YLO && ICOUNT >= KCOUNT){
       P[,IHI]<-PSTAR
       Y[IHI]<-YSTAR
       next
}
  IFLAG<-T
      if(YSTAR < YLO){
    P2STAR<-ECOEFF*PSTAR+(1-ECOEFF)*PBAR
      Y2STAR<-FN(x,P2STAR,...)
      ICOUNT<-ICOUNT+1
      if(Y2STAR >= YSTAR){
       P[,IHI]<-PSTAR
       Y[IHI]<-YSTAR
       next #In essence, go to 19 which goes to 1000
}
      IFLAG<-T
      while(YSTAR < Y[IHI]){
      P[,IHI]<-P2STAR
      Y[IHI]<-Y2STAR
      IFLAG<-F
     break
     L<-sum(Y[1:NN] > YSTAR)
      if(L > 1){
       P[,IHI]<-PSTAR
       Y[IHI]<-YSTAR
       IFLAG<-T
       break
}
       if(L > 1)break # go to 19
      if(L != 0){
      P[1:N,IHI]<-PSTAR[1:N]
      Y[IHI]<-YSTAR
}
I1000<-F
break
  if(ICOUNT >= KCOUNT){
      I1000<-F
      next
}
   P2STAR[1:N]<-CCOEFF*P[1:N,IHI]+(1-CCOEFF)*PBAR[1:N]
      Y2STAR<-FN(x,P2STAR,...)
      ICOUNT<-ICOUNT+1
}   # END WHILE
}
if(IFLAG){
for(J in 1:NN){
P[,J]=(P[,J]+P[,ILO])*.5
   XMIN<-P[,J]
      Y[J]<-FN(x,XMIN,...)
}
      ICOUNT<-ICOUNT+NN
      if(ICOUNT < KCOUNT)next
      I1000<-F
next
}
      P[1:N,IHI]<-PSTAR[1:N]
      Y[IHI]<-YSTAR
}
    for(J in 1:NN){
      XMIN[1:N]<-P[1:N,J]
}
      Y[J]<-FN(x,XMIN,...)
      YNEWLO<-BIGNUM
  for(J in 1:NN){
      if (Y[J] < YNEWLO){
      YNEWLO<-Y[J]
      IBEST<-J
}}
      Y[IBEST]<-BIGNUM
      YSEC<-BIGNUM
for(J in 1:NN){
if(Y[J] < YSEC){
      YSEC<-Y[J]
      ISEC<-J
}}
      XMIN[1:N]<-P[1:N,IBEST]
      XSEC[1:N]<-P[1:N,ISEC]
XMIN
}




nelder<-function(x,N,FN,START=c(rep(1,N)),STEP=c(rep(1,N)),
XMIN=c(rep(0,N)),XSEC=c(rep(0,N))){
#     NELDER-MEAD method for minimizing a function
#
#     TAKEN FROM OLSSON, J QUALITY TECHNOLOGY, 1974, 6, 56.
#
#     x= n by p matrix containing data; it is used by
#        function to be minimized.
#     N= number of parameters
#
#     FN=the function to be minimized
#     FORM: FN(x,theta), theta is vector containing
#     values for N parameters.
#
#     START = starting values.
#     STEP=initial step.
#     This function returns the N values for theta that minimize FN
#
      ICOUNT<-500
      REQMIN<-.0000001
      NN<-N+1
      P<-matrix(NA,nrow=N,ncol=NN)
      P[,NN]<-START
      PBAR<-NA
      RCOEFF<-1
      ECOEFF<-2
      CCOEFF<-.5
      KCOUNT<-ICOUNT
      ICOUNT<-0
      DABIT<-2.04067e-35
      BIGNUM<-1.e38
      KONVGE<-5
      XN<-N
      DN<-N
      Y<-rep(0,NN)
      Y[NN]<-FN(x,START)
      ICOUNT<-ICOUNT+1
      for(J in 1:N){
      DCHK<-START[J]
      START[J]<-DCHK+STEP[J]
      for(I in 1:N){
      P[I,J]<-START[I]
}
      Y[J]<-FN(x,START)
      ICOUNT<-ICOUNT+1
      START[J]<-DCHK
}
      I1000<-T
       while(I1000){
      YLO<-Y[1]
      YNEWLO<-YLO
      ILO<-1
      IHI<-1
      for(I in 2:NN){
      if(Y[I] <  YLO){
      YLO<-Y[I]
      ILO<-I}
      if(Y[I] > YNEWLO){
      YNEWLO<-Y[I]
      IHI<-I}
}
      DCHK<-(YNEWLO+DABIT)/(YLO+DABIT)-1
      if(abs(DCHK) < REQMIN){
      I1000<-F
      next
}
      KONVGE<-KONVGE-1
      if(KONVGE == 0){
      KONVGE<-5
      for(I in 1:N){
      COORD1<-P[I,1]
      COORD2<-COORD1
      for(J in 2:NN){
      if(P[I,J] < COORD1)COORD1<-P[I,J]
      if(P[I,J] > COORD2)COORD2<-P[I,J]
}     # 2010 CONTINUE
      DCHK<-(COORD2+DABIT)/(COORD1+DABIT)-1
      if(abs(DCHK) > REQMIN)break
}
}
     if(ICOUNT >= KCOUNT){
      I1000<-F
      next
}
      for(I in 1:N){
      Z<-0.0
      Z<-sum(P[I,1:NN]) # 6
      Z<-Z-P[I,IHI]
  PBAR[I]<-Z/DN
}
    PSTAR<-(1.+RCOEFF)*PBAR-RCOEFF*P[,IHI]
      YSTAR<-FN(x,PSTAR)
      ICOUNT<-ICOUNT+1
      if(YSTAR < YLO && ICOUNT >= KCOUNT){
       P[,IHI]<-PSTAR
       Y[IHI]<-YSTAR
       next
}
  IFLAG<-T
      if(YSTAR < YLO){
    P2STAR<-ECOEFF*PSTAR+(1-ECOEFF)*PBAR
      Y2STAR<-FN(x,P2STAR)
      ICOUNT<-ICOUNT+1
      if(Y2STAR >= YSTAR){
       P[,IHI]<-PSTAR
       Y[IHI]<-YSTAR
       next #In essence, go to 19 which goes to 1000
}
      IFLAG<-T
      while(YSTAR < Y[IHI]){
      P[,IHI]<-P2STAR
      Y[IHI]<-Y2STAR
      IFLAG<-F
     break
     L<-sum(Y[1:NN] > YSTAR)
      if(L > 1){
       P[,IHI]<-PSTAR
       Y[IHI]<-YSTAR
       IFLAG<-T
       break
}
       if(L > 1)break # go to 19
      if(L != 0){
      P[1:N,IHI]<-PSTAR[1:N]
      Y[IHI]<-YSTAR
}
I1000<-F
break
  if(ICOUNT >= KCOUNT){
      I1000<-F
      next
}
   P2STAR[1:N]<-CCOEFF*P[1:N,IHI]+(1-CCOEFF)*PBAR[1:N]
      Y2STAR<-FN(x,P2STAR)
      ICOUNT<-ICOUNT+1
}   # END WHILE
}
if(IFLAG){
for(J in 1:NN){
P[,J]<-(P[,J]+P[,ILO])*.5
   XMIN<-P[,J]
      Y[J]<-FN(x,XMIN)
}
      ICOUNT<-ICOUNT+NN
      if(ICOUNT < KCOUNT)next
      I1000<-F
next
}
      P[1:N,IHI]<-PSTAR[1:N]
      Y[IHI]<-YSTAR
}
    for(J in 1:NN){
      XMIN[1:N]<-P[1:N,J]
}
      Y[J]<-FN(x,XMIN)
      YNEWLO<-BIGNUM
  for(J in 1:NN){
      if (Y[J] < YNEWLO){
      YNEWLO<-Y[J]
      IBEST<-J
}}
      Y[IBEST]<-BIGNUM
      YSEC<-BIGNUM
for(J in 1:NN){
if(Y[J] < YSEC){
      YSEC<-Y[J]
      ISEC<-J
}}
      XMIN[1:N]<-P[1:N,IBEST]
      XSEC[1:N]<-P[1:N,ISEC]
XMIN
}

splotg2<-function(x,y,op=TRUE,xlab="X",ylab="Rel. Freq."){
#
# Frequency plot
#
x<-x[!is.na(x)]
temp<-sort(unique(x))
freqx<-NA
for(i in 1:length(temp)){
freqx[i]<-sum(x==temp[i])
}
freqx<-freqx/length(x)
y<-y[!is.na(y)]
tempy<-sort(unique(y))
freqy<-NA
for(i in 1:length(tempy)){
freqy[i]<-sum(y==tempy[i])
}
freqy<-freqy/length(y)
plot(c(temp,tempy),c(freqx,freqy),type="n",xlab=xlab,ylab=ylab)
points(temp,freqx)
points(tempy,freqy,pch="o")
if(op){
lines(temp,freqx)
lines(tempy,freqy,lty=2)
}
}
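#
# Illustrative usage of splotg2 (a sketch, not part of the original
# file): relative frequencies for two discrete samples.
if(FALSE){
set.seed(21)
splotg2(rbinom(50,5,.4),rbinom(50,5,.6))
}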


stein1.tr<-function(x,del,alpha=.05,pow=.8,tr=.2){
#
# Extension of Stein's method when performing all pairwise
# comparisons among J dependent groups.
#
# If x represents a single group, one-sample analysis is performed.
#
if(tr < 0 || tr >=.5)stop("Argument tr must be between 0 and .5")
if(is.matrix(x))m<-x
if(is.list(x))m<-matl(x)
if(!is.matrix(x) && !is.list(x))m<-matrix(x,ncol=1)
m<-elimna(m)
m<-as.matrix(m)
ntest<-1
n<-nrow(m)
J<-ncol(m)
if(ncol(m) > 1)ntest<-(J^2-J)/2
g<-floor(tr*nrow(m))
df<-n-2*g-1
t1<-qt(pow,df)
t2<-qt(alpha/(2*ntest),df)
dv<-(del/(t1-t2))^2
nvec<-NA
if(ntest > 1){
ic<-0
for (j in 1:ncol(m)){
for (jj in 1:ncol(m)){
if(j<jj){
ic<-ic+1
dif<-m[,j]-m[,jj]
nvec[ic]<-floor(trimse(dif,tr=tr)/dv)+1
}}}}
if(ntest == 1)nvec[1]<-floor(trimse(m[,1],tr=tr)/dv)+1
N<-max(c(n,nvec))
N
}
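#
# Illustrative usage of stein1.tr (a sketch, not part of the original
# file): required sample size to detect a difference del=.5 with
# power .8 for all pairwise comparisons of three dependent groups.
if(FALSE){
set.seed(23)
x<-matrix(rnorm(60),ncol=3)
stein1.tr(x,del=.5)
}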

stein2.tr<-function(x,y,alpha=.05,tr=.2){
#
# Extension of Stein's method when performing all pairwise
# comparisons among J dependent groups.
#
# If x represents a single group, one-sample analysis is performed.
#
if(tr < 0 || tr >=.5)stop("Argument tr must be between 0 and .5")
if(is.matrix(x))m<-x
if(is.list(x))m<-matl(x)
if(is.list(y))y<-matl(y)
if(!is.matrix(x) && !is.list(x))m<-matrix(x,ncol=1)
if(!is.matrix(y) && !is.list(y))y<-matrix(y,ncol=1)
m<-elimna(m)
m<-as.matrix(m)
g<-floor(tr*nrow(m))
df<-nrow(m)-2*g-1
m<-rbind(m,y)
ic<-0
ntest<-(ncol(m)^2-ncol(m))/2
if(ntest==0)ntest<-1
test<-matrix(NA,ncol=3,nrow=ntest)
for (j in 1:ncol(m)){
for (jj in 1:ncol(m)){
if(j<jj){
ic<-ic+1
dif<-m[,j]-m[,jj]
test[ic,1]<-j
test[ic,2]<-jj
test[ic,3]<-sqrt(nrow(m))*(1-2*tr)*mean(dif,tr=tr,na.rm=TRUE)/sqrt(winvar(dif))
}}}
crit<-qt(1-alpha/(2*ntest),df)
if(ntest == 1)
test<-sqrt(nrow(m))*(1-2*tr)*mean(m[,1],tr=tr,na.rm=TRUE)/sqrt(winvar(m[,1]))
list(test.stat=test,crit.val=crit)
}

pdis<-function(m,MM=FALSE,cop=3,dop=1,center=NA){
#
# Compute projection distances for points in m
#
#
#
#  MM=F  Projected distances are scaled
#  using the interquartile range.
#  MM=T  Projected distances are scaled using MAD.
#
#  There are five options for computing the center of the
#  cloud of points when computing projections:
#  cop=1 uses Donoho-Gasko median
#  cop=2 uses MCD center
#  cop=3 uses median of the marginal distributions.
#  cop=4 uses MVE center
#  cop=5 uses skipped mean
#
m<-elimna(m) # Remove missing values
m<-as.matrix(m)
if(ncol(m)==1){
if(is.na(center[1]))center<-median(m)
dis<-abs(m[,1]-center)
if(!MM){
temp<-idealf(dis)
pdis<-dis/(temp$qu-temp$ql)
}
if(MM)pdis<-dis/mad(dis)
}
if(ncol(m)>1){
if(is.na(center[1])){
if(cop==1)center<-dmean(m,tr=.5,dop=dop)
if(cop==2)center<-cov.mcd(m)$center
if(cop==3)center<-apply(m,2,median)
if(cop==4)center<-cov.mve(m)$center
if(cop==5)center<-smean(m)
}
dmat<-matrix(NA,ncol=nrow(m),nrow=nrow(m))
for (i in 1:nrow(m)){
B<-m[i,]-center
dis<-NA
BB<-B^2
bot<-sum(BB)
if(bot!=0){
for (j in 1:nrow(m)){
A<-m[j,]-center
temp<-sum(A*B)*B/bot
dis[j]<-sqrt(sum(temp^2))
}
if(!MM){
temp<-idealf(dis)
dmat[,i]<-dis/(temp$qu-temp$ql)
}
if(MM)dmat[,i]<-dis/mad(dis)
}}
pdis<-apply(dmat,1,max,na.rm=TRUE)
}
pdis
}
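#
# Illustrative usage of pdis (a sketch, not part of the original
# file): an outlying point receives a relatively large projection
# distance.
if(FALSE){
set.seed(25)
m<-rbind(matrix(rnorm(58),ncol=2),c(6,6))
pdis(m)
}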

runmbo<-function(x,y,fr=1,est=tmean,xlab="X",ylab="Y",pts=x,RNA=FALSE,atr=0,
pyhat=FALSE,eout=FALSE,outfun=out,plotit=TRUE,xout=FALSE,scat=TRUE,nboot=40,SEED=TRUE,...){
#
# running interval smooth with bagging
#
# fr controls amount of smoothing
# tr is the amount of trimming
#
# Missing values are automatically removed.
#
# RNA=F, do not remove missing values when averaging
# (computing the smooth) at x
# xout=T removes points for which x is an outlier
# eout=T removes points for which (x,y) is an outlier
# atr is amount of trimming when averaging over the bagged
# values
# est is the measure of location to be estimated
# est=tmean means estimate 20% trimmed mean of y given x
#
if(SEED)set.seed(2)
temp<-cbind(x,y)
if(ncol(temp)>2)stop("Use run3bo with more than 1 predictor")
temp<-elimna(temp) # Eliminate any rows with missing values
if(eout && xout)stop("Not allowed to have eout=xout=T")
if(eout){
flag<-outfun(temp,plotit=FALSE)$keep
temp<-temp[flag,]
}
if(xout){
flag<-outfun(x,plotit=FALSE)$keep
temp<-temp[flag,]
}
x<-temp[,1]
y<-temp[,2]
pts<-as.matrix(pts)
mat<-matrix(NA,nrow=nboot,ncol=nrow(pts))
vals<-NA
for(it in 1:nboot){
idat<-sample(c(1:length(y)),replace=TRUE)
xx<-temp[idat,1]
yy<-temp[idat,2]
mat[it,]<-runhat(xx,yy,pts=pts,est=est,fr=fr,...)
}
rmd<-apply(mat,2,mean,na.rm=RNA,tr=atr)
if(plotit){
if(scat){
plot(c(x,x),c(y,rmd),xlab=xlab,ylab=ylab,type="n")
points(x,y)
}
if(!scat)plot(c(x,x),c(y,rmd),type="n",xlab=xlab,ylab=ylab)
points(x, rmd, type = "n")
sx <- sort(x)
xorder <- order(x)
sysm <- rmd[xorder]
lines(sx, sysm)
}
output="Done"
if(pyhat)output<-rmd
output
}
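#
# Illustrative usage of runmbo (a sketch, not part of the original
# file): a bagged running-interval smooth based on the median.
if(FALSE){
set.seed(27)
x<-rnorm(80)
y<-sin(x)+rnorm(80,sd=.5)
runmbo(x,y,est=median,pyhat=TRUE)
}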


run3bo<-function(x,y,fr=1,est=tmean,theta = 50, phi = 25,nmin=0,
pyhat=FALSE,eout=FALSE,outfun=out,plotit=TRUE,xout=FALSE,nboot=40,SEED=TRUE,STAND=FALSE,
expand=.5,scale=FALSE,xlab="X",ylab="Y",zlab="",ticktype="simple",...){
#
# running mean using interval method
#
# fr controls amount of smoothing
# tr is the amount of trimming
#
# Missing values are automatically removed.
#
library(MASS)
library(akima)
if(SEED)set.seed(2)
temp<-cbind(x,y)
x<-as.matrix(x)
p<-ncol(x)
p1<-p+1
if(p>2)plotit<-F
temp<-elimna(temp) # Eliminate any rows with missing values.
x<-temp[,1:p]
x<-as.matrix(x)
y<-temp[,p1]
if(xout){
keepit<-rep(T,nrow(x))
flag<-outfun(x,plotit=FALSE,STAND=STAND,...)$out.id
keepit[flag]<-F
x<-x[keepit,]
y<-y[keepit]
}
mat<-matrix(NA,nrow=nboot,ncol=length(y))
vals<-NA
for(it in 1:nboot){
idat<-sample(c(1:length(y)),replace=TRUE)
xx<-temp[idat,1:p]
yy<-temp[idat,p1]
tmy<-rung3hat(xx,yy,pts=x,est=est,fr=fr,...)$rmd
mat[it,]<-tmy
}
rmd<-apply(mat,2,mean,na.rm=TRUE)
flag<-!is.na(rmd)
rmd<-elimna(rmd)
x<-x[flag,]
y<-y[flag]
nval<-NA
m<-cov.mve(x)
for(i in 1:nrow(x))nval[i]<-length(y[near3d(x,x[i,],fr,m)])
if(plotit && ncol(x)==2){
#if(ncol(x)!=2)stop("When plotting, x must be an n by 2 matrix")
fitr<-rmd[nval>nmin]
y<-y[nval>nmin]
x<-x[nval>nmin,]
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr)
persp(fit,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand,
scale=scale,ticktype=ticktype)
}
last<-"Done"
if(pyhat)last<-rmd
list(output=last)
}


ancom<-function(x1,y1,x2,y2,dchk=FALSE,plotit=TRUE,plotfun=rplot,nboot=500,
alpha=.05,SEED=TRUE,PARTEST=FALSE,tr=0,...){
#
# Omnibus ANCOVA
# tr=0 is recommended for general use. tr>0 might result in
#   poor control over the probability of a Type I error.
#  PARTEST=T will test the hypothesis of parallel regression lines.
#
# Setting plotfun=rplotsm will smooth the plots via bagging
#
# dchk=T, points in design space with a halfspace of zero are eliminated
#
# PARTEST=F tests hypothesis that regression surface is a horizontal
# plane through the origin
# PARTEST=T tests the hypothesis that the two regression surfaces
# are parallel.
#
flag1<-rep(T,length(y1))
flag2<-rep(T,length(y2))
if(dchk){
dep1<-fdepth(x2,x1) # depth of points in x1 relative to x2
dep2<-fdepth(x1,x2)
flag1<-(dep1>0)
flag2<-(dep2>0)
}
n1<-sum(flag1)
n2<-sum(flag2)
n<-n1+n2
y<-c(n2*y1[flag1]/n,0-n1*y2[flag2]/n)
x1<-as.matrix(x1)
x1<-x1[flag1,]
x2<-as.matrix(x2)
x2<-x2[flag2,]
x1<-as.matrix(x1)
x2<-as.matrix(x2)
x<-rbind(x1,x2)
if(plotit){
if(ncol(x)<=2)plotfun(x,y,...)
}
if(PARTEST)output<-indt(x,y,tr=tr,nboot=nboot,alpha=alpha,SEED=SEED)
if(!PARTEST)output<-indt0(x,y,nboot=nboot,alpha=alpha,SEED=SEED)
list(dstat=output$dstat,critd=output$critd)
}
indt0<-function(x,y,nboot=500,alpha=.05,flag=1,SEED=TRUE){
#
# Test the hypothesis that the regression plane
#   between x and y  is a flat horizontal plane with intercept 0
# The method is based on results in
# Stute et al. (1998, JASA, 93, 141-149).
#
#  flag=1 gives Kolmogorov-Smirnov test statistic
#  flag=2 gives the Cramer-von Mises test statistic
#  flag=3 causes both test statistics to be reported.
#
if(SEED)set.seed(2)
x<-as.matrix(x)
# First, eliminate any rows of data with missing values.
temp <- cbind(x, y)
        temp <- elimna(temp)
        pval<-ncol(temp)-1
        x <- temp[,1:pval]
        y <- temp[, pval+1]
x<-as.matrix(x)
mflag<-matrix(NA,nrow=length(y),ncol=length(y))
for (j in 1:length(y)){
for (k in 1:length(y)){
mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x))
}
}
# ith row of mflag indicates which rows of the matrix x are less
# than or equal to ith row of x
#
yhat<-0
res<-y-yhat
print("Taking bootstrap sample, please wait.")
data<-matrix(runif(length(y)*nboot),nrow=nboot)
data<-(data-.5)*sqrt(12) # standardize the random numbers.
rvalb<-apply(data,1,indt0sub,yhat,res,mflag,x)
# An n x nboot matrix of R values
rvalb<-rvalb/sqrt(length(y))
dstatb<-apply(abs(rvalb),2,max)
wstatb<-apply(rvalb^2,2,mean)
mstatb<-apply(abs(rvalb),2,median)
dstatb<-sort(dstatb)
wstatb<-sort(wstatb)
mstatb<-sort(mstatb)
# compute test statistic
v<-c(rep(1,length(y)))
rval<-indt0sub(v,yhat,res,mflag,x)
rval<-rval/sqrt(length(y))
dstat<-NA
wstat<-NA
critd<-NA
critw<-NA
ib<-round(nboot*(1-alpha))
if(flag==1 || flag==3){
dstat<-max(abs(rval))
critd<-dstatb[ib]
}
if(flag==2 || flag==3){
wstat<-mean(rval^2)
critw<-wstatb[ib]
}
list(dstat=dstat,wstat=wstat,critd=critd,critw=critw)
}


indt0sub<-function(vstar,yhat,res,mflag,x){
#
# For each row of x, sum the wild-bootstrap weighted residuals
# over all points componentwise less than or equal to that row;
# used by indt0.
#
bres<-res*vstar
rval<-0
for (i in 1:nrow(x)){
rval[i]<-sum(bres[mflag[,i]])
}
rval
}

smeancr<-function(m,nullv=rep(0,ncol(m)),cop=3,MM=FALSE,SEED=NA,
nboot=500,plotit=TRUE,xlab="VAR 1",ylab="VAR 2",STAND=FALSE){
#
# m is an n by p matrix
#
# Test hypothesis that multivariate skipped estimators
# are all equal to the null value, which defaults to zero.
# The level of the test is .05.
#
# Eliminate outliers using a projection method
# That is, determine center of data using:
#
# cop=1 Donoho-Gasko median,
# cop=2 MCD,
# cop=3 marginal medians.
# cop=4 MVE
#
# For each point
# consider the line between it and the center
# project all points onto this line, and
# check for outliers using
#
# MM=F, a boxplot rule.
# MM=T, rule based on MAD and median
#
# Repeat this for all points. A point is declared
# an outlier if for any projection it is an outlier
# using a modification of the usual boxplot rule.
#
# Eliminate any outliers and compute means
#  using remaining data.
#
if(is.na(SEED))set.seed(2)
if(!is.na(SEED))set.seed(SEED)
m<-elimna(m)
n<-nrow(m)
crit.level<-.05
if(n<=120)crit.level<-.045
if(n<=80)crit.level<-.04
if(n<=60)crit.level<-.035
if(n<=40)crit.level<-.03
if(n<=30)crit.level<-.025
if(n<=20)crit.level<-.02
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
val<-matrix(NA,ncol=ncol(m),nrow=nboot)
for(j in 1: nboot){
mm<-m[data[j,],]
temp<-outpro(mm,plotit=FALSE,cop=cop,STAND=STAND)$keep
val[j,]<-apply(mm[temp,],2,mean)
}
temp<-pdis(rbind(val,nullv))
sig.level<-sum(temp[nboot+1]<temp[1:nboot])/nboot
if(ncol(m)==2 && plotit){
plot(val[,1],val[,2],xlab=xlab,ylab=ylab)
temp3<-smean(m,cop=cop,STAND=STAND)
points(temp3[1],temp3[2],pch="+")
ic<-round((1-crit.level)*nboot)
temp<-pdis(val)
temp.dis<-order(temp)
xx<-val[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}
list(p.value=sig.level,crit.level=crit.level)
}


rplotsm<-function(x,y,est=tmean,fr=1,plotit=TRUE,pyhat=FALSE,nboot=40,atr=0,nmin=0,
outfun=outpro,eout=FALSE,xlab="X",ylab="Y",scat=TRUE,SEED=TRUE,expand=.5,scale=FALSE,STAND=FALSE,
varfun=pbvar,pr=TRUE,ticktype="simple",theta=50,phi=25,...){
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
if(ncol(x)==1){
val<-runmbo(x,y,est=est,scat=scat,fr=fr,plotit=plotit,pyhat=TRUE,STAND=STAND,
xlab=xlab,ylab=ylab,eout=eout,nboot=nboot,outfun=outfun,SEED=SEED,atr=atr,...)
}
if(ncol(x)>1){
if(ncol(x)==2 && !scale){
if(pr){
print("scale=F is specified.")
print("If there is dependence, use scale=T")
}}
if(ncol(x)>2)plotit<-F
val<-run3bo(x,y,est=est,fr=fr,nmin=nmin,plotit=plotit,pyhat=TRUE,phi=phi,
theta=theta,xlab=xlab,ylab=ylab,ticktype=ticktype,STAND=STAND,
#eout=eout,outfun=outfun,SEED=SEED,expand=expand,scale=scale,nboot=nboot,...)
SEED=SEED,expand=expand,scale=scale,nboot=nboot,...)
val<-val$output
}
E.power<-varfun(val[!is.na(val)])/varfun(y)
#if(pr)print(paste("Explanatory.power=",E.power))
#if(!pyhat)val<-"Done"
if(!pyhat)val <- NULL
E.power=as.numeric(E.power)
list(Strength.Assoc=sqrt(E.power),Explanatory.Power = E.power, yhat = val)
}

zdepth<-function(m,pts=m,zloc=median,zscale=mad){
#
# Compute depth of points as in Zuo, Annals, 2003
#
if(!is.matrix(m))stop("argument m should be a matrix")
if(!is.matrix(pts))stop("argument pts should be a matrix")
if(ncol(m)!=ncol(pts))stop("Number of columns for m and pts are not equal")
np<-ncol(m)
val<-NA
for(i in 1:nrow(pts)){
pval<-pts[i,]
START<-rep(1,np)/sqrt(np)
temp<-nelderv2(m,np,FN=zdepth.sub,START=START,zloc=zloc,zscale=zscale,pts=pval)
temp<-temp/sqrt(sum(temp^2))
y<-t(t(m)*temp)
y<-apply(y,1,sum)
ppro<-sum(pval*temp)
val[i]<-abs(ppro-zloc(y))/zscale(y)
}
val
}
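
# Illustrative sketch (hypothetical data): projection-based outlyingness of
# the origin relative to a bivariate sample; larger values indicate a more
# outlying point. Requires nelderv2 from this file; wrapped in if(FALSE).
if(FALSE){
set.seed(3)
m<-matrix(rnorm(100),ncol=2)
zdepth(m,pts=matrix(c(0,0),ncol=2))
}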

zdepth.sub<-function(x,theta,zloc=median,zscale=mad,pts=NA){
theta<-theta/sqrt(sum(theta^2))
temp<-t(t(x)*theta)
ppro<-sum(t(t(pts)*theta))
yhat<-apply(temp,1,sum)
val<-0-abs(ppro-zloc(yhat))/zscale(yhat)
val
}

opregpb<-function(x,y,nboot=1000,alpha=.05,om=TRUE,ADJ=TRUE,
nullvec=rep(0,ncol(x)+1),plotit=TRUE,opdis=2,gval=sqrt(qchisq(.95,ncol(x)+1))){
#
# generate bootstrap estimates
# use projection-type outlier detection method followed by
# TS regression.
#
# om=T and ncol(x)>1, means an omnibus test is performed,
# otherwise only individual tests of parameters are performed.
#
# opdis=2, means that Mahalanobis distance is used
# opdis=1, means projection-type distance is used
#
# gval is critical value for projection-type outlier detection
# method
#
# ADJ=T, Adjust p-values as described in Section 11.1.5 of the text.
#
x<-as.matrix(x)
m<-cbind(x,y)
p1<-ncol(x)+1
m<-elimna(m) # eliminate any rows with missing data
x<-m[,1:ncol(x)]
x<-as.matrix(x)
y<-m[,p1]
if(nrow(x)!=length(y))stop("Sample size of x differs from sample size of y")
if(!is.matrix(x))stop("Data should be stored in a matrix")
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,regboot,x,y,regfun=opreg)
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
# using Hochberg method
bvec<-t(bvec)
dvec<-alpha/(c(1:ncol(x)))
test<-NA
icl0<-round(alpha*nboot/2)
icl<-round(alpha*nboot/(2*ncol(x)))
icu0<-nboot-icl0
icu<-nboot-icl
output<-matrix(0,p1,6)
dimnames(output)<-list(NULL,c("Param.","p.value","p.crit",
"ci.lower","ci.upper","s.e."))
pval<-NA
for(i in 1:p1){
output[i,1]<-i-1
se.val<-var(bvec[,i])
temp<-sort(bvec[,i])
output[i,6]<-sqrt(se.val)
if(i==1){
output[i,4]<-temp[icl0+1]
output[i,5]<-temp[icu0]
}
if(i>1){
output[i,4]<-temp[icl+1]
output[i,5]<-temp[icu]
}
pval[i]<-sum((temp>nullvec[i]))/length(temp)
if(pval[i]>.5)pval[i]<-1-pval[i]
}
fac<-2
if(ADJ){
# Adjust p-value if n<60
nval<-length(y)
if(nval<20)nval<-20
if(nval>60)nval<-60
fac<-2-(60-nval)/40
}
pval[1]<-2*pval[1]
pval[2:p1]<-fac*pval[2:p1]
output[,2]<-pval
temp2<-order(0-pval[2:p1])
zvec<-dvec[1:ncol(x)]
sigvec<-(test[temp2]>=zvec)
output[temp2+1,3]<-zvec
output[1,3]<-NA
om.pval<-NA
temp<-opreg(x,y)$coef
if(om && ncol(x)>1){
temp2<-rbind(bvec[,2:p1],nullvec[2:p1])
if(opdis==1)dis<-pdis(temp2,pr=FALSE,center=temp[2:p1])
if(opdis==2){
cmat<-var(bvec[,2:p1]-apply(bvec[,2:p1],2,mean)+temp[2:p1])
dis<-mahalanobis(temp2,temp[2:p1],cmat)
}
om.pval<-sum((dis[nboot+1]<=dis[1:nboot]))/nboot
}
# do adjusted p-value
nval<-length(y)
if(nval<20)nval<-20
if(nval>60)nval<-60
adj.pval<-om.pval/2+(om.pval-om.pval/2)*(nval-20)/40
if(ncol(x)==2 && plotit){
plot(bvec[,2],bvec[,3],xlab="Slope 1",ylab="Slope 2")
temp.dis<-order(dis[1:nboot])
ic<-round((1-alpha)*nboot)
xx<-bvec[temp.dis[1:ic],2:3]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}
list(output=output,om.pval=om.pval,adj.om.pval=adj.pval)
}


kslope<-function(x,y,pyhat=FALSE,pts=x){
#
# Estimate slope at points in pts using kernel method
#
# See Doksum et al. 1994, JASA, 89, 571-
#
m<-elimna(cbind(x,y))
x<-m[,1]
y<-m[,2]
n<-length(y)
sig<-sqrt(var(x))
temp<-idealf(x)
iqr<-(temp$qu-temp$ql)/1.34
A<-min(c(sig,iqr))
vals<-NA
for(k in 1:n){
temp1<-NA
for(j in 1:n){
temp1[j]<-((x[j]-x[k])/A)^2
}
epan<-ifelse(temp1<1,.75*(1-temp1),0) # Epanechnikov kernel, p. 76
chkit<-sum(epan!=0)
if(chkit >= 2){
temp4<-lsfit(x,y,wt=epan)
vals[k]<-temp4$coef[2]
}}
vals
}

nearl<-function(x,pt,fr=1){
# determine which values in x are near and less than pt
# based on fr * mad
m<-mad(x)
if(m==0){
temp<-idealf(x)
m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25))
}
if(m==0)m<-sqrt(winvar(x)/.4129)
if(m==0)stop("All measures of dispersion are equal to 0")
dis<-abs(x-pt)
dflag<-dis <= fr*m
flag2<-(x<pt)
dflag<-dflag*flag2
dflag
}
nearr<-function(x,pt,fr=1){
# determine which values in x are near and greater than pt
# based on fr * mad
m<-mad(x)
if(m==0){
temp<-idealf(x)
m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25))
}
if(m==0)m<-sqrt(winvar(x)/.4129)
if(m==0)stop("All measures of dispersion are equal to 0")
dis<-abs(x-pt)
dflag<-dis <= fr*m
flag2<-(x>pt)
dflag<-dflag*flag2
dflag
}
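
# Illustrative sketch: flag values within fr*MAD of the point 5, from below
# (nearl) and from above (nearr). Wrapped in if(FALSE) so it never runs.
if(FALSE){
x<-c(1:10)
nearl(x,5)  # flags 2, 3, 4
nearr(x,5)  # flags 6, 7, 8
}
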
mgvmean<-function(m,op=0,outfun=outbox,se=T){
#
# m is an n by p matrix
#
# Compute a multivariate skipped measure of location
# using the MGV method
#
# Eliminate outliers using MGV method
#
# op=0 pairwise distances of points
# op=1 MVE distances
# op=2 MCD distances
#
# outfun indicates outlier rule to be applied to
# the MGV distances.
# By default, use boxplot rule
#
# Eliminate any outliers and compute means
#  using remaining data.
#
m<-elimna(m)
temp<-outmgv(m,op=op,plotit=FALSE)$keep
val<-apply(m[temp,],2,mean)
val
}

smgvcr<-function(m,nullv=rep(0,ncol(m)),SEED=TRUE,op=0,
nboot=500,plotit=TRUE){
#
# m is an n by p matrix
#
# Test hypothesis that estimand of the MGV estimator
# is equal to the null value, which defaults to zero vector.
# The level of the test is .05.
#
# Argument op: See function outmgv
#
if(SEED)set.seed(2)
m<-elimna(m)
n<-nrow(m)
crit.level<-.05
if(n<=120)crit.level<-.045
if(n<=80)crit.level<-.04
if(n<=60)crit.level<-.035
if(n<=40)crit.level<-.03
if(n<=30)crit.level<-.025
if(n<=20)crit.level<-.02
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
val<-matrix(NA,ncol=ncol(m),nrow=nboot)
for(j in 1: nboot){
mm<-m[data[j,],]
temp<-outmgv(mm,plotit=FALSE,op=op)$keep
val[j,]<-apply(mm[temp,],2,mean)
}
temp<-mgvar(rbind(val,nullv),op=op)
flag2<-is.na(temp)
if(sum(flag2)>0)temp[flag2]<-0
sig.level<-sum(temp[nboot+1]<temp[1:nboot])/nboot
if(ncol(m)==2 && plotit){
plot(val[,1],val[,2],xlab="VAR 1",ylab="VAR 2")
temp3<-mgvmean(m,op=op)
points(temp3[1],temp3[2],pch="+")
ic<-round((1-crit.level)*nboot)
temp<-mgvar(val)
temp.dis<-order(temp)
xx<-val[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}
list(p.value=sig.level,crit.level=crit.level)
}


lts.sub<-function(X,theta,h){
np<-ncol(X)
p<-np-1
x<-X[,1:p]
y<-X[,np]
temp<-t(t(x)*theta[2:np])
yhat<-apply(temp,1,sum)+theta[1]
res<-(y-yhat)^2
res<-sort(res)
val<-sum(res[1:h])
val
}

ltsgreg<-function(x, y, tr = 0.2, h = NA)
{
        #
        # Compute the least trimmed squares (LTS) regression estimator.
        # The default amount of trimming is .2
x<-as.matrix(x)
library(MASS)
X<-cbind(x,y)
X<-elimna(X)
np<-ncol(X)
p<-np-1
x<-X[,1:p]
x<-as.matrix(x)
y<-X[,np]
if(is.na(h)) h <- length(y) - floor(tr * length(y))
temp<-ltsreg(y~x)$coef # MASS::ltsreg supplies LTS starting values for Nelder-Mead
START<-temp
coef<-nelderv2(X,np,FN=lts.sub,START=START,h=h)
        res <- y - x%*%coef[2:np] - coef[1]
        list(coef = coef, residuals = res)
}
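
# Illustrative sketch (hypothetical data): LTS regression with the default
# 20% trimming; requires MASS plus nelderv2/lts.sub above. Wrapped in if(FALSE).
if(FALSE){
set.seed(4)
x<-rnorm(60)
y<-2*x+rnorm(60)
ltsgreg(x,y)$coef  # intercept and slope
}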


qest<-function(x,q=.5){
#
# Compute an estimate of qth quantile
#  using a single order statistic.
#
if(q<=0 || q>=1)stop("q must be > 0 and < 1")
n<-length(x)
xsort<-sort(x)
iq <- floor(q * n + 0.5)
flag<-(iq<=0 || iq>n)
qest<-NA
if(!flag)qest<-xsort[iq]
qest
}
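
# Illustrative sketch: the single-order-statistic quantile estimate.
if(FALSE){
qest(c(1:100),q=.25)  # floor(.25*100+.5)=25, so the 25th order statistic, 25
}
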
smean2<-function(m1,m2,nullv=rep(0,ncol(m1)),cop=3,MM=FALSE,SEED=NA,
nboot=500,plotit=TRUE,STAND=FALSE){
#
# m1 and m2 are n by p matrices
#
# For two independent groups,
# test hypothesis that multivariate skipped estimators
# are all equal.
#
# The level of the test is .05.
#
# Skipped estimator is used, i.e.,
# eliminate outliers using a projection method
# That is, determine center of data using:
#
# cop=1 Donoho-Gasko median,
# cop=2 MCD,
# cop=3 marginal medians.
# cop=4 MVE
#
# For each point
# consider the line between it and the center
# project all points onto this line, and
# check for outliers using
#
# MM=F, a boxplot rule.
# MM=T, rule based on MAD and median
#
# Repeat this for all points. A point is declared
# an outlier if for any projection it is an outlier
# using a modification of the usual boxplot rule.
#
# Eliminate any outliers and compute means
#  using remaining data.
#
if(ncol(m1) != ncol(m2)){
stop("Number of variables in group 1 does not equal the number in group 2.")
}
if(is.na(SEED))set.seed(2)
#if(!is.na(SEED))set.seed(SEED)
m1<-elimna(m1)
m2<-elimna(m2)
n1<-nrow(m1)
n2<-nrow(m2)
n<-min(c(n1,n2))
crit.level<-.05
if(n<=120)crit.level<-.045
if(n<=80)crit.level<-.04
if(n<=60)crit.level<-.035
if(n<=40)crit.level<-.03
if(n<=30)crit.level<-.025
if(n<=20)crit.level<-.02
#data1<-matrix(sample(n1,size=n1*nboot,replace=TRUE),nrow=nboot)
#data2<-matrix(sample(n2,size=n2*nboot,replace=TRUE),nrow=nboot)
val<-matrix(NA,ncol=ncol(m1),nrow=nboot)
for(j in 1: nboot){
data1<-sample(n1,size=n1,replace=TRUE)
data2<-sample(n2,size=n2,replace=TRUE)
mm1<-m1[data1,]
temp<-outpro(mm1,plotit=FALSE,cop=cop,STAND=STAND)$keep
v1<-apply(mm1[temp,],2,mean)
mm2<-m2[data2,]
temp<-outpro(mm2,plotit=FALSE,cop=cop,STAND=STAND)$keep
v2<-apply(mm2[temp,],2,mean)
val[j,]<-v1-v2
}
temp<-pdis(rbind(val,nullv))
#print(temp)
sig.level<-sum(temp[nboot+1]<temp[1:nboot])/nboot
if(ncol(m1)==2 && plotit){
print("plotting")
plot(val[,1],val[,2],xlab="VAR 1",ylab="VAR 2")
temp3<-smean(m1,cop=cop)-smean(m2,cop=cop)
points(temp3[1],temp3[2],pch="+")
ic<-round((1-crit.level)*nboot)
temp<-pdis(val)
temp.dis<-order(temp)
xx<-val[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}
list(p.value=sig.level,crit.level=crit.level)
}
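
# Illustrative sketch (hypothetical data): compare the bivariate skipped means
# of two independent groups that differ in location. Wrapped in if(FALSE).
if(FALSE){
set.seed(5)
m1<-cbind(rnorm(40),rnorm(40))
m2<-cbind(rnorm(40,mean=1),rnorm(40,mean=1))
smean2(m1,m2,nboot=100,plotit=FALSE)
}
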
locreg<-function(x,y,pyhat=FALSE,pts=NA,np=100,plotit=TRUE,eout=FALSE,outfun=out,
xlab="X",ylab="Y"){
#
# Compute local weighted regression with Epanechnikov kernel
#
# See Fan, Annals of Statistics, 1993, 21, 196-217.
# cf. Bjerve and Doksum, Annals of Statistics, 1993, 21, 890-902
#
# With np=100, the function plots a smooth using the
# middle 80% of the x values versus y.
# With np=0, it plots using all x values,
# or all values in pts if values are stored in it;
# when pts is not specified, pts=x is used.
#
# pyhat=T, the function returns the estimated y values
# corresponding to x values in pts. If pts=NA, pts=x
# is assumed.
#
m<-elimna(cbind(x,y))
if(eout){
keep<-outfun(m,plotit=FALSE)$keep
m<-m[keep,]
}
x<-m[,1]
y<-m[,2]
n<-length(x)
sig<-sqrt(var(x))
temp<-idealf(x)
iqr<-(temp$qu-temp$ql)/1.34
A<-min(c(sig,iqr))
yhat<-NA
temp<-NA
if(is.na(pts[1])){
if(np>0)pts<-seq(min(x),max(x),length=np)
if(np==0)pts<-x
}
pts<-sort(pts)
for(i in 1:length(pts)){
yhat[i]<-NA
for(j in 1:length(x)){
temp[j]<-((x[j]-pts[i])/A)^2
}
epan<-ifelse(temp<1,.75*(1-temp),0)
chkit<-sum(epan!=0)
if(chkit > 1){
vals<-lsfit(x,y,wt=epan)$coef
yhat[i]<-vals[2]*pts[i]+vals[1]
}
}
if(plotit){
plot(x,y,xlab=xlab,ylab=ylab)
if(np>0){
ilow<-round(.1*np)
iup<-round(.9*np)
}
if(np==0){
ilow<-1
iup<-length(pts)
}
lines(pts[ilow:iup],yhat[ilow:iup])
}
m<-"Done"
if(pyhat)m<-yhat
m
}

qreg.sub<-function(X,theta,qval=.5){
np<-ncol(X)
p<-np-1
x<-X[,1:p]
y<-X[,np]
temp<-t(t(x)*theta[2:np])
yhat<-apply(temp,1,sum)+theta[1]
res<-y-yhat
flag<-(res<=0)
rval<-(qval-flag)*res
val<-sum(rval)
val
}

rmmcppb<-function(x,y=NULL,alpha=.05,con=0,est=onestep,plotit=TRUE,dif=TRUE,grp=NA,nboot=NA,BA=FALSE,hoch=FALSE,xlab="Group 1",ylab="Group 2",pr=TRUE,SEED=TRUE,...){
#
#   Use a percentile bootstrap method to  compare dependent groups.
#   By default,
#   compute a .95 confidence interval for all linear contrasts
#   specified by con, a J by C matrix, where  C is the number of
#   contrasts to be tested, and the columns of con are the
#   contrast coefficients.
#   If con is not specified, all pairwise comparisons are done.
#
#   By default, an M-estimator is used and a sequentially rejective method
#   is used to control the probability of at least one Type I error.
#
#   dif=T indicates that difference scores are to be used
#   dif=F indicates that measures of location associated with
#   the marginal distributions are used instead.
#
#   nboot is the bootstrap sample size. If not specified, a value will
#   be chosen depending on the number of contrasts there are.
#
#   x can be an n by J matrix or it can have list mode
#   for two groups, data for second group can be put in y
#   otherwise, assume x is a matrix (n by J) or has list mode.
#
#   A sequentially rejective method is used to control alpha.
#
#   Argument BA: When using dif=F, BA=T uses a correction term
#   when computing a p-value.
#
if(dif){
if(pr)print("dif=T, so analysis is done on difference scores")
temp<-rmmcppbd(x,y=y,alpha=alpha,con=con,est,plotit=plotit,grp=grp,nboot=nboot,
hoch=TRUE,...)
output<-temp$output
con<-temp$con
}
if(!dif){
if(pr){
print("dif=F, so analysis is done on marginal distributions")
if(!BA)print("With M-estimator or MOM, suggest using BA=T and hoch=T")
}
if(!is.null(y[1]))x<-cbind(x,y)
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
if(is.matrix(con)){
if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.")
}}
if(is.list(x)){
# put the data in an n by J matrix
mat<-matl(x)
}
if(is.matrix(x) && is.matrix(con)){
if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.")
mat<-x
}
if(is.matrix(x))mat<-x
if(!is.na(sum(grp)))mat<-mat[,grp]
mat<-elimna(mat) # Remove rows with missing values.
x<-mat
J<-ncol(mat)
xcen<-x
for(j in 1:J)xcen[,j]<-x[,j]-est(x[,j])
Jm<-J-1
if(sum(con^2)==0){
d<-(J^2-J)/2
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
d<-ncol(con)
if(is.na(nboot)){
if(d<=4)nboot<-1000
if(d>4)nboot<-5000
}
n<-nrow(mat)
crit.vec<-alpha/c(1:d)
connum<-ncol(con)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
xbars<-apply(mat,2,est)
psidat<-NA
for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars)
psihat<-matrix(0,connum,nboot)
psihatcen<-matrix(0,connum,nboot)
bvec<-matrix(NA,ncol=J,nrow=nboot)
bveccen<-matrix(NA,ncol=J,nrow=nboot)
if(pr)print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot){
bvec[ib,]<-apply(x[data[ib,],],2,est,...)
bveccen[ib,]<-apply(xcen[data[ib,],],2,est,...)
}
#
# Now have an nboot by J matrix of bootstrap values.
#
test<-1
bias<-NA
for (ic in 1:connum){
psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic])
psihatcen[ic,]<-apply(bveccen,1,bptdpsi,con[,ic])
bias[ic]<-sum((psihatcen[ic,]>0))/nboot-.5
ptemp<-(sum(psihat[ic,]>0)+.5*sum(psihat[ic,]==0))/nboot
#if(BA)test[ic]<-sum((psihat[ic,]>0))/nboot-.1*bias[ic]
if(BA)test[ic]<-ptemp-.1*bias[ic]
#if(!BA)test[ic]<-sum((psihat[ic,]>0))/nboot
if(!BA)test[ic]<-ptemp
test[ic]<-min(test[ic],1-test[ic])
test[ic]<-max(test[ic],0)
}
test<-2*test
ncon<-ncol(con)
if(alpha==.05){
dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
dvecba<-dvec
dvec[2]<-alpha
}
if(hoch)dvec<-alpha/c(1:ncon)
dvec<-2*dvec
dvecba<-dvec
if(plotit && ncol(bvec)==2){
z<-c(0,0)
one<-c(1,1)
plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n")
points(bvec)
totv<-apply(x,2,est,...)
cmat<-var(bvec)
dis<-mahalanobis(bvec,totv,cmat)
temp.dis<-order(dis)
ic<-round((1-alpha)*nboot)
xx<-bvec[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
abline(0,1)
}
temp2<-order(0-test)
ncon<-ncol(con)
zvec<-dvec[1:ncon]
if(BA)zvec<-dvecba[1:ncon]
sigvec<-(test[temp2]>=zvec)
output<-matrix(0,connum,6)
dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.sig","ci.lower","ci.upper"))
tmeans<-apply(mat,2,est,...)
psi<-1
output[temp2,4]<-zvec
for (ic in 1:ncol(con)){
output[ic,2]<-sum(con[,ic]*tmeans)
output[ic,1]<-ic
output[ic,3]<-test[ic]
temp<-sort(psihat[ic,])
icl<-round(output[ic,4]*nboot/2)+1
icu<-nboot-(icl-1)
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
}
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}
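
# Illustrative sketch (hypothetical data): all pairwise comparisons of three
# dependent groups via difference scores (dif=T, the default). Wrapped in if(FALSE).
if(FALSE){
set.seed(6)
x<-matrix(rnorm(60),ncol=3)
rmmcppb(x,nboot=1000,plotit=FALSE)$output
}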

linconb<-function(x,con=0,tr=.2,alpha=.05,nboot=599,pr=TRUE,SEED=TRUE){
#
#   Compute a 1-alpha confidence interval for a set of d linear contrasts
#   involving trimmed means using the bootstrap-t bootstrap method.
#   Independent groups are assumed.
#
#   The data are assumed to be stored in x in list mode.  Thus,
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J, say.
#
#   Missing values are automatically removed.
#
#   con is a J by d matrix containing the contrast coefficients of interest.
#   If unspecified, all pairwise comparisons are performed.
#   For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(1,-1,0,0,1,-1)
#   will test two contrasts: (1) the sum of the first two trimmed means is
#   equal to the sum of the second two, and (2) the difference between
#   the first two is equal to the difference between the trimmed means of
#   groups 5 and 6.
#
#   The default number of bootstrap samples is nboot=599
#
#   This function uses functions trimparts and trimpartt written for this
#   book.
#
#
#
#
if(is.data.frame(x))x=as.matrix(x)
if(pr){
print("Note: confidence intervals are adjusted to control FWE")
print("But p-values are not adjusted to control FWE")
}
con<-as.matrix(con)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
J<-length(x)
for(j in 1:J){
xx<-x[[j]]
x[[j]]<-xx[!is.na(xx)] # Remove any missing values.
}
Jm<-J-1
d<-(J^2-J)/2
if(sum(con^2)==0){
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
if(nrow(con)!=length(x))stop("The number of groups does not match the number of contrast coefficients.")
bvec<-array(0,c(J,2,nboot))
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
nsam=matl(lapply(x,length))
for(j in 1:J){
paste("Working on group ",j)
xcen<-x[[j]]-mean(x[[j]],tr)
data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row
#                     contains the bootstrap trimmed means, the second row
#                     contains the bootstrap squared standard errors.
}
m1<-bvec[,1,]  # J by nboot matrix containing the bootstrap trimmed means
m2<-bvec[,2,]  # J by nboot matrix containing the bootstrap sq. se.
boot<-matrix(0,ncol(con),nboot)
for (d in 1:ncol(con)){
top<-apply(m1,2,trimpartt,con[,d])
#            A vector of length nboot containing psi hat values
consq<-con[,d]^2
bot<-apply(m2,2,trimpartt,consq)
boot[d,]<-abs(top)/sqrt(bot)
}
testb<-apply(boot,2,max)
ic<-floor((1-alpha)*nboot)
testb<-sort(testb)
psihat<-matrix(0,ncol(con),4)
test<-matrix(0,ncol(con),4)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
dimnames(test)<-list(NULL,c("con.num","test","se","p.value"))
for (d in 1:ncol(con)){
test[d,1]<-d
psihat[d,1]<-d
testit<-lincon(x,con[,d],tr,pr=FALSE)
test[d,2]<-testit$test[1,2]
pval<-mean((abs(testit$test[1,2])<boot[d,]))
test[d,4]<-pval
psihat[d,3]<-testit$psihat[1,2]-testb[ic]*testit$test[1,4]
psihat[d,4]<-testit$psihat[1,2]+testb[ic]*testit$test[1,4]
psihat[d,2]<-testit$psihat[1,2]
test[d,3]<-testit$test[1,4]
}
list(n=nsam,psihat=psihat,test=test,crit=testb[ic],con=con)
}
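
# Illustrative sketch (hypothetical data): bootstrap-t confidence intervals for
# all pairwise contrasts among three 20% trimmed means. Wrapped in if(FALSE).
if(FALSE){
set.seed(7)
x<-list(rnorm(25),rnorm(25),rnorm(25,mean=.5))
linconb(x,tr=.2,nboot=599)
}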

pdclose<-function(x,pts=x,fr=1,MM=FALSE,MC=FALSE,STAND=FALSE){
#
# For each point in pts, determine which points
# (values in x)
# are close to it based on projected distances.
#
x<-as.matrix(x)
pts<-as.matrix(pts)
if(ncol(x)>1){
if(STAND){
m1=apply(x,2,mean) # column means and sds of x, used to put pts on the same scale
v=apply(x,2,sd)
x=standm(x)
for(j in 1:ncol(x))pts[,j]=(pts[,j]-m1[j])/v[j]
}}
outmat<-matrix(NA,ncol=nrow(x),nrow=nrow(pts))
for(i in 1:nrow(pts)){
center<-pts[i,]
if(!MC)blob<-pdis(x,center=center,MM=MM)
if(MC)blob<-pdisMC(x,center=center,MM=MM)
#
# Note: distances already divided by
# interquartile range
#
# Determine which points in m are close to pts
flag2<-(blob < fr)
outmat[i,]<-flag2
}
# Return matrix, ith row indicates which points
# in x are close to pts[i,]
#
outmat
}

adtestl<-function(x,y,est=tmean,nboot=100,alpha=.05,fr=NA,SEED=TRUE,...){
#
# Test the hypothesis that the regression model is additive.
# Use a variation of the Stute et al. (1998, JASA, 93, 141-149)
# method and the running interval version of the backfitting
# algorithm.
#
if(!is.matrix(x))stop("X values should be stored in a matrix")
if(ncol(x)==1)stop("There should be two or more predictors")
temp<-cbind(x,y)
p<-ncol(x)
p1<-p+1
temp<-elimna(temp)
x<-temp[,1:p]
x<-as.matrix(x)
y<-temp[,p1]
if(alpha<.05 && nboot<=100)warning("You used alpha<.05 and nboot<=100")
if(is.na(fr)){
fr<-.8
if(ncol(x)==2){
nval<-c(20,30,50,80,150)
fval<-c(0.40,0.36,0.18,0.15,0.09)
if(length(y)<=150)fr<-approx(nval,fval,length(y))$y
if(length(y)>150)fr<-.09
}
}
if(SEED)set.seed(2)
x<-as.matrix(x)
mflag<-matrix(NA,nrow=length(y),ncol=length(y))
for (j in 1:length(y)){
for (k in 1:length(y)){
mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x))
}
}
yhat<-adrunl(x,y,plotit=FALSE,fr=fr,pyhat=T)
regres<-y-yhat
print("Taking bootstrap sample, please wait.")
data<-matrix(runif(length(y)*nboot),nrow=nboot)
data<-sqrt(12)*(data-.5) # standardize the random numbers.
rvalb<-apply(data,1,adtestls1,yhat,regres,mflag,x,fr)
# An n x nboot matrix of R values
rvalb<-rvalb/sqrt(length(y))
dstatb<-apply(abs(rvalb),2,max)
wstatb<-apply(rvalb^2,2,mean)
dstatb<-sort(dstatb)
wstatb<-sort(wstatb)
# compute test statistic
v<-c(rep(1,length(y)))
rval<-adtestls1(v,yhat,regres,mflag,x,fr)
rval<-rval/sqrt(length(y))
dstat<-max(abs(rval))
wstat<-mean(rval^2)
ib<-round(nboot*(1-alpha))
critd<-dstatb[ib]
critw<-wstatb[ib]
list(dstat=dstat,wstat=wstat,critd=critd,critw=critw)
}


adtestls1<-function(vstar,yhat,res,mflag,x,fr){
ystar<-yhat+res*vstar
bres<-adrunl(x,ystar,fr=fr,pyhat=TRUE,plotit=FALSE)
bres<-ystar-bres
rval<-0
for (i in 1:nrow(x)){
rval[i]<-sum(bres[mflag[,i]])
}
rval
}
adcom<-function(x,y,est=mean,tr=0,nboot=600,alpha=.05,fr=NA,
jv=NA,SEED=TRUE,...){
#
# Test the hypothesis that component
# jv
# is zero. That is, in a generalized additive model, test
# H_0: f_jv(X_jv) = 0.
# Use a variation of the Stute et al. (1998, JASA, 93, 141-149)
# method and the running interval version of the backfitting
# algorithm.
#
# if jv=NA, all components are tested.
#
# Current version allows only 0 or 20% trimming
#
x=as.matrix(x)
if(!is.matrix(x))stop("X values should be stored in a matrix")
if(ncol(x)==1)stop("There should be two or more predictors")
temp<-cbind(x,y)
p<-ncol(x)
p1<-p+1
temp<-elimna(temp)
x<-temp[,1:p]
x<-as.matrix(x)
y<-temp[,p1]
if(is.na(fr)){
if(tr==.2){
nval<-c(20,40,60,80,120,160)
fval<-c(1.2,1,.85,.75,.65,.65)
if(length(y)<=160)fr<-approx(nval,fval,length(y))$y
if(length(y)>160)fr<-.65
}
if(tr==0){
nval<-c(20,40,60,80,120,160)
fval<-c(.8,.7,.55,.5,.5,.5)
if(length(y)<=160)fr<-approx(nval,fval,length(y))$y
if(length(y)>160)fr<-.6
}
}
if(is.na(fr))stop("Span can be deteremined only for 0 or .2 trimming")
if(SEED)set.seed(2)
x<-as.matrix(x)
mflag<-matrix(NA,nrow=length(y),ncol=length(y))
for (j in 1:length(y)){
for (k in 1:length(y)){
mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x))
}
}
if(!is.na(jv))prval<-jv
if(is.na(jv))prval<-c(1:ncol(x))
c.sum<-matrix(NA,nrow=length(prval),ncol=2)
dimnames(c.sum)<-list(NULL,c("d.stat","p.value"))
for(ip in 1:length(prval)){
flag<-rep(T,ncol(x))
flag[prval[ip]]<-F
yhat<-adrun(x[,flag],y,plotit=FALSE,fr=fr,pyhat=T)
regres<-y-yhat
temp<-indt(x[,!flag],regres)
c.sum[ip,1]<-temp$dstat
c.sum[ip,2]<-temp$p.value.d
}
list(results=c.sum)
}

logadr<-function(x,y,est=mean,iter=10,pyhat=FALSE,plotit=TRUE,fr=.8,xout=FALSE,eout=xout,
outfun=out,theta=50,phi=25,expand=.5,STAND=TRUE,ticktype="simple",scale=FALSE,...){
#
# additive model based on a variation of Copas' (1983) smooth
# for binary outcomes.
# (Use backfitting algorithm.)
#
m<-elimna(cbind(x,y))
x<-as.matrix(x)
p<-ncol(x)
p1<-p+1
y<-m[,p1]
x<-m[,1:p]
x<-as.matrix(x)
if(STAND){
for (ip in 1:p)x[,ip]<-(x[,ip]-mean(x[,ip]))/sqrt(var(x[,ip]))
}
if(xout){
keepit<-rep(T,nrow(x))
flag<-outfun(x,plotit=FALSE)$out.id
keepit[flag]<-F
x<-x[keepit,]
y<-y[keepit]
}
x<-as.matrix(x)
#if(p==1)val<-rungen(x[,1],y,est=est,pyhat=T,plotit=plotit,fr=fr,...)$output
if(p==1)val<-logrsm(x[,1],y,pyhat=T,plotit=plotit,fr=fr,...)$output
if(p>1){
np<-p+1
x<-m[,1:p]
y<-m[,np]
fhat<-matrix(NA,ncol=p,nrow=length(y))
fhat.old<-matrix(NA,ncol=p,nrow=length(y))
res<-matrix(NA,ncol=np,nrow=length(y))
dif<-1
for(i in 1:p)
fhat.old[,i]<-logrsm(x[,i],y,pyhat=TRUE,plotit=FALSE,fr=fr)$output
eval<-NA
for(it in 1:iter){
for(ip in 1:p){
res[,ip]<-y
for(ip2 in 1:p){
if(ip2 != ip)res[,ip]<-res[,ip]-fhat.old[,ip2]
}
fhat[,ip]=logrsm(x[,ip],res[,ip],pyhat=TRUE,plotit=FALSE,fr=fr)$output # backfitting: smooth the partial residuals
}
eval[it]<-sum(abs(fhat/sqrt(sum(fhat^2))-fhat.old/sqrt(sum(fhat.old^2))))
if(it > 1){
itm<-it-1
dif<-abs(eval[it]-eval[itm])
}
fhat.old<-fhat
if(dif<.01)break
}
#print(fhat)
val<-apply(fhat,1,sum)
aval<-est(y-val,...)
val<-val+aval
flag=(val<0)
val[flag]=0
flag=(val>1)
val[flag]=1
if(plotit && p==2){
fitr<-val
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fitr<-interp(mkeep[,1],mkeep[,2],fitr)
persp(fitr,theta=theta,phi=phi,expand=expand,xlab="x1",ylab="x2",zlab="",
scale=scale,ticktype=ticktype)
}}
if(!pyhat)val<-"Done"
val
}

qhomtsub<-function(isub,x,y,qval){
#
#  Perform quantile regression using x[isub] to predict y[isub]
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  This function is used by other functions when computing
#  bootstrap estimates.
#
#  The quantile regression lines for the two quantiles in qval are fit
#  via qplotreg; the difference between the two slopes is returned.
#
#  x is assumed to be a matrix containing values of the predictors.
#
xmat<-matrix(x[isub,],nrow(x),ncol(x))
temp<-qplotreg(xmat,y[isub],qval=qval,plotit=FALSE)
regboot<-temp[1,2]-temp[2,2]
regboot
}

qplotreg<-function(x, y,qval=c(.2,.8),q=NULL,plotit=TRUE,xlab="X",ylab="Y",xout=FALSE,outfun=out,...){
#
# Compute the quantile regression line for each of the
# quantiles indicated by qval.
# plotit=TRUE, plot the results.
#	
if(!is.null(q))qval=q
xy=elimna(cbind(x,y))
if(ncol(xy)>2)stop("Only One Predictor Allowed")
x=xy[,1]
y=xy[,2]
if(xout){
x<-as.matrix(x)
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
}
n<-length(qval)
coef<-matrix(NA,ncol=2,nrow=n)
x<-as.matrix(x)
if(ncol(x)>1)stop("This version allows one predictor only.")
if(plotit)plot(x,y,xlab=xlab,ylab=ylab)
for(it in 1:n){
coef[it,]<-qreg(x,y,qval=qval[it],pr=FALSE)$coef
dimnames(coef)=list(NULL,c("Inter.","Slope"))
if(plotit)abline(coef[it,1],coef[it,2])
}
coef
}
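
# Illustrative sketch (hypothetical data): slopes of the .25 and .75 quantile
# regression lines; qreg requires the quantreg package. Wrapped in if(FALSE).
if(FALSE){
set.seed(8)
x<-rnorm(100)
y<-x+rnorm(100)
qplotreg(x,y,qval=c(.25,.75),plotit=FALSE)
}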


ancmpbpb<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,pts=NA,est=tmean,nboot=NA,
bhop=FALSE,SEED=TRUE,...){
print("This function has been eliminated. Please use ancmppb instead.")
}


qsm<-function(x,y,qval=c(.2,.5,.8),fr=.8,plotit=TRUE,scat=TRUE,pyhat=FALSE,eout=FALSE,xout=FALSE,outfun=out,op=TRUE,LP=TRUE){
#
# running  interval smoother for the quantiles stored in
# qval
#
# fr controls amount of smoothing
# op=T, use Harrell-Davis estimator
# op=F, use single order statistic
#
#  LP=TRUE: The initial smooth is smoothed again using LOESS
#
plotit<-as.logical(plotit)
scat<-as.logical(scat)
m<-cbind(x,y)
if(ncol(m)!=2)stop("Must have exactly one predictor. For more than one, use qhdsm.")
m<-elimna(m)
if(eout && xout)stop("Not allowed to have eout=xout=T")
if(eout){
flag<-outfun(m,plotit=FALSE)$keep
m<-m[flag,]
}
if(xout){
flag<-outfun(x)$keep
m<-m[flag,]
}
x<-m[,1]
y<-m[,2]
rmd<-c(1:length(x))
if(pyhat)outval<-matrix(NA,ncol=length(qval),nrow=length(x))
if(scat)plot(x,y)
for(it in 1:length(qval)){
if(!op)for(i in 1:length(x))rmd[i]<-qest(y[near(x,x[i],fr)],q=qval[it])
if(op)for(i in 1:length(x))rmd[i]<-hd(y[near(x,x[i],fr)],q=qval[it])
if(pyhat)outval[,it]<-rmd
if(!scat)plot(x,y,type="n")
points(x,rmd,type="n")
sx<-sort(x)
xorder<-order(x)
sysm<-rmd[xorder]
if(LP)sysm=lplot(sx,sysm,pyhat=TRUE,plotit=FALSE)$yhat.values
lines(sx,sysm)
}
if(pyhat)output<-outval
if(!pyhat)output<-"Done"
list(output=output)
}
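
# Illustrative sketch (hypothetical data): overlay running interval smooths of
# the .2, .5 and .8 quantiles on a scatterplot. Wrapped in if(FALSE).
if(FALSE){
set.seed(9)
x<-runif(100)
y<-x+rnorm(100,sd=.2)
qsm(x,y,qval=c(.2,.5,.8))
}
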
locvar<-function(x,y,pyhat=FALSE,pts=x,plotit=TRUE){
#
# For each x, estimate VAR(y|x)
# with the method used by Bjerve and Doksum
# i.e., use Fan's kernel regression method.
#
yhat<-locreg(x,y,pyhat=TRUE,plotit=FALSE,pts=x)
val<-locreg(x,(y-yhat)^2,pyhat=pyhat,pts=pts,plotit=plotit)
val
}

smmval<-function(dfvec,iter=10000,alpha=.05,SEED=TRUE){
if(SEED)set.seed(1)
dfv<-length(dfvec)/sum(1/dfvec)
vals<-NA
tvals<-NA
J<-length(dfvec)
for(i in 1:iter){
for(j in 1:J){
tvals[j]<-rt(1,dfvec[j])
}
vals[i]<-max(abs(tvals))
}
vals<-sort(vals)
ival<-round((1-alpha)*iter)
qval<-vals[ival]
qval
}
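
# Illustrative sketch: approximate the .95 quantile of the maximum of three
# independent |t| variates with 10, 15 and 20 df. Wrapped in if(FALSE).
if(FALSE){
smmval(c(10,15,20),iter=2000)
}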


bwmedimcp<-function(J,K,x,JK=J*K,grp=c(1:JK),alpha=.05){
#
# Multiple comparisons for interactions
# in a split-plot design.
# The analysis is done by taking difference scores
# among all pairs of dependent groups and
# determining which of
# these differences differ across levels of Factor A
# using trimmed means.
#
# For MOM or M-estimators, use spmcpi which uses a bootstrap method
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                x <- y
}

JK<-J*K
if(JK!=length(x))stop("Something is wrong. Expected ",JK," groups but x contains ", length(x), "groups instead.")
MJ<-(J^2-J)/2
MK<-(K^2-K)/2
JMK<-J*MK
MJMK<-MJ*MK
Jm<-J-1
data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data
output<-matrix(0,MJMK,7)
dimnames(output)<-list(NULL,c("A","A","B","B","psihat","sig","crit.sig"))
jp<-1-K
kv<-0
kv2<-0
test<-NA
for(j in 1:J){
jp<-jp+K
xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]]))
for(k in 1:K){
kv<-kv+1
xmat[,k]<-x[[kv]]
}
xmat<-elimna(xmat)
for(k in 1:K){
kv2<-kv2+1
x[[kv2]]<-xmat[,k]
}}
m<-matrix(c(1:JK),J,K,byrow=T)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j<jj){
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
ic<-ic+1
output[ic,1]<-j
output[ic,2]<-jj
output[ic,3]<-k
output[ic,4]<-kk
x1<-x[[m[j,k]]]-x[[m[j,kk]]]
x2<-x[[m[jj,k]]]-x[[m[jj,kk]]]
temp<-qdtest(x1,x2)
output[ic,5]<-median(x1)-median(x2)
test[ic]<-temp$p.value
output[ic,6]<-test[ic]
}}}}}}
ncon<-length(test)
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
}
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output[temp2,7]<-zvec
output
}




bwmedbmcp<-function(J,K,x,JK=J*K,grp=c(1:JK),con=0,alpha=.05,dif=FALSE,pool=FALSE,bop=FALSE,nboot=100,SEED=TRUE){
#
# All pairwise comparisons among levels of Factor B
# in a split-plot design using medians.
#
# Data are pooled for each level
# of Factor B.
# bop=T, use bootstrap estimates of standard errors.
# FWE controlled with Rom's method
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                x <- y
}
JK<-J*K
data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data
if(pool){
data<-list()
m1<-matrix(c(1:JK),J,K,byrow=T)
for(k in 1:K){
for(j in 1:J){
flag<-m1[j,k]
if(j==1)temp<-x[[flag]]
if(j>1){
temp<-c(temp,x[[flag]])
}}
data[[k]]<-temp
}
print("Group numbers refer to levels of Factor B")
if(!dif)temp<-lincdm(data,con=con,alpha=alpha,nboot=nboot,mop=bop)
if(dif)temp<-qdmcpdif(data,con=con,alpha=alpha)
return(temp)
}
if(!pool){
mat<-matrix(c(1:JK),ncol=K,byrow=T)
for(j in 1:J){
data<-list()
ic<-0
for(k in 1:K){
ic<-ic+1
data[[ic]]<-x[[mat[j,k]]]
}
print(paste("For level ", j, " of Factor A:"))
if(!dif)temp<-lincdm(data,con=con,alpha=alpha,nboot=nboot,mop=bop)
if(dif)temp<-qdmcpdif(data,con=con,alpha=alpha)
print(temp$test)
print(temp$psihat)
}}
}

gamplot<-function(x,y,sop=TRUE,pyhat=FALSE,eout=FALSE,xout=FALSE,outfun=out,plotit=TRUE,
xlab="X",ylab="",zlab="",theta=50,phi=25,expand=.5,scale=FALSE,ticktype="simple"){
#
# Plot regression surface using generalized additive model
#
# sop=F, use usual linear model y~x1+x2...
# sop=T, use splines
#
library(akima)
library(mgcv)
x<-as.matrix(x)
np<-ncol(x)
np1<-np+1
if(ncol(x)>4)stop("x should have at most four columns of data")
m<-elimna(cbind(x,y))
if(xout && eout)stop("Can't have xout=eout=T")
if(eout){
flag<-outfun(m)$keep
m<-m[flag,]
}
if(xout){
flag<-outfun(x,plotit=FALSE)$keep
m<-m[flag,]
}
x<-m[,1:np]
x<-as.matrix(x)
y<-m[,np1]
if(!sop){
if(ncol(x)==1)fitr<-fitted(gam(y~x[,1]))
if(ncol(x)==2)fitr<-fitted(gam(y~x[,1]+x[,2]))
if(ncol(x)==3)fitr<-fitted(gam(y~x[,1]+x[,2]+x[,3]))
if(ncol(x)==4)fitr<-fitted(gam(y~x[,1]+x[,2]+x[,3]+x[,4]))
}
if(sop){
if(ncol(x)==1)fitr<-fitted(gam(y~s(x[,1])))
if(ncol(x)==2)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])))
if(ncol(x)==3)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])+s(x[,3])))
if(ncol(x)==4)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])+s(x[,3])+s(x[,4])))
}
last<-fitr
if(plotit){
if(ncol(x)==1){
plot(x,fitr,xlab=xlab,ylab=ylab)
}
if(ncol(x)==2){
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fitr<-interp(mkeep[,1],mkeep[,2],fitr)
persp(fitr,theta=theta,phi=phi,expand=expand,xlab=xlab,ylab=ylab,zlab=zlab,
scale=scale,ticktype=ticktype)
}
}
if(!pyhat)last <- "Done"
last
}

rgvar<-function(x,est=covmcd,...){
#
# compute a robust generalized variance
#
# choices for est are:
#  var
#  covmcd
#  covmve
#  skipcov with MM=F (boxplot) MM=T (MAD-MEDIAN), op=1 (MGV method)
#               op=2 (projection method for outliers)
#  covroc  (S+ only as of Dec, 2005)
#  Rocke's measure of scatter, this requires that the command
#          library(robust) has been executed.
#
library(MASS)
val<-prod(eigen(est(x,...))$values)
val
}
rgvarseb<-function(x,nboot=100,est=skipcov,SEED=TRUE,...){
#
# Bootstrap estimate of the standard error of a robust generalized variance.
#
if(SEED)set.seed(2)
n<-nrow(x)
val<-NA
for(i in 1:nboot){
data<-sample(n,n,replace=TRUE)
val[i]<-rgvar(x[data,],est=est,...)
}
se<-sqrt(var(val))
se
}
covmve<-function(x){
library(MASS)
val<-cov.mve(x)
list(center=val$center,cov=val$cov)
}

mvecov<-function(x){
library(MASS)
val<-cov.mve(x)
val$cov
}


rgvar2g<-function(x,y,nboot=100,est=covmcd,alpha=.05,cop=3,op=2,SEED=TRUE,...){
#
# Two independent groups.
# Test hypothesis of equal generalized variances.
#
#  Choices for est include:
#  var
#  covmcd
#  covmve
#  skipcov with MM=F (boxplot) MM=T (MAD-MEDIAN), op=1 (MGV method)
#               op=2 (projection method for outliers)
#  covroc  Rocke's measure of scatter, this requires that the command
#          library(robust) has been executed.
#
if(SEED)set.seed(2)
se1<-rgvarseb(x,nboot=nboot,est=est,SEED=SEED,...)
se2<-rgvarseb(y,nboot=nboot,est=est,SEED=SEED,...)
dif<-rgvar(x,est=est,...)-rgvar(y,est=est,...)
test.stat<-dif/sqrt(se1^2+se2^2)
test.stat
}

covmcd<-function(x,nsamp="sample"){
#
# nsamp="best" is the default used by R,
# meaning that  the number of samples is chosen so that
# exhaustive enumeration is done up to 5000 samples
# nsamp="sample" the number of samples
#  is min(5*p, 3000)
#
library(MASS)
val<-cov.mcd(x,nsamp=nsamp)
list(center=val$center,cov=val$cov)
}


mcdcov<-function(x,nsamp="sample"){
#
# nsamp="best" is the default used by R,
# meaning that  the number of samples is chosen so that
# exhaustive enumeration is done up to 5000 samples
# nsamp="sample" the number of samples
#  is min(5*p, 3000)
#
#library(lqs)
library(MASS)
val<-cov.mcd(x,nsamp=nsamp)
val$cov
}

ancdes<-function(x,depfun=fdepth,DH=FALSE,FRAC=.5,...){
#
#  Choose points for design of an ANCOVA
#  x is the n by p matrix m.
#
#   DH=T means return the half of the data having
#   the deepest  points
#
#   DH=F, return deepest point and those points on the
#   .5 depth contour
#
#    FRAC the fraction of the least deep points that will not be returned.
#  That is, return 1-FRAC deepest points.
#
if(is.data.frame(x))x=as.matrix(x)
if(!is.matrix(x))stop("x must be a matrix or a data frame")
temp<-depfun(x,plotit=FALSE,...)
temp2<-order(temp)
if(!DH){
val<-matrix(x[temp2[length(temp)],],ncol=ncol(x))
nmid<-round(length(temp)/2)
id2<-(temp[temp2[nmid]]==temp)
val2<-matrix(x[id2,],ncol=ncol(x))
if(!is.matrix(val2))val2<-t(as.matrix(val2))
val<-rbind(val,val2)
}
if(DH){
bot=round(length(temp)*FRAC)
val=matrix(x[temp2[bot:length(temp)],],ncol=ncol(x))
}
val=elimna(val)
val
}


stacklist<-function(x){
#
# Assumes x has list mode with each entry a
# matrix having p columns.
#
# Goal: stack the data into a matrix having p columns.
#
p<-ncol(x[[1]])
xx<-as.matrix(x[[1]])
for(j in 2:length(x)){
temp<-as.matrix(x[[j]])
xx<-rbind(xx,temp)
}
xx
}

smvar<-function(x,y,fr=.6,xout=TRUE,eout=FALSE,xlab="X",ylab="VAR(Y|X)",pyhat=FALSE,plotit=TRUE,nboot=40,
RNA=FALSE,SEED=TRUE){
#
# Estimate VAR(Y|X) using bagged version of running interval method
#
# xout=T eliminates all points for which x is an outlier.
# eout=T eliminates all points for which (x,y) is an outlier.
#
# pyhat=T will return estimate for each x.
#
# RNA=T removes missing values when applying smooth
# with RNA=F, might get NA for some pyhat values.
#
# plotit=TRUE, scatterplot of points x versus square of
# predicted y minus y
# stemming from a smooth. Then plots a line indicating
# var(y|x) using bagged smooth
#
temp <- cbind(x, y)
temp <- elimna(temp)
x <- temp[, 1]
y <- temp[, 2]
yhat<-lplot(x, y, pyhat = TRUE, plotit = FALSE)$yhat.values
yvar<-(y-yhat)^2
estvar<-runmbo(x,y,est=var,pyhat=TRUE,fr=fr,plotit=FALSE,RNA=RNA,nboot=nboot)
if(plotit){
plot(c(x,x),c(yvar,estvar),type="n",xlab=xlab,ylab=ylab)
points(x,yvar)
sx<-sort(x)
xorder<-order(x)
sysm<-estvar[xorder]
lines(sx,sysm)
}
output <- "Done"
if(pyhat)output <- estvar
output
}
locvarsm<-function(x,y,pyhat=FALSE,pts=x,plotit=TRUE,nboot=40,RNA=TRUE,xlab="X",
ylab="VAR(Y|X)",op=2,xout=T,eout=FALSE,pr=TRUE,fr=.6,scat=TRUE,outfun=out,SEED=TRUE){
#
# For each x, estimate VAR(y|x) using bootstrap bagging.
# with
# op=1 uses Fan's kernel method plus bootstrap bagging.
# op=2 uses running interval smoother plus bootstrap bagging
#
# xout=T eliminates points where there are outliers among x values
#        this option applies only when using op=2 and when using
#        running interval smoother.
# eout=T eliminates outliers among cloud of all data.
#
if(SEED)set.seed(2)
temp<-cbind(x,y)
temp<-elimna(temp)
x<-temp[,1]
y<-temp[,2]
if(op==2){
if(pr){
print("Running interval method plus bagging has been chosen")
print("op=1 will use Fan's method plus bagging")
}}
if(op==1){
if(pr){
print("Fan's method plus bagging has been chosen (cf. Bjerve and Doksum)")
print("op=2 will use running interval plus bagging")
}
mat <- matrix(NA, nrow = nboot, ncol = nrow(temp))
for(it in 1:nboot) {
idat <- sample(c(1:length(y)), replace = T)
xx <- temp[idat, 1]
yy <- temp[idat, 2]
mat[it,  ] <- locvar(xx,yy,pts=x,pyhat=TRUE,plotit=FALSE)
}
rmd<-apply(mat,2,mean)
 if(plotit) {
plot(c(x, x), c(y, rmd), type = "n", xlab = xlab, ylab= ylab)
sx <- sort(x)
xorder <- order(x)
sysm <- rmd[xorder]
lines(sx, sysm)
}

output<-"Done"
if(pyhat)output <- rmd
}
if(op==2){
output<-runmbo(x,y,fr=fr,est=var,xlab=xlab,ylab=ylab,pyhat=pyhat,eout=eout,
xout=xout,RNA=RNA,plotit=plotit,scat=scat,nboot=nboot,outfun=outfun,SEED=SEED)
}
output
}

mcp2atm<-function(J,K,x,tr=.2,alpha=.05,grp=NA,op=F){
#
#  Test all linear contrasts associated with
# main effects for Factor A and B and all interactions based on trimmed means
# By default,
# tr=.2, meaning 20% trimming is used.
#
        #   The data are assumed to be stored in x in list mode or in a matrix.
        #  If grp is unspecified, it is assumed x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second factor: level 1,2
        #  x[[j+1]] is the data for level 2,1, etc.
        #  If the data are in wrong order, grp can be used to rearrange the
        #  groups. For example, for a two by two design, grp<-c(2,4,3,1)
        #  indicates that the second group corresponds to level 1,1;
        #  group 4 corresponds to level 1,2; group 3 is level 2,1;
        #  and group 1 is level 2,2.
        #
        #   Missing values are automatically removed.
        #
        JK <- J * K
        if(is.matrix(x))
                x <- listm(x)
        if(!is.na(grp[1])) {
                yy <- x
                x<-list()
                for(j in 1:length(grp))
                        x[[j]] <- yy[[grp[j]]]
        }
        if(!is.list(x))
                stop("Data must be stored in list mode or a matrix.")
        for(j in 1:JK) {
                xx <- x[[j]]
                x[[j]] <- xx[!is.na(xx)] # Remove missing values
        }
        #

        if(JK != length(x))
                warning("The number of groups in x does not match J*K.")
        # Create the three contrast matrices
temp<-con2way(J,K)
conA<-temp$conA
conB<-temp$conB
conAB<-temp$conAB
if(!op){
Factor.A<-lincon(x,con=conA,tr=tr,alpha=alpha)
Factor.B<-lincon(x,con=conB,tr=tr,alpha=alpha)
Factor.AB<-lincon(x,con=conAB,tr=tr,alpha=alpha)
}
All.Tests<-NA
if(op){
Factor.A<-NA
Factor.B<-NA
Factor.AB<-NA
con<-cbind(conA,conB,conAB)
All.Tests<-lincon(x,con=con,tr=tr,alpha=alpha)
}
list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,All.Tests=All.Tests,conA=conA,conB=conB,conAB=conAB)
}
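
# Illustrative sketch (hypothetical data): main effect and interaction
# contrasts for a 2-by-2 design based on 20% trimmed means; assumes con2way
# and lincon from this file. Wrapped in if(FALSE).
if(FALSE){
set.seed(10)
x<-list()
for(j in 1:4)x[[j]]<-rnorm(20)
mcp2atm(2,2,x)
}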

mdifloc<-function(x,y,est=tukmed,...){
#
# Compute multivariate measure of location associated
# with the distribution of x-y
#
# By default, use Tukey's median.
#
x<-as.matrix(x)
y<-as.matrix(y)
FLAG<-F
if(ncol(x)!=ncol(y))stop("x and y should have the same number of columns")
if(ncol(x)==1 && ncol(y)==1)FLAG<-T
if(FLAG)val<-loc2dif(x,y,est=est,...)
if(!FLAG){
J<-(ncol(x)^2-ncol(x))/2
mat<-matrix(NA,ncol=ncol(x),nrow=nrow(x)*nrow(y))
for(j in 1:ncol(x))mat[,j]<-as.vector(outer(x[,j], y[,j], FUN = "-"))
val<-est(mat,...)
}
val
}
mdiflcr<-function(m1,m2,tr=.5,nullv=rep(0,ncol(m1)),plotit=TRUE,
SEED=TRUE,pop=1,fr=.8,nboot=600){
#
# For two independent groups, let D=X-Y.
# Let theta_D be median of marginal distributions
# Goal: Test theta_D=0
#
# This is a multivariate analog of Wilcoxon-Mann-Whitney method
# Only alpha=.05 can be used.
#
# When plotting:
# pop=1 Use scatterplot
# pop=2 Use expected frequency curve.
# pop=3 Use adaptive kernel density
#
if(!is.matrix(m1))stop("m1 is not a matrix")
if(!is.matrix(m2))stop("m2 is not a matrix")
if(ncol(m1)!=ncol(m2))stop("number of columns for m1 and m2 are not equal")
n1<-nrow(m1)
n2<-nrow(m2)
if(SEED)set.seed(2)
data1 <- matrix(sample(n1, size = n1 * nboot, replace = T), nrow = nboot)
data2 <- matrix(sample(n2, size = n2 * nboot, replace = T), nrow = nboot)
bcon <- matrix(NA, ncol = ncol(m1), nrow = nboot)
for(j in 1:nboot)bcon[j,]<-mdifloc(m1[data1[j,],],m2[data2[j,],],est=lloc,tr=tr)
tvec<-mdifloc(m1,m2,est=lloc,tr=tr)
tempcen <- apply(bcon, 1, mean)
smat <- var(bcon - tempcen + tvec)
temp <- bcon - tempcen + tvec
bcon <- rbind(bcon, nullv)
dv <- mahalanobis(bcon, tvec, smat)
bplus <- nboot + 1
sig.level <- 1 - sum(dv[bplus] >= dv[1:nboot])/nboot
if(plotit && ncol(m1)==2){
# mdif: matrix of all pairwise differences between the two groups,
# which the plot is based on; center is the estimate tvec.
mdif<-matrix(as.vector(outer(m1[,1],m2[,1],"-")),ncol=1)
for(j in 2:ncol(m1))mdif<-cbind(mdif,matrix(as.vector(outer(m1[,j],m2[,j],"-")),ncol=1))
center<-tvec
if(pop==2)rdplot(mdif,fr=fr)
if(pop==1){
plot(mdif[,1],mdif[,2],xlab="VAR 1",ylab="VAR 2",type="n")
points(mdif[,1],mdif[,2],pch=".")
points(center[1],center[2],pch="o")
points(0,0,pch="+")
}
if(pop==3)akerdmul(mdif,fr=fr)
}
list(p.value=sig.level,center=tvec)
}

mwmw<-function(m1,m2,cop=5,pr=TRUE,plotit=TRUE,pop=1,fr=.8,op=1,dop=1){
#
# Compute measure of effect size, p,
# a multivariate analog of Wilcoxon-Mann-Whitney p
#
# When plotting:
# pop=1 Use scatterplot
# pop=2 Use expected frequency curve.
# pop=3 Use adaptive kernel density
#
# dop=1, use method A1 approximation of halfspace depth
# dop=2, use method A2 approximation of halfspace depth
#
# cop determines how center of data is determined when
# approximating halfspace depth
# cop=1, Halfspace median
# cop=2, MCD
# cop=3, marginal medians
# cop=4, MVE
# cop=5, skipped mean
#
library(akima)
if(is.null(dim(m1)))stop("m1 is not a matrix or data frame")
if(is.null(dim(m2)))stop("m2 is not a matrix or data frame")
if(ncol(m1)!=ncol(m2))stop("number of columns for m1 and m2 are not equal")
if(ncol(m1)==1)stop("Use R function cid or bmp")
nn<-min(c(nrow(m1),nrow(m2)))
mdif<-matrix(as.vector(outer(m1[,1],m2[,1],"-")),ncol=1)
for(j in 2:ncol(m1)){
mdif<-cbind(mdif,matrix(as.vector(outer(m1[,j],m2[,j],"-")),ncol=1))
}
if(op==1){
if(ncol(m1)==2)temp2<-depth2(rbind(mdif,c(rep(0,ncol(m1)))))
#if(ncol(m1)==3)temp2<-depth3(rbind(mdif,c(rep(0,ncol(m1)))))
if(ncol(m1)>2){
if(cop==1)center<-dmean(mdif,tr=.5,dop=dop)
if(cop==2)center<-cov.mcd(mdif)$center
if(cop==3)center<-apply(mdif,2,median)
if(cop==4)center<-cov.mve(mdif)$center
if(cop==5)center<-smean(mdif)
temp2<-fdepth(rbind(mdif,c(rep(0,ncol(m1)))))
}}
if(op==2){
temp2<-pdis(rbind(mdif,c(rep(0,ncol(m1)))))
temp2<-1/(temp2+1)
}
center<-dmean(mdif,tr=.5,dop=dop)
phat<-temp2[nrow(mdif)+1]/max(temp2)
# phat is relative depth of zero vector
# Determine critical value
crit<-NA
alpha<-c(.1,.05,.025,.01)
crit[1]<-1-1.6338/sqrt(nn)
crit[2]<-1-1.8556/sqrt(nn)
crit[3]<-1-2.0215/sqrt(nn)
crit[4]<-1-2.1668/sqrt(nn)
if(pr){
print("For alpha=.1,.05,.025,.01, the correspoding critical values are")
print(crit)
print("Reject if phat is less than or equal to the critical value")
}
if(plotit && ncol(m1)==2){
if(pop==2)rdplot(mdif,fr=fr)
if(pop==1){
plot(mdif[,1],mdif[,2],xlab="VAR 1",ylab="VAR 2",type="n")
points(mdif[,1],mdif[,2],pch=".")
points(center[1],center[2],pch="o")
points(0,0,pch="+")
}
if(pop==3)akerdmul(mdif,fr=fr)
}
list(phat=phat,center=center,crit.val=crit)
}

qreg<-function(x, y,qval=.5,q=NULL,op=1,v2=TRUE,pr=FALSE,xout=FALSE,outfun=outpro,plotit=FALSE,xlab="X",ylab="Y",...)
{
#
# Compute the quantile regression line. That is, the goal is to
# determine the qth (qval) quantile of Y given X using the
#  the Koenker-Bassett approach.
#
#  v2=T, uses the function rq in the R library quantreg
#  v2=F, uses an older and slower version
#	
if(!is.null(q))qval=q
x<-as.matrix(x)
X<-cbind(x,y)
X<-elimna(X)
np<-ncol(X)
p<-np-1
x<-X[,1:p]
x<-as.matrix(x)
y<-X[,np]
if(xout){
x<-as.matrix(x)
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(!v2){
temp<-ltareg(x,y,0,op=op)
if(qval==.5){
coef<-temp$coef
res<-temp$res
}
if(qval!=.5){
START<-temp$coef
coef<-nelderv2(X,np,FN=qreg.sub,START=START,qval=qval)
}}
if(v2){
if(pr){
print("v2=T attempts to use a faster version by calling")
print("the function rq, which is stored in the library quantreg,")
print("which can be downloaded from")
print("http://cran.r-project.org/src/contrib/PACKAGES.html")
print("On a PC, store quantreg in the library subdirectory of R")
print("On a unix machine, try the command install.packages('quantreg')")
print("To avoid this message, use pr=FALSE")
print(" ")
}
library(quantreg)
x<-as.matrix(x)
temp<-rq(y~x,tau=qval)
coef<-temp[1]$coefficients
}
if(ncol(x)==1){
if(plotit){
plot(x,y,xlab=xlab,ylab=ylab)
abline(coef)
}}
res <- y - x%*%coef[2:np] - coef[1]
list(coef = coef, residuals = res)
}
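
# Illustrative sketch (hypothetical data): .75 quantile regression line via
# the Koenker-Bassett approach; requires the quantreg package. Wrapped in if(FALSE).
if(FALSE){
set.seed(11)
x<-rnorm(100)
y<-x+rnorm(100)
qreg(x,y,qval=.75,pr=FALSE)$coef
}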



qindbt.sub<-function(isub,x,y,qval){
#
#  Perform regression using x[isub] to predict y[isub]
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  This function is used by other functions when computing
#  bootstrap estimates.
#
#  The slope of the quantile regression line (via qreg) is computed
#  for each quantile in qval.
#
#  x is assumed to be a matrix containing values of the predictors.
#
xmat<-matrix(x[isub,],nrow(x),ncol(x))
regboot<-NA
for(i in 1:length(qval)){
regboot[i]<-qreg(xmat,y[isub],qval[i])$coef[2]
}
regboot
}





runmq<-function(x,y,HD=FALSE,qval=c(.2,.5,.8),xlab="X",ylab="Y",fr=1,
sm=FALSE,nboot=40,SEED=TRUE,eout=FALSE,xout=FALSE,...){
#
# Plot of running interval smoother based on specified quantiles in
# qval
#
# fr controls amount of smoothing
#
# Missing values are automatically removed.
#
rmd1<-NA
xx<-cbind(x,y)
p<-ncol(xx)-1
xx<-elimna(xx)
x<-xx[,1:p]
y<-xx[,ncol(xx)]
plot(x,y,xlab=xlab,ylab=ylab)
sx1<-sort(x)
xorder1<-order(x)
for(it in 1:length(qval)){
if(!sm){
if(!HD)temp<-rungen(x,y,est=qest,fr=fr,pyhat=TRUE,plotit=FALSE,q=qval[it])
if(HD)temp<-rungen(x,y,est=hd,fr=fr,pyhat=TRUE,plotit=FALSE,q=qval[it])
rmd1<-temp[1]$output
sysm1<-rmd1[xorder1]
lines(sx1,sysm1)
}
if(sm){
if(!HD)temp<-runmbo(x,y,est=qest,fr=fr,pyhat=TRUE,plotit=FALSE,SEED=SEED,
nboot=nboot,eout=eout,xout=xout,q=qval[it])
if(HD)temp<-runmbo(x,y,est=hd,fr=fr,pyhat=TRUE,plotit=FALSE,SEED=SEED,
nboot=nboot,eout=eout,xout=xout,q=qval[it])
rmd1<-temp
sysm1<-rmd1[xorder1]
lines(sx1,sysm1)
}
}}


ritest<-function(x,y,adfun=adrun,plotfun=lplot,eout=FALSE,xout=TRUE,plotit=TRUE,flag=3,
nboot=500,alpha=.05,tr=.2,...){
#
# There are two methods for testing for regression interactions
# using robust smooths.
# The first, performed by this function, fits an additive model
# and test the hypothesis that the residuals, given x, is a
# horizontal plane.
#
# The second, which is done by function adtest, tests the hypothesis
# that a generalized additive model fits the data.
#
# Plot used to investigate regression interaction
# (the extent a generalized additive model does not fit data).
# Compute additive fit, plot residuals
# versus x, an n by 2 matrix.
#
if(!is.matrix(x))stop(" x must be a matrix")
if(ncol(x)!=2)stop(" x must have two columns only")
yhat<-adfun(x,y,pyhat=TRUE,eout=eout,xout=xout,plotit=FALSE)
res<-y-yhat
output<-indt(x,res,flag=flag,nboot=nboot,alpha=alpha,tr=tr)
if(plotit)plotfun(x,y-yhat,eout=eout,xout=xout,expand = 0.5,scale=FALSE,xlab="X",
ylab="Y",zlab="",theta=50,phi=25,...)
output
}

gvar2g<-function(x,y,nboot=100,DF=TRUE,eop=1,est=skipcov,
alpha=.05,cop=3,op=1,MM=FALSE,SEED=TRUE,pr=FALSE,fast=FALSE,...){
#
# Two independent groups.
# Test hypothesis of equal generalized variances.
#
# DF=T, means skipcov with MM=F is used.
#
# That is, W-estimator based on a projection outlier detection method
# and Carling's method applied to projections.
# if equal sample sizes, adjusted critical value is used where appropriate
#
# DF=F
# no adjusted critical value is used and any robust measure of
# scatter can be used.
#
#  Choices for est include:
#  var
#  covmcd
#  covmve
#  skipcov with MM=F (boxplot) MM=T (MAD-MEDIAN), op=1 (MGV method)
#               op=2 (projection method for outliers)
#  covroc  Rocke's measure of scatter,
#
#   op, cop and eop, see skipcov
#   adjusted critical level should be used with
#   skipcov and alpha=.05 only.
#   fast=T, will use skipcov.for if it is available.
#
#    Function returns ratio of first estimate divided by second estimate
#
if(SEED)set.seed(2)
#if(!is.matrix(x))stop("x should be a matrix with ncol>1")
if(is.null(dim(x)))stop("x should be a matrix or data frame with ncol>1")
if(is.null(dim(y)))stop("y should be a matrix or data frame with ncol>1")
#if(!is.matrix(y))stop("y should be a matrix with ncol>1")
if(ncol(x)==1 || ncol(y)==1)stop("Only multivariate data are allowed")
n1<-nrow(x)
n2<-nrow(y)
adalpha<-NA
if(DF){
if(n1==n2 && alpha==.05){
p1<-ncol(x)
if(p1==2){
if(n1>=20)adalpha<-1.36/n1+.05
}
if(p1==3){
if(n1>=20)adalpha<-1.44/n1+.05
}
if(p1==4){
if(n1>=40)adalpha<-2.47/n1+.05
}
if(p1==5){
if(n1>=40)adalpha<-3.43/n1+.05
}
if(p1==6){
if(n1>=60)adalpha<-4.01/n1+.05
}}}
val<-NA
for(j in 1:nboot) {
                data1 <- sample(n1, size = n1, replace = T)
                data2 <- sample(n2, size = n2, replace = T)
if(!DF){
val[j]<-rgvar(as.matrix(x[data1,]),est=est,...)-
rgvar(as.matrix(y[data2,]),est=est,...)
}
if(DF){
if(!fast)val[j]<-
rgvar(as.matrix(x[data1,]),est=skipcov,op=op,outpro.cop=cop,MM=MM,...)-
rgvar(as.matrix(y[data2,]),est=skipcov,op=op,outpro.cop=cop,MM=MM,...)
if(fast)val[j]<-
rgvar(as.matrix(x[data1,]),est=skipcov.for,op=op,outpro.cop=cop,MM=MM,...)-
rgvar(as.matrix(y[data2,]),est=skipcov.for,op=op,outpro.cop=cop,MM=MM,...)
}
if(pr)print(c(j,val[j]))
}
p.value<-sum(val<0)/nboot
p.value<-2*min(p.value,1-p.value)
est1=rgvar(x,est=est)
est2=rgvar(y,est=est)
list(p.value=p.value,adjusted.crit.level=adalpha,ratio.of.estimates=est1/est2,n1=n1,n2=n2)
}

grit<-function(x,y,itest=1,sm.fun=rplot,nboot=500,alpha=.05,SEED=TRUE,
fr=1,plot.fun=rplot,plotit=TRUE,...){
#
# Fit a running interval smoother using projection distances
# excluding the predictor variable itest
# itest=1 by default, meaning that the goal is to test
# the hypothesis that the first variable does not contribute
# to the regression model
#
# Method fits a smooth using x_1, ..., x_p, excluding variable itest
# Then x_itest and the resulting residuals are passed to indt
# Alternative choices for smooth include
# sm.fun=lplot, and if p>2, runpd
#
if(!is.matrix(x))stop("Should have two or more predictors stored in a matrix")
p<-ncol(x)
pp<-p+1
x<-elimna(cbind(x,y))
y<-x[,pp]
x<-x[,1:p]
flag<-rep(T,ncol(x))
flag[itest]<-F
temp<-sm.fun(x[,flag],y,plotit=FALSE,pyhat=TRUE,fr=fr)
res<-y-temp
test.it<-indt(x[,itest],res)
if(plotit)plot.fun(x[,itest],res,...)
test.it
}
stackit<-function(x,jval){
#
# Take a matrix having p columns and convert
# it to a matrix having jval columns and np/jval rows
# So take first jval columns, and rbind this with
# next jval columns, etc.
#
x<-as.matrix(x)
chkit<-ncol(x)%%jval
if(chkit!=0)stop("ncol(x) is not a multiple of jval")
xval<-x[,1:jval]
xval<-as.matrix(xval)
iloop<-ncol(x)/jval-1
il<-1
iu<-jval
for(i in 1:iloop){
il<-il+jval
iu<-iu+jval
temp<-x[,il:iu]
temp<-as.matrix(temp)
xval<-rbind(xval,temp)
}
xval
}
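
# Illustrative sketch: stack a 2-by-6 matrix into a 6-by-2 matrix, two
# columns at a time. Wrapped in if(FALSE) so it never runs on source.
if(FALSE){
m<-matrix(1:12,nrow=2)
stackit(m,2)
}
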
ancmg<-function(x,y,pool=TRUE,jcen=1,fr=1,depfun=fdepth,nmin=8,op=3,tr=.2,pts=NA,
SEED=TRUE,pr=TRUE,cop=3,con=0,nboot=NA,alpha=.05,bhop=F){
#
# ANCOVA
# for two or more groups based on trimmed means or medians
# Multiple covariates are allowed.
#
# op=1 use omnibus test for trimmed means, with trimming given by tr
# op=2 use omnibus test for medians.
#       (Not recommended when there are tied values, use op=4)
# op=3 multiple comparisons using trimming and percentile bootstrap.
#     This method seems best for general use.
# op=4 multiple comparisons using medians and percentile bootstrap
#
# y is matrix with J columns, so have J groups.
# or y can have list mode with length J
#
# x is a matrix with Jp columns, so first p columns
# correspond to the p covariates in the first group, etc.
# Or,
# x can have list mode with length J and each component
# being a matrix with p columns.
# So if covariates for group 1 are in the matrix m1
# x[[1]]<-m1 will store them in x, x having list mode
#
# nmin is the minimum sample size allowed for any group
# when testing hypotheses.
# If a design point results in a sample size <nmin,
# that point is eliminated.
#
#  pool=T means pool the data when determining the center of the
#  design points and the measure of scatter when applying the smooth
#
#  pool=F, does not pool but rather use the data from group
#  jcen to determine center and the measure of scatter
#
#  pts, a matrix that can be used to specify design points to be used
#       number of columns should equal number of covariates.
#
#  depfun determines how the depth of a point is determined,
#  default is projection depth
#
#  The output includes a matrix of sample sizes. The ith row
#  corresponds to the ith point used to compare groups.
#  The jth column indicates the number of points (the sample size)
#  that was found for the jth group. That is, how many points
#  in the jth group were found that are close to the design point
#  under consideration.
#
library(MASS)
output<-NULL
if(SEED)set.seed(2) # set the seed so that MVE always gives same result
if(pr){
if(op==1)print("Trimmed means are to be compared. For medians, use op=2")
if(op==2)print("Medians are to be compared. For trimmed means, use op=1")
if(op==3)print("20% trimmed means are compared. For medians, use op=4")
if(op==4)print("medians are compared. For 20% trimmed means, use op=3")
}
nval<-NA
if(is.matrix(y))J<-ncol(y)
if(is.list(y))J<-length(y)
if(is.list(x))pval<-ncol(x[[1]])
if(J==1)stop("Only have one group stored in y")
if(is.matrix(x)){
if(ncol(x)%%J!=0)stop("Number of columns of x should be a multiple of ncol(y)")
}
if(is.matrix(x)){
pval<-ncol(x)/J
temp<-seq(1,ncol(x),pval)
js<-temp[jcen]
jcenp<-js+pval-1
if(jcenp > ncol(x))stop("jcen has an invalid value")
xcen<-x[,js:jcenp]
}
if(is.list(x))xcen<-x[[jcen]]
if(pool){
if(is.matrix(x))xval<-stackit(x,pval)
if(is.list(x))xval<-stacklist(x)
mval<-cov.mve(xval)
pts<-ancdes(xval,depfun=depfun,cop=cop)
}
if(!pool){
pts<-ancdes(xcen,depfun=depfun,cop=cop)
mval<-cov.mve(xcen)
}
nval<-matrix(NA,ncol=J,nrow=nrow(pts))
icl<-0-pval+1
icu<-0
for(j in 1:J){
icl<-icl+pval
icu<-icu+pval
for(i in 1:nrow(pts)){
if(is.matrix(x)  && is.matrix(y)){
nval[i,j]<-length(y[near3d(x[,icl:icu],pts[i,],fr,mval),j])
}
if(is.matrix(x)  && is.list(y)){
tempy<-y[[j]]
nval[i,j]<-length(tempy[near3d(x[,icl:icu],pts[i,],fr,mval)])
}
if(is.list(x)  && is.matrix(y)){
xm<-as.matrix(x[[j]])
nval[i,j]<-length(y[near3d(xm,pts[i,],fr,mval),j])
}
if(is.list(x)  && is.list(y)){
tempy<-y[[j]]
xm<-as.matrix(x[[j]])
nval[i,j]<-length(tempy[near3d(xm,pts[i,],fr,mval)])
}
#
}}
flag<-rep(T,nrow(pts))
for(i in 1:nrow(pts)){
if(min(nval[i,])<nmin)flag[i]<-F
}
nflag<-F
if(sum(flag)==0){
print("Warning: No design points found with large enough sample size")
nflag<-T
}
if(!nflag){
pts<-pts[flag,] # eliminate points for which the sample size is too small
nval<-nval[flag,]
if(!is.matrix(pts))pts<-t(as.matrix(pts))
output<-matrix(NA,nrow=nrow(pts),ncol=3)
dimnames(output)<-list(NULL,c("point","test.stat","p-value"))
if(op==3 || op==4)output<-list()
}
for(i in 1:nrow(pts)){
if(op==1 || op==2)output[i,1]<-i
icl<-0-pval+1
icu<-0
yval<-list()
for(j in 1:J){
icl<-icl+pval
icu<-icu+pval
if(is.matrix(x)  && is.matrix(y)){
yval[[j]]<-y[near3d(x[,icl:icu],pts[i,],fr,mval),j]
}
if(is.matrix(x)  && is.list(y)){
tempy<-y[[j]]
yval[[j]]<-tempy[near3d(x[,icl:icu],pts[i,],fr,mval)]
}
if(is.list(x)  && is.matrix(y)){
yval[[j]]<-y[near3d(x[[j]],pts[i,],fr,mval),j]
}
if(is.list(x)  && is.list(y)){
tempy<-y[[j]]
yval[[j]]<-tempy[near3d(x[[j]],pts[i,],fr,mval)]
}
#
}
if(op==1)temp<-t1way(yval,tr=tr)
if(op==2)
{
print("WARNING: NOT RECOMMENDED FOR DISCRETE DATA WITH TIES")
print("RECOMMENDATION: Set the argument op=4")
temp<-med1way(yval,SEED=SEED,pr=FALSE)
}
if(op==1 || op==2){
output[i,2]<-temp$TEST
if(op==1)output[i,3]<-temp$siglevel
if(op==2)output[i,3]<-temp$p.value
}
if(op==3){
output[[i]]<-pbmcp(yval,alpha=alpha,SEED=SEED,con=con,bhop=bhop,est=tmean)
}
if(op==4){output[[i]]<-medpb(yval,alpha=alpha,SEED=SEED,con=con,bhop=bhop)
}
}
#print(nflag)
if(nflag)output<-NULL
print("Points Chosen:")
print(pts)
print("Sample Sizes:")
print(nval)
output
#list(points.chosen=pts,sample.sizes=nval,output=output)
}
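#
# Example (illustrative, with simulated data; J=2 groups, p=2 covariates;
# assumes the supporting functions in this file plus MASS are available):
#   set.seed(1)
#   x <- cbind(matrix(rnorm(100), ncol = 2), matrix(rnorm(100), ncol = 2))
#   y <- matrix(rnorm(100), ncol = 2)
#   ancmg(x, y)  # multiple comparisons via the default op=3
#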
ancmg1<-function(x,y,pool=TRUE,jcen=1,fr=1,depfun=fdepth,nmin=8,op=3,tr=.2,
SEED=TRUE,pr=TRUE,pts=NA,con=0,nboot=NA,alpha=.05,bhop=F){
#
# ANCOVA
# for two or more groups based on trimmed means or medians
# Single  covariate is assumed.
#
# FOR TWO OR MORE COVARIATES, USE ANCMG
#
# for two groups and one covariate, also consider ancova and ancGpar
#
# op=1 use omnibus test for trimmed means, with trimming given by tr
# op=2 use omnibus test for medians.
#       (Not recommended when there are tied values, use op=4)
# op=3 multiple comparisons using trimming and percentile bootstrap.
#     This method seems best for general use.
# op=4 multiple comparisons using medians and percentile bootstrap
#
# y is matrix with J columns, so have J groups.
# or y can have list mode with length J
#
# x is a matrix with Jp columns, so first p columns
# correspond to the p covariates in the first group, etc.
#
# Or,
# x can have list mode with length J
#
# nmin is the minimum sample size allowed for any group
# when testing hypotheses.
# If a design point results in a sample size <nmin,
# that point is eliminated.
#
#  pool=T means pool the data when determining the center of the
#  design points and the measure of scatter when applying the smooth
#
#  pool=F, does not pool but rather use the data from group
#  jcen to determine center and the measure of scatter
#
#  The output includes a matrix of sample sizes. The ith row
#  corresponds to the ith point used to compare groups.
#  The jth column indicates the number of points (the sample size)
#  that was found for the jth group. That is, how many points
#  in the jth group were found that are close to the design point
#  under consideration.
#
if(SEED)set.seed(2) # set the seed so that MVE always gives same result
if(pr){
if(op==1)print("Trimmed means are to be compared. For medians, use op=2")
if(op==2)print("Medians are to be compared. For trimmed means, use op=1")
if(op==3)print("20% trimmed means are compared. For medians, use op=4")
if(op==4)print("medians are compared. For 20% trimmed means, use op=3")
}
output<-NULL
nval<-NA
if(is.matrix(y))J<-ncol(y)
if(is.list(y))J<-length(y)
if(is.matrix(x))pval<-ncol(x)/J
if(is.list(x))pval<-ncol(as.matrix(x[[1]]))
if(pval>1)stop("More than one covariate. Use ancmg")
if(J==1)stop("Only have one group stored in y")
if(is.matrix(x)){
if(ncol(x)%%J!=0)stop("Number of columns of x should be a multiple of ncol(y)")
}
if(is.matrix(x)){
xcen<-x[,jcen]
}
if(is.list(x))xcen<-x[[jcen]]
if(is.na(pts[1])){
if(pool){
if(is.matrix(x))xval<-stackit(x,1)
if(is.list(x))xval<-stacklist(x)
temp<-idealf(xval)
pts<-temp$ql
pts[2]<-median(xval)
pts[3]<-temp$qu
}
if(!pool){
temp<-idealf(xcen)
pts<-temp$ql
pts[2]<-median(xcen)
pts[3]<-temp$qu
}}
nval<-matrix(NA,ncol=J,nrow=length(pts))
for(j in 1:J){
for(i in 1:length(pts)){
if(is.matrix(x)  && is.matrix(y)){
nval[i,j]<-length(y[near(x[,j],pts[i],fr=fr),j])
}
if(is.matrix(x)  && is.list(y)){
tempy<-y[[j]]
nval[i,j]<-length(tempy[near(x[,j],pts[i],fr=fr)])
}
if(is.list(x)  && is.matrix(y)){
xm<-as.matrix(x[[j]])
nval[i,j]<-length(y[near(xm,pts[i],fr=fr),j])
}
if(is.list(x)  && is.list(y)){
tempy<-y[[j]]
xm<-as.matrix(x[[j]])
nval[i,j]<-length(tempy[near(xm,pts[i],fr=fr)])
}
#
}}
flag<-rep(TRUE,length(pts))
for(i in 1:length(pts)){
if(min(nval[i,])<nmin)flag[i]<-F
}
nflag<-F
if(sum(flag)==0){
print("Warning: No design points found with large enough sample size")
nflag<-T
}
if(!nflag){
pts<-pts[flag] # eliminate points for which the sample size is too small
nval<-nval[flag,]
if(!is.matrix(pts))pts<-t(as.matrix(pts))
output<-matrix(NA,nrow=length(pts),ncol=3)
dimnames(output)<-list(NULL,c("point","test.stat","p-value"))
if(op==3 || op==4)output<-list()
}
for(i in 1:length(pts)){
if(op==1 || op==2)output[i,1]<-i
icl<-0-pval+1
icu<-0
yval<-list()
for(j in 1:J){
if(is.matrix(x)  && is.matrix(y)){
yval[[j]]<-y[near(x[,j],pts[i],fr=fr),j]
}
if(is.matrix(x)  && is.list(y)){
tempy<-y[[j]]
yval[[j]]<-tempy[near(x[,j],pts[i],fr=fr)]
}
if(is.list(x)  && is.matrix(y)){
yval[[j]]<-y[near(x[[j]],pts[i],fr=fr),j]
}
if(is.list(x)  && is.list(y)){
tempy<-y[[j]]
yval[[j]]<-tempy[near(x[[j]],pts[i],fr=fr)]
}
#
}
if(op==1)temp<-t1way(yval,tr=tr)
if(op==2)
{
print("WARNING: NOT RECOMMENDED FOR DISCRETE DATA WITH TIES")
print("RECOMMENDATION: Set the argument op=4")
temp<-med1way(yval,SEED=SEED,pr=FALSE)
}
if(op==1 || op==2){
output[i,2]<-temp$TEST
if(op==1)output[i,3]<-temp$siglevel
if(op==2)output[i,3]<-temp$p.value
}
if(op==3){
output[[i]]<-pbmcp(yval,alpha=alpha,SEED=SEED,con=con,bhop=bhop,est=tmean)
}
if(op==4){output[[i]]<-medpb(yval,alpha=alpha,SEED=SEED,con=con,bhop=bhop)
}
}
if(nflag)output<-NULL
list(points.chosen=pts,sample.sizes=nval,output=output)
}
qhomtv2<-function(x,y,nboot=100,alpha=.05,qval=c(.2,.8),plotit=TRUE,SEED=TRUE){
#
#   Test the hypothesis of homoscedasticity by
#   computing a confidence interval for the difference between
#   the qval[2] and qval[1] quantile regression slopes,
#   estimated via the Koenker-Bassett method.
#   So by default, use the .8 quantile slope minus
#   the .2 quantile slope.
#
print("Note: adjusted confidence intervals are used;")
print("they can differ from p-values")
print("FWE is not controlled")
if(length(qval)!=2)stop("Argument qval should have 2 values")
x<-as.matrix(x)
p<-ncol(x)
xy<-elimna(cbind(x,y))
x<-xy[,1:p]
x<-as.matrix(x)
pp<-p+1
y<-xy[,pp]
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec1<-apply(data,1,qhomtsub2,x,y,qval[1]) # A p by nboot matrix
bvec2<-apply(data,1,qhomtsub2,x,y,qval[2])
bvec1<-as.matrix(bvec1)
bvec2<-as.matrix(bvec2)
if(p==1){
bvec1<-t(bvec1)
bvec2<-t(bvec2)
}
se<-NA
for(j in 1:p)se[j]<-sqrt(var(bvec1[j,]-bvec2[j,]))
temp1<-rqfit(x,y,qval[1])$coef[2:pp]
temp2<-rqfit(x,y,qval[2])$coef[2:pp]
crit<-qnorm(1-alpha/2)
crit.ad<-NA
dif<-temp2-temp1
regci<-NA
regci1<-dif-crit*se
regci2<-dif+crit*se
sig.level<-2*(1-pnorm(abs(dif)/se))
regci.ad<-NA
if(alpha==.05 && qval[1]==.2 && qval[2]==.8)
crit.ad<-qnorm(0-.09/sqrt(length(y))+.975)
output<-matrix(NA,ncol=7,nrow=p)
dimnames(output)<-list(NULL,c("Est 1","Est 2","Dif","SE",
"ci.lower","ci.upper","p.value"))
output[,1]<-temp1
output[,2]<-temp2
output[,3]<-dif
output[,4]<-se
output[,5]<-regci1
output[,6]<-regci2
output[,7]<-sig.level
ci.ad<-c(dif-crit.ad*se,dif+crit.ad*se)
output
}
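#
# Example (illustrative, with simulated heteroscedastic data):
#   set.seed(1)
#   x <- rnorm(100)
#   y <- x + (1 + abs(x)) * rnorm(100)
#   qhomtv2(x, y)  # compare the .8 and .2 quantile regression slopes
#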



qhomtsub2<-function(isub,x,y,qval){
#
#  Perform quantile regression using x[isub] to predict y[isub]
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  This function is used by other functions when computing
#  bootstrap estimates.
#
#  regfun is some regression method already stored in R
#  It is assumed that regfun$coef contains the  intercept and slope
#  estimates produced by regfun.  The regression methods written for
#  this  book, plus regression functions in R, have this property.
#
#  x is assumed to be a matrix containing values of the predictors.
#
xmat<-matrix(x[isub,],nrow(x),ncol(x))
pp<-ncol(x)+1
temp<-rqfit(xmat,y[isub],qval)$coef[2:pp]
temp
}



rslope<-function(x,y,fr=1,est=tmean,nmin=10,pts=x,plotit=FALSE,xlab="X",
ylab="Y",...){
#
# Estimate slope at points in pts.
#
# fr controls amount of smoothing
#
# Missing values are automatically removed.
#
temp<-cbind(x,y)
temp<-elimna(temp) # Eliminate any rows with missing values
x<-temp[,1]
y<-temp[,2]
vals<-rep(NA,length(pts))
for(i in 1:length(pts)){
flagl<-nearl(x,fr=fr,pts[i])
flagr<-nearr(x,fr=fr,pts[i])
flagr<-as.logical(flagr)
flagl<-as.logical(flagl)
if(sum(flagl)>=nmin && sum(flagr)>=nmin){
yl<-est(y[flagl],...)
yr<-est(y[flagr],...)
xl<-est(x[flagl],...)
xr<-est(x[flagr],...)
vals[i]<-(yr-yl)/(xr-xl)
}}
if(plotit){
plot(c(x,x[1],x[2]),c(vals,-5,5),xlab=xlab,ylab=ylab)
xord<-order(x)
lines(x[xord],vals[xord])
}
vals
}
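#
# Example (illustrative, with simulated data):
#   set.seed(1)
#   x <- rnorm(100)
#   y <- x^2 + rnorm(100)
#   rslope(x, y, pts = c(-1, 0, 1))  # estimated slopes near x = -1, 0, 1
#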


rslopesm<-function(x,y,fr=1,est=tmean,nmin=10,pts=x,plotit=FALSE,xlab="X",
ylab="Y",SEED=TRUE,nboot=40,xout=FALSE,RNA=TRUE,atr=.2,scat=TRUE,pyhat=TRUE,...){
#
#  For a regression line predicting Y given X
# Estimate slope at points in pts with bagging
# followed by a smooth.
#
# pyhat=T, returns estimated slopes corresponding to the sorted
# x values.
# fr controls amount of smoothing
# atr controls the amount of trimming.
#
# OUTPUT: by default, the estimated  slopes at
# X_1<=X_2<=...<=X_n
# That is, for the x values written in ascending order, the
# slope is estimated for each value. If the slope is not considered
# estimable, the estimate is set to NA.
#
# pts is used if the goal is to estimate the slope for some
# other collection of points.
#
# nmin controls how many points close to x are required when
# deciding that the slope is estimable.
# plotit=TRUE will plot the estimates.
#
# The plotted points are the estimates using rslope and
# the solid line gives the estimated values reported by this function
#
# Missing values are automatically removed.
#
if(SEED) set.seed(2)
temp<-cbind(x,y)
if(ncol(temp)!=2)stop("One predictor only is allowed")
temp<-elimna(temp) # Eliminate any rows with missing values
if(xout) {
                flag <- outfun(temp[, 1], plotit = F)$keep
                temp <- temp[flag,  ]
}
x<-temp[,1]
y<-temp[,2]
flag<-order(x)
x<-x[flag]
y<-y[flag]
mat<-matrix(NA,nrow=nboot,ncol=length(pts))
vals<-NA
       for(it in 1:nboot) {
                idat <- sample(c(1:length(y)), replace = T)
                xx <- temp[idat, 1]
                yy <- temp[idat, 2]
#                mat[it,  ] <- runhat(xx, yy, pts = x, est = est, fr = fr, ...)
mat[it,]<-rslope(xx,yy,fr=fr,est=est,nmin=nmin,pts=x,plotit=FALSE)
        }
rmd<-apply(mat,2,mean,na.rm=RNA,tr=atr)
flag<-is.na(rmd)
rmdsm<-lplot(x,rmd,pyhat=TRUE,plotit=plotit)
output<-"Done"
if(pyhat){
temp<-rep(NA,length(x))
temp[!flag]<-rmdsm$yhat.values
output<-temp
}
output
}

m1way<-function(x,est=hd,nboot=599,SEED=TRUE,...){
#
#   Test the hypothesis that J measures of location are equal
#   using the percentile bootstrap method.
#   By default, medians are compared using 599 bootstrap samples.
#   and the Harrell-Davis Estimator. To use the usual sample median, set
#   est=median
#
#   The data are assumed to be stored in x in list mode.  Thus,
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J, say.
#
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or a matrix.")
J<-length(x)
nval<-vector("numeric",length(x))
gest<-vector("numeric",length(x))
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
bvec<-matrix(0,J,nboot)
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
print(paste("Working on group ",j))
nval[j]<-length(x[[j]])
gest[j]<-est(x[[j]])
xcen<-x[[j]]-est(x[[j]],...)
data<-matrix(sample(xcen,size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,est,...) # A J by nboot matrix
#                     containing the bootstrap values of est.
}
teststat<-wsumsq(gest,nval)
testb<-apply(bvec,2,wsumsq,nval)
p.value<-1 - sum(teststat >= testb)/nboot
list(teststat=teststat,p.value=p.value)
}
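#
# Example (illustrative, with three simulated groups):
#   set.seed(1)
#   dat <- list(rnorm(30), rnorm(30), rnorm(30, .5))
#   m1way(dat)                # compare Harrell-Davis estimates of the medians
#   m1way(dat, est = median)  # compare the usual sample medians instead
#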

oancpb<-function(x1,y1,x2,y2,est=tmean,tr=.2,pts=NA,fr1=1,fr2=1,nboot=600,
alpha=.05,plotit=TRUE,SEED=TRUE,PRO=FALSE,...){
#
# Compare two independent  groups using an ancova method
# with a percentile bootstrap combined with a running interval
# smooth.
#
#  CURRENTLY SEEMS THAT THE R FUNCTION ancGLOB is better.
#
#  This function performs an omnibus test using data corresponding
#  to K design points  specified by the argument pts. If
#  pts=NA, K=5 points are chosen for you (see Introduction to Robust
#  Estimation and Hypothesis Testing.)
#  Null hypothesis is that conditional distribution of Y, given X for first
#  group,  minus the conditional distribution of Y, given X for second
#  group is equal to zero.
#  The strategy is to choose K specific X values
#  and then test the hypothesis that all K differences are zero.
#
#  If you want to choose specific X values, Use the argument
#  pts
#  Example: pts=c(1,3,5) will use X=1, 3 and 5.
#
#  For multiple comparisons using these J points, use ancpb
#
#  Assume data are in x1 y1 x2 and y2
#
# PRO=F, means Mahalanobis distance is used.
# PRO=T, projection distance is used.
#
#  fr1 and fr2 are the spans used to fit a smooth to the data.
#
stop('USE ancGLOB')
#
#
gv1<-vector("list")
if(is.na(pts[1])){
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
for (i in 1:5){
j<-i+5
temp1<-y1[near(x1,x1[isub[i]],fr1)]
temp2<-y2[near(x2,x1[isub[i]],fr2)]
temp1<-temp1[!is.na(temp1)]
temp2<-temp2[!is.na(temp2)]
gv1[[i]]<-temp1
gv1[[j]]<-temp2
}
#
loc<-NA
if(SEED)set.seed(2)
bvec<-matrix(NA,nrow=nboot,ncol=5)
for(j in 1:5){
k<-j+5
loc[j]<-est(gv1[[j]])-est(gv1[[k]])
xx<-matrix(sample(gv1[[j]],size=length(gv1[[j]])*nboot,replace=TRUE),
nrow=nboot)
yy<-matrix(sample(gv1[[k]],size=length(gv1[[k]])*nboot,replace=TRUE),
nrow=nboot)
bvec[,j]<-apply(xx,1,FUN=est,...)-apply(yy,1,FUN=est,...)
}
nullv<-rep(0,5)
if(!PRO){
mvec<-apply(bvec,2,FUN=mean)
m1<-var(t(t(bvec)-mvec+loc))
temp<-mahalanobis(rbind(bvec,nullv),loc,m1)
}
if(PRO){
temp<-pdis(rbind(bvec,nullv))
}
sig.level<-sum(temp[nboot+1]<temp[1:nboot])/nboot
}
if(!is.na(pts[1])){
npt<-length(pts)
n1<-1
n2<-1
vecn<-1
mat<-matrix(NA,nrow=2*length(pts),ncol=3)
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
gv<-vector("list",2*length(pts))
for (i in 1:length(pts)){
j<-i+npt
temp1<-y1[near(x1,pts[i],fr1)]
temp2<-y2[near(x2,pts[i],fr2)]
temp1<-temp1[!is.na(temp1)]
temp2<-temp2[!is.na(temp2)]
mat[i,1]<-pts[i]
if(length(temp1)<=10)print(paste("Warning, there are",length(temp1)," points corresponding to the design point X=",pts[i]))
if(length(temp2)<=10)print(paste("Warning, there are",length(temp2)," points corresponding to the design point X=",pts[i]))
mat[i,2]<-length(temp1)
mat[i,3]<-length(temp2)
gv1[[i]]<-temp1
gv1[[j]]<-temp2
}
loc<-NA
if(SEED)set.seed(2)
bvec<-matrix(NA,nrow=nboot,ncol=npt)
for(j in 1:npt){
k<-j+npt
loc[j]<-est(gv1[[j]])-est(gv1[[k]])
xx<-matrix(sample(gv1[[j]],size=length(gv1[[j]])*nboot,replace=TRUE),
nrow=nboot)
yy<-matrix(sample(gv1[[k]],size=length(gv1[[k]])*nboot,replace=TRUE),
nrow=nboot)
bvec[,j]<-apply(xx,1,FUN=est,...)-apply(yy,1,FUN=est,...)
}
nullv<-rep(0,npt)
if(!PRO){
mvec<-apply(bvec,2,FUN=mean)
m1<-var(t(t(bvec)-mvec+loc))
temp<-mahalanobis(rbind(bvec,nullv),loc,m1)
}
if(PRO)temp<-pdis(rbind(bvec,nullv))
sig.level<-sum(temp[nboot+1]<temp[1:nboot])/nboot
}
if(plotit)runmean2g(x1,y1,x2,y2,fr=fr1,est=est,...)
list(p.value=sig.level)
}

rqfitpv<-function(x,y,alpha=.05,nullval=rep(0,ncol(cbind(x,y))),
qval=.5,xout=FALSE,outfun=out,...){
#
#   Compute a p-value for slope parameter when fitting a
#   quantile regression model to data.
#
stop("This function has been removed. Use qregci")
x<-as.matrix(x)
p<-ncol(x)
np<-p+1
output<-matrix(NA,ncol=4,nrow=np)
dimnames(output)<-list(NULL,c("Param.","ci.low","ci.up","p.value"))
for(j in 1:np){
output[j,1]<-j-1
ci<-rqfit(x,y,qval=qval,xout=xout,outfun=outfun,...)$ci[j,]
output[j,2]<-ci[1]
output[j,3]<-ci[2]
alph<-c(1:99)/100
for(i in 1:99){
irem<-i
chkit<-rqfit(x,y,alpha=alph[i],qval=qval,xout=xout,outfun=outfun,...)$ci[j,]
if(chkit[1]>nullval || chkit[2]<nullval)break
}
p.value<-irem/100
if(p.value<=.1){
iup<-(irem+1)/100
alph<-seq(.001,iup,.001)
for(i in 1:length(alph)){
p.value<-alph[i]
chkit<-rqfit(x,y,alpha=alph[i],qval=qval,xout=xout,outfun=outfun,...)$ci[j,]
if(chkit[1]>nullval || chkit[2]<nullval)break
}}
output[j,4]<-p.value
}
output
}

adtest<-function(x,y,est=tmean,nboot=100,alpha=.05,fr=NA,xout=FALSE,outfun=out,SEED=TRUE,...){
#
# Test the hypothesis that the regression model is additive.
# Use a variation of Stute et al. (1998, JASA, 93, 141-149).
# method, and running interval version of the backfitting
# algorithm
#
x=as.matrix(x)
#if(!is.matrix(x))stop("X values should be stored in a matrix")
if(ncol(x)==1)stop("There should be two or more predictors")
temp<-cbind(x,y)
p<-ncol(x)
p1<-p+1
temp<-elimna(temp)
x<-temp[,1:p]
x<-as.matrix(x)
y<-temp[,p1]
if(xout){
keepit<-rep(T,nrow(x))
flag<-outfun(x,plotit=FALSE,...)$out.id
keepit[flag]<-F
x<-x[keepit,]
y<-y[keepit]
}
if(alpha<.05 && nboot<=100)warning("You used alpha<.05 and nboot<=100")
if(is.na(fr)){
fr<-.8
if(ncol(x)==2){
nval<-c(20,30,50,80,150)
fval<-c(0.40,0.36,0.18,0.15,0.09)
if(length(y)<=150)fr<-approx(nval,fval,length(y))$y
if(length(y)>150)fr<-7.57/length(y)+.05
}
}
if(SEED)set.seed(2)
x<-as.matrix(x)
mflag<-matrix(NA,nrow=length(y),ncol=length(y))
for (j in 1:length(y)){
for (k in 1:length(y)){
mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x))
}
}
yhat<-adrun(x,y,est=est,plotit=FALSE,fr=fr,pyhat=T)
regres<-y-yhat
print("Taking bootstrap sample, please wait.")
data<-matrix(runif(length(y)*nboot),nrow=nboot)
data<-sqrt(12)*(data-.5) # standardize the random numbers.
rvalb<-apply(data,1,adtests1,yhat,regres,mflag,x,fr)
# An n x nboot matrix of R values
rvalb<-rvalb/sqrt(length(y))
dstatb<-apply(abs(rvalb),2,max)
wstatb<-apply(rvalb^2,2,mean)
v<-c(rep(1,length(y)))
rval<-adtests1(v,yhat,regres,mflag,x,fr)
rval<-rval/sqrt(length(y))
dstat<-max(abs(rval))
wstat<-mean(rval^2)
p.value.d<-1-sum(dstat>=dstatb)/nboot
p.value.w<-1-sum(wstat>=wstatb)/nboot
list(dstat=dstat,wstat=wstat,p.value.d=p.value.d,p.value.w=p.value.w)
}
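#
# Example (illustrative; the model is additive, so on average
# nonsignificant results are expected):
#   set.seed(1)
#   x <- matrix(rnorm(120), ncol = 2)
#   y <- x[, 1] + x[, 2] + rnorm(60)
#   adtest(x, y)  # returns p.value.d and p.value.w
#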

rhom<-function(x,y,op=1,op2=FALSE,tr=.2,plotit=TRUE,xlab="X",ylab="ABS(res)",
est=median,sm=FALSE,SEED=TRUE){
# For regression model, Y=m(X)+s(X)e,
# where s(X) models heteroscedasticity, and e has median 0,
# test hypothesis s(X)=1 for any X
#
# For p>1, method tests for each p whether residuals and x_j
# have a horizontal regression line.
#
# op2=F, tests for homogeneity using running interval smoother
# op2=T, test of independence based on Y-M(Y), M(Y) some measure
#        of location given by argument est.
#  In general, op2=T should NOT be used when the goal is to test
#  the hypothesis of a homoscedastic error term.
#
# op=1 test using regression method (function regci)
# op=2 test using Winsorized correlation
#      tr is amount of winsorizing.
# op=3 test using a wild bootstrap method
#
x<-as.matrix(x)
p<-ncol(x)
pp<-p+1
xy<-elimna(cbind(x,y))
x<-xy[,1:p]
y<-xy[,pp]
x<-as.matrix(x)
output<-NA
if(ncol(x)==1){
if(!op2)res<-y-runhat(x,y,est=est,pts=x)
if(op2)res<-y-est(y)
if(op==1)output<-regci(x,abs(res),SEED=SEED,pr=FALSE)$p.value[2]
if(op==2)output<-wincor(x,abs(res),tr=tr)$siglevel
if(op==3)output<-indt(x,abs(res),tr=0,SEED=SEED)$p.value.d
}
if(ncol(x)>1){
pv<-ncol(x)+1
if(!op2)res<-y-rung3hat(x,y,est=est,pts=x)$rmd
if(op2)res<-y-est(y)
if(op==1)output<-regci(x,abs(res),pr=FALSE)$sig.level[2:pv]
if(op==2)output<-winall(cbind(x,abs(res)),tr=tr)$siglevel[1:ncol(x),pv]
if(op==3)output<-indt(x,abs(res),tr=0,SEED=SEED)$p.value.d
}
if(plotit){
if(ncol(x)==1){
if(!sm)rungen(x,abs(res),est=est,xlab=xlab,ylab=ylab)
if(sm)runmbo(x,abs(res),est=est,xlab=xlab,ylab=ylab)
}
if(ncol(x)==2){
if(sm)rung3d(x,abs(res),est=est,xlab=xlab,ylab=ylab)
if(!sm)run3bo(x,abs(res),est=est,xlab=xlab,ylab=ylab)
}}
list(p.value=output)
}
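#
# Example (illustrative; the error variance increases with x):
#   set.seed(1)
#   x <- rnorm(100)
#   y <- (1 + abs(x)) * rnorm(100)
#   rhom(x, y, plotit = FALSE)  # test the hypothesis s(X)=1
#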

gk.sigmamu <- function(x, c1 = 4.5, c2 = 3.0, mu.too = FALSE, ...)
{
  n <- length(x)

  medx <- median(x)
  sigma0 <- median(abs(x - medx))
w <- abs(x - medx) / sigma0
w <- ifelse(w<=c1,(1.0 - (w / c1)^2)^2,0)
  mu <- sum(x * w) / sum(w)

  x <- (x - mu) / sigma0
  rho <- x^2
  rho[rho > c2^2] <- c2^2
  sigma2 <- sigma0^2 / n * sum(rho)

  if(mu.too)
    c(mu, sqrt(sigma2))
  else
    sqrt(sigma2)
}

gk <- function(x, y, ...)
{
  ((gk.sigmamu(x + y, ...))^2 - (gk.sigmamu(x - y, ...))^2) / 4.0
}

hard.rejection <- function(distances, p, beta = 0.9, ...)
{
  d0 <- qchisq(beta, p) * median(distances) / qchisq(0.5, p)
  weights <- double(length(distances))
  weights[distances <= d0] <- 1.0
  weights
}
#
#
#

gkcov<-function(x,y,gk.sigmamu=taulc,...){
#
# Compute robust covariance using the Gnanadesikan-Kettenring
# estimator.
# (cf. Maronna & Zamar, 2002, Technometrics)
#
val<-.25*(gk.sigmamu(x+y,...)-gk.sigmamu(x-y,...))
val
}
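#
# Example (illustrative; a robust analogue of cov(x,y)):
#   set.seed(1)
#   x <- rnorm(50)
#   y <- x + rnorm(50)
#   gkcov(x, y)  # uses taulc as the measure of dispersion by default
#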
covogk<-function(x,sigmamu=taulc,v=gkcov,n.iter=1,beta=.9,...){
#
# Compute robust (weighted) covariance matrix in Maronna and Zamar
# (2002, Technometrics, eq. 7).
#
# x is an n by p matrix
# n.iter number of iterations. 1 seems to be best
# sigmamu is any user supplied function having the form
#   sigmamu(x,mu.too=F) and which computes a robust measure of
#   of dispersion if mu.too=F. If mu.too=T, it returns
#   a robust measure of location as well.
# v is any robust covariance
#
if(!is.matrix(x))stop("x should be a matrix")
x<-elimna(x)  # remove any rows with missing data
temp<-ogk.pairwise(x,sigmamu=sigmamu,v=v,n.iter=n.iter,beta=beta,...)$wcovmat
temp
}
ogk<-function(x,sigmamu=taulc,v=gkcov,n.iter=1,beta=.9,...){
#
# Compute robust (weighted) covariance matrix in Maronna and Zamar
# (2002, Technometrics, eq. 7).
#
# x is an n by p matrix
# n.iter number of iterations. 1 seems to be best
# sigmamu is any user supplied function having the form
#   sigmamu(x,mu.too=F) and which computes a robust measure of
#   of dispersion if mu.too=F. If mu.too=T, it returns
#   a robust measure of location as well.
# v is any robust covariance
#
if(!is.matrix(x))stop("x should be a matrix")
x<-elimna(x)  # remove any rows with missing data
temp<-ogk.pairwise(x,sigmamu=sigmamu,v=v,n.iter=n.iter,beta=beta,...)
list(center=temp$wcenter,cov=temp$wcovmat)
}
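#
# Example (illustrative, with simulated bivariate data):
#   set.seed(1)
#   z <- cbind(rnorm(100), rnorm(100))
#   ogk(z)  # returns $center and $cov, the reweighted OGK estimates
#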

ogk.pairwise <- function(X,n.iter=1,sigmamu=taulc,v=gkcov,beta=.9,...)
#weight.fn=hard.rejection,beta=.9,...)
{
# Downloaded (and modified slightly) from www.stats.ox.ac.uk/~konis/pairwise.q
# Corrections noted by V. Todorov have been incorporated
#
  data.name <- deparse(substitute(X))
  X <- as.matrix(X)
  n <- dim(X)[1]
  p <- dim(X)[2]
  Z <- X
  U <- diag(p)
  A <- list()
  # Iteration loop.
  for(iter in 1:n.iter) {
    # Compute the vector of standard deviations d and
    # the correlation matrix U.
    d <- apply(Z, 2, sigmamu, ...)
    Z <- sweep(Z, 2, d, '/')

    for(i in 1:(p - 1)) {
      for(j in (i + 1):p) {
        U[j, i] <- U[i, j] <- v(Z[ , i], Z[ , j], ...)
      }
    }

    # Compute the eigenvectors of U and store them in
    # the columns of E.

    E <- eigen(U, symmetric = TRUE)$vectors

    # Compute A, there is one A for each iteration.

    A[[iter]] <- d * E

    # Project the data onto the eigenvectors.

    Z <- Z %*% E
  }

  # End of orthogonalization iterations.

  # Compute the robust location and scale estimates for
  # the transformed data.

#  sqrt.gamma <- apply(Z, 2, sigmamu, mu.too = TRUE, ...)
  sqrt.gamma <- apply(Z, 2, sigmamu, mu.too = TRUE)
  center <- sqrt.gamma[1, ]
  sqrt.gamma <- sqrt.gamma[2, ]

  # Compute the mahalanobis distances.

  Z <- sweep(Z, 2, center)
  Z <- sweep(Z, 2, sqrt.gamma, '/')
  distances <- rowSums(Z^2)

  # From the inside out compute the robust location and
  # covariance matrix estimates.  See equation (5).

  covmat <- diag(sqrt.gamma^2)

  for(iter in seq(n.iter, 1, -1)) {
    covmat <- A[[iter]] %*% covmat %*% t(A[[iter]])
    center <- A[[iter]] %*% center
  }

  center <- as.vector(center)

  # Compute the reweighted estimate.  First, compute the
  # weights using the user specified weight function.

  #weights <- weight.fn(distances, p, ...)
weights <- hard.rejection(distances, p, beta=beta,...)
  sweights <- sum(weights)

  # Then compute the weighted location and covariance
  # matrix estimates.

  wcenter <- colSums(sweep(X, 1, weights, '*')) / sweights

  Z <- sweep(X, 2, wcenter)
  Z <- sweep(Z, 1, sqrt(weights), '*')
  wcovmat <- (t(Z) %*% Z) / sweights;

  list(center = center,
       covmat = covmat,
       wcenter = wcenter,
       wcovmat = wcovmat,
       distances = distances,
       sigmamu = deparse(substitute(sigmamu)),
       v = deparse(substitute(v)),
       data.name = data.name,
       data = X)
}



outogk<-function(x,sigmamu=taulc,v=gkcov,op=TRUE,SEED=FALSE,
beta=max(c(.95,min(c(.99,1/nrow(x)+.94)))),n.iter=1,plotit=TRUE,...){
#
# Use the ogk estimator to
# determine which points are outliers
#
#  op=T uses robust Mahalanobis distance based on
#  the OGK estimator with  beta adjusted so that
#  the outside rate per observation is approximately .05
#  under normality.
#  op=F returns the outliers based on the distances used
#  by the OGK estimator
#  (Currently, op=T seems best for detecting outliers.)
#
if(!is.matrix(x))stop("x should be a matrix")
x<-elimna(x)
if(!op){
temp<-ogk.pairwise(x,sigmamu=sigmamu,v=v,beta=beta,n.iter=n.iter,...)
vals<-hard.rejection(temp$distances,p=ncol(x),beta=beta,...)
flag<-(vals==1)
vals<-c(1:nrow(x))
outid<-vals[!flag]
keep<-vals[flag]
if(is.matrix(x)){
if(ncol(x)==2 && plotit){
plot(x[,1],x[,2],xlab="X", ylab="Y",type="n")
points(x[flag,1],x[flag,2])
if(sum(!flag)>0)points(x[!flag,1],x[!flag,2],pch="o")
}}}
if(op){
temp<-out(x,cov.fun=ogk,beta=beta,plotit=plotit,SEED=SEED)
outid<-temp$out.id
keep<-temp$keep
}
list(out.id=outid,keep=keep,distances=temp$dis)
}
splot<-function(x,op=TRUE,VL=FALSE,xlab="X",ylab="Rel. Freq."){
#
# Frequency plot
#
x<-x[!is.na(x)]
temp<-sort(unique(x))
freq<-NA
for(i in 1:length(temp)){
freq[i]<-sum(x==temp[i])
}
rmfreq=freq
nval=sum(freq)
freq<-freq/length(x)
tfreq<-freq
tfreq[1]<-0
tfreq[2]<-max(freq)
plot(temp,tfreq,xlab=xlab,ylab=ylab,type="n")
points(temp,freq,pch="*")
if(op){
if(!VL)lines(temp,freq)
if(VL){
for(i in 1:length(temp))lines(c(temp[i],temp[i]),c(0,freq[i]))
}
}
list(n=nval,frequencies=rmfreq)
}
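#
# Example (illustrative, with simulated discrete data):
#   set.seed(1)
#   x <- rbinom(100, 10, .5)
#   splot(x, VL = TRUE)  # frequency plot with vertical lines
#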

outcov<-function(x,y=NA,outfun=outogk,plotit=FALSE){
#
# Remove outliers and compute covariances
#
if(!is.na(y[1]))x<-cbind(x,y)
keep<-outfun(x,plotit=plotit)$keep
val<-var(x[keep,])
if(ncol(val)==2)val<-val[1,2]
list(cov=val)
}

covout<-function(x,y=NA,outfun=outogk,plotit=FALSE){
#
# Remove outliers and compute covariances
#
if(!is.na(y[1]))x<-cbind(x,y)
keep<-outfun(x,plotit=plotit)$keep
val<-var(x[keep,])
if(ncol(val)==2)val<-val[1,2]
val
}

tbscor<-function(x,y=NA){
#
# Compute a correlation coefficient using the TBS measure of scatter
#
if(!is.na(y[1]))x<-cbind(x,y)
if(!is.matrix(x))stop("x should be a matrix")
x<-elimna(x)
n<-nrow(x)
p<-ncol(x)
temp<-tbs(x)$cov
val<-matrix(NA,p,p)
for(j in 1:p){
for(k in 1:p){
val[j,k]<-temp[k,j]/sqrt(temp[k,k]*temp[j,j])
}}
test<-abs(val*sqrt((n-2)/(1-val^2)))
diag(test)<-NA
p.value<-NA
if(p==2){
val<-val[1,2]
test<-abs(val*sqrt((n-2)/(1-val^2)))
p.value<-c("Greater than .1")
crit<-20.20/n+1.89
if(test>=crit)p.value<-c("Less than .1")
crit<-30.41/n+2.21
if(test>=crit)p.value<-c("Less than .05")
crit<-39.72/n+2.5
if(test>=crit)p.value<-c("Less than .025")
crit<-58.55/n+2.80
if(test>=crit)p.value<-c("Less than .01")
}
list(cor=val,test.stat=test,p.value=p.value)
}

skiptbs<-function(x,y=NA,plotit=FALSE){
#
# Remove outliers and compute correlations
#
if(!is.na(y[1]))x<-cbind(x,y)
x<-elimna(x)
n<-nrow(x)
keep<-outtbs(x,plotit=plotit)$keep
val<-cor(x[keep,])
p.value<-NA
test<-NA
crit.05<-30.41/n+2.21
vat<-val
diag(vat)<-0
test<-abs(vat*sqrt((n-2)/(1-vat^2)))
diag(test)<-NA
if(ncol(val)==2){
p.value<-c("Greater than .1")
val<-val[1,2]
test<-abs(val*sqrt((n-2)/(1-val^2)))
p.value<-c("Greater than .1")
crit<-20.20/n+1.89
if(test>=crit)p.value<-c("Less than .1")
crit<-30.41/n+2.21
if(test>=crit)p.value<-c("Less than .05")
crit<-39.72/n+2.5
if(test>=crit)p.value<-c("Less than .025")
crit<-58.55/n+2.80
if(test>=crit)p.value<-c("Less than .01")
}
list(cor=val,test.stat=test,p.value=p.value,crit.05=crit.05)
}
skipogk<-function(x,y=NA,plotit=FALSE){
#
# Remove outliers and compute correlations
#
if(!is.na(y[1]))x<-cbind(x,y)
x<-elimna(x)
n<-nrow(x)
keep<-outogk(x,plotit=plotit)$keep
val<-cor(x[keep,])
p.value<-NA
test<-NA
crit.05<-15.49/n+2.68
vat<-val
diag(vat)<-0
test<-abs(vat*sqrt((n-2)/(1-vat^2)))
diag(test)<-NA
if(ncol(val)==2){
p.value<-c("Greater than .1")
val<-val[1,2]
test<-abs(val*sqrt((n-2)/(1-val^2)))
crit<-4.8/n+2.72
if(test>=crit)p.value<-c("Less than .1")
crit<-15.49/n+2.68
if(test>=crit)p.value<-c("Less than .05")
crit<-14.22/n+3.26
if(test>=crit)p.value<-c("Less than .025")
crit<-24.83/n+3.74
if(test>=crit)p.value<-c("Less than .01")
}
list(cor=val,test.stat=test,p.value=p.value,crit.05=crit.05)
}

rqfit<-function(x,y,qval=.5,alpha=.05,xout=FALSE,outfun=out,res=FALSE,...){
#
# Do a quantile regression fit
#
if(alpha!=.05)stop("This function only allows alpha=.05. Use qregci")
library(quantreg)
xx<-cbind(x,y)
p<-ncol(xx)-1
xx<-elimna(xx)
x<-xx[,1:p]
y<-xx[,ncol(xx)]
x=as.matrix(x)
if(xout){
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
}
residuals<-NA
if(res)residuals<-rq(y~x,tau=qval)$residuals
temp<-summary(rq(y~x,tau=qval,alpha=alpha))
temp0<-temp[[4]]
if(is.matrix(temp[[3]]))temp0<-temp[[3]] #Newer R version
temp<-temp0
coef<-temp[,1]
ci<-temp[,2:3]
list(coef=coef,ci=ci,residuals=residuals)
}
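#
# Example (illustrative; requires the quantreg package):
#   set.seed(1)
#   x <- rnorm(50)
#   y <- x + rnorm(50)
#   rqfit(x, y, qval = .75)$coef  # .75 quantile regression fit
#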
rqtest.sub<-function(isub,x,y,qval=.5){
#
#  Perform regression using x[isub] to predict y[isub]
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  This function is used by other functions when computing
#  bootstrap estimates.
#
#  x is assumed to be a matrix containing values of the predictors.
#
xmat<-matrix(x[isub,],nrow(x),ncol(x))
regboot<-rqfit(xmat,y[isub],qval=qval)$coef
regboot
}


tbs <- function(x,eps=1e-3,maxiter=20,r=.45,alpha=.05){
#        Rocke's constrained S-estimator
#
#      r=.45 is the breakdown point
#      alpha=.05 is the asymptotic rejection probability.
#
if(!is.matrix(x))stop("x should be a matrix with two or more columns")
x<-elimna(x)
library(MASS)
#temp<-cov.mve(x)
temp<-cov.mcd(x) # The use of mcd is crucial; using mve results in
#  very poor outside rate per obs under normality.
t1<-temp$center
s<-temp$cov
    n <- nrow(x)
    p <- ncol(x)
if(p==1)stop("x should be a matrix with two or more columns")
c1M<-cgen.bt(n,p,r,alpha,asymp=FALSE)
c1<-c1M$c1
if(c1==0)c1<-.001 #Otherwise get division by zero
M<-c1M$M
    b0 <- erho.bt(p,c1,M)
    crit <- 100
    iter <- 1
    w1d <- rep(1,n)
    w2d <- w1d
    while ((crit > eps)&(iter <= maxiter))
    {
        t.old <- t1
        s.old <- s
        wt.old <- w1d
        v.old <- w2d
        d2 <- mahalanobis(x,center=t1,cov=s)
        d <- sqrt(d2)
        k <- ksolve.bt(d,p,c1,M,b0)
        d <- d/k
        w1d <- wt.bt(d,c1,M)
        w2d <- v.bt(d,c1,M)
        t1 <- (w1d %*% x)/sum(w1d)
        s <- s*0
        for (i in 1:n)
        {
            xc <- as.vector(x[i,]-t1)
            s <- s + as.numeric(w1d[i])*(xc %o% xc)
        }
        s <- p*s/sum(w2d)
        mnorm <- sqrt(as.vector(t.old) %*% as.vector(t.old))
        snorm <- eigen(s.old)$values[1]
        crit1 <- max(abs(t1 - t.old))
#        crit <- max(crit1,crit2)
        crit <- max(abs(w1d-wt.old))/max(w1d)
        iter <- iter+1
    }
#    mnorm <- sqrt(as.vector(t1) %*% as.vector(t1))
#    snorm <- eigen(s)$values[1]
#    return(list(t1=t1,s=s))
list(center=t1,cov=s)
}
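#
# Example (illustrative; requires MASS for cov.mcd):
#   set.seed(1)
#   x <- matrix(rnorm(200), ncol = 2)
#   tbs(x)  # returns $center and $cov
#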
erho.bt <- function(p,c1,M)
#   expectation of rho(d) under chi-squared p
    return(chi.int(p,2,M)/2
        +(M^2/2+c1*(5*c1+16*M)/30)*chi.int2(p,0,M+c1)
        +(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4))*(
chi.int(p,0,M+c1)-chi.int(p,0,M))
        +(1/2+M^4/(2*c1^4)-M^2/c1^2)*(chi.int(p,2,M+c1)-chi.int(p,2,M))
        +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*(chi.int(p,3,M+c1)-chi.int(p,3,M))
        +(3*M^2/(2*c1^4)-1/(2*c1^2))*(chi.int(p,4,M+c1)-chi.int(p,4,M))
        -(4*M/(5*c1^4))*(chi.int(p,5,M+c1)-chi.int(p,5,M))
        +(1/(6*c1^4))*(chi.int(p,6,M+c1)-chi.int(p,6,M)))
chi.int <- function(p,a,c1)
#   partial expectation d in (0,c1) of d^a under chi-squared p
  return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*pchisq(c1^2,p+a) )
chi.int2 <- function(p,a,c1)
#   partial expectation d in (c1,\infty) of d^a under chi-squared p
 return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*(1-pchisq(c1^2,p+a)))
cgen.bt <- function(n,p,r,alpha,asymp=FALSE){
#   find constants c1 and M that gives a specified breakdown r
#   and rejection point alpha
if (asymp == FALSE){if (r > (n-p)/(2*n) ) r <- (n-p)/(2*n)}
# maximum achievable breakdown
#
#   if rejection is not achievable, use c1=0 and best rejection
#
    limvec <- rejpt.bt.lim(p,r)
    if (1-limvec[2] <= alpha)
    {
        c1 <- 0
        M <- sqrt(qchisq(1-alpha,p))
    }
    else
    {
    c1.plus.M <- sqrt(qchisq(1-alpha,p))
    M <- sqrt(p)
    c1 <- c1.plus.M - M
    iter <- 1
    crit <- 100
    eps <- 1e-5
    while ((crit > eps)&(iter<100))
    {
        deps <- 1e-4
        M.old <- M
        c1.old <- c1
        er <- erho.bt(p,c1,M)
        fc <- er - r*(M^2/2+c1*(5*c1+16*M)/30)
        fcc1 <- (erho.bt(p,c1+deps,M)-er)/deps
        fcM  <- (erho.bt(p,c1,M+deps)-er)/deps
        fcp <- fcM - fcc1 - r*(M-(5*c1+16*M)/30+c1*9/30)
        M <- M - fc/fcp
        if (M >= c1.plus.M ){M <- (M.old + c1.plus.M)/2}
        c1 <- c1.plus.M - M
#        if (M-c1 < 0)  M <- c1.old+(M.old-c1.old)/2
        crit <- abs(fc)
        iter <- iter+1
    }
    }
list(c1=c1,M=M,r1=r)
}
erho.bt.lim <- function(p,c1)
#   expectation of rho(d) under chi-squared p
  return(chi.int(p,2,c1)+c1^2*chi.int2(p,0,c1))
erho.bt.lim.p <- function(p,c1)
#   derivative of erho.bt.lim wrt c1
  return(chi.int.p(p,2,c1)+c1^2*chi.int2.p(p,0,c1)+2*c1*chi.int2(p,0,c1))


rejpt.bt.lim <- function(p,r){
#   find p-value of translated biweight limit c
#   that gives a specified breakdown
    c1 <- 2*p
    iter <- 1
    crit <- 100
    eps <- 1e-5
    while ((crit > eps)&(iter<100))
    {
        c1.old <- c1
        fc <- erho.bt.lim(p,c1) - c1^2*r
        fcp <- erho.bt.lim.p(p,c1) - 2*c1*r
        c1 <- c1 - fc/fcp
        if (c1 < 0)  c1 <- c1.old/2
        crit <- abs(fc)
        iter <- iter+1
    }
    return(c(c1,pchisq(c1^2,p),log10(1-pchisq(c1^2,p))))
}
chi.int.p <- function(p,a,c1)
  return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 )
chi.int2.p <- function(p,a,c1)
  return( -exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 )
ksolve.bt <- function(d,p,c1,M,b0){
#       find a constant k which satisfies the s-estimation constraint
#       for modified biweight
    k <- 1
    iter <- 1
    crit <- 100
    eps <- 1e-5
    while ((crit > eps)&(iter<100))
    {
        k.old <- k
        fk <- mean(rho.bt(d/k,c1,M))-b0
        fkp <- -mean(psi.bt(d/k,c1,M)*d/k^2)
        k <- k - fk/fkp
        if (k < k.old/2)  k <- k.old/2
        if (k > k.old*1.5) k <- k.old*1.5
        crit <- abs(fk)
        iter <- iter+1
    }
    return(k)
}
rho.bt <- function(x,c1,M)
{
    x1 <- (x-M)/c1
    ivec1 <- (x1 < 0)
    ivec2 <- (x1 >  1)
    return(ivec1*(x^2/2)
        +ivec2*(M^2/2+c1*(5*c1+16*M)/30)
        +(1-ivec1-ivec2)*(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4)
            +(1/2+M^4/(2*c1^4)-M^2/c1^2)*x^2
            +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*x^3
            +(3*M^2/(2*c1^4)-1/(2*c1^2))*x^4
            -4*M*x^5/(5*c1^4)+x^6/(6*c1^4)))
}
psi.bt <- function(x,c1,M)
{
    x1 <- (x-M)/c1
    ivec1 <- (x1 < 0)
    ivec2 <- (x1 >  1)
    return(ivec1*x+(1-ivec1-ivec2)*x*(1-x1^2)^2)
}
psip.bt <- function(x,c1,M)
{
    x1 <- (x-M)/c1
    ivec1 <- (x1 < 0)
    ivec2 <- (x1 >  1)
    return(ivec1+(1-ivec1-ivec2)*((1-x1^2)^2+4*x*x1*(1-x1^2)/c1))
}
wt.bt <- function(x,c1,M)
{
    x1 <- (x-M)/c1
    ivec1 <- (x1 < 0)
    ivec2 <- (x1 >  1)
    return(ivec1+(1-ivec1-ivec2)*(1-x1^2)^2)
}
v.bt <- function(x,c1,M) return(x*psi.bt(x,c1,M))


olstests1<-function(vstar,yhat,res,x){
#
# Compute a wild bootstrap test statistic for OLS:
# fit least squares to ystar = yhat + res*vstar and
# return the sum of the squared slope estimates.
#
ystar <- yhat + res * vstar
p<-ncol(x)
pp<-p+1
vals<-lsfit(x,ystar)$coef[2:pp]
test<-sum(vals^2)
test
}
kerreg<-function(x,y,pyhat=FALSE,pts=NA,plotit=TRUE,theta=50,phi=25,expand=.5,
scale=FALSE,zscale=FALSE,eout=FALSE,xout=FALSE,outfun=out,np=100,xlab="X",ylab="Y",zlab="Z",
varfun=pbvar,e.pow=TRUE,pr=TRUE,ticktype="simple",...){
#
# Compute local weighted regression with Epanechnikov kernel
#
# See Fan, Annals of Statistics, 1993, 21, 196-217.
# cf. Bjerve and Doksum, Annals of Statistics, 1993, 21, 890-902
#
# With a single predictor, this function calls locreg
# See locreg for information about np and plotting
#
library(akima)
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
d<-ncol(x)
np1<-d+1
m<-elimna(cbind(x,y))
if(xout && eout)stop("Can't have eout=xout=T")
if(eout){
flag<-outfun(m,plotit=FALSE,...)$keep
m<-m[flag,]
}
if(xout){
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
}
if(zscale){
for(j in 1:np1){
m[,j]<-(m[,j]-median(m[,j]))/mad(m[,j])
}}
x<-m[,1:d]
x<-as.matrix(x)
y<-m[,np1]
n<-nrow(x)
if(d>1){
xrem<-x
pi<-gamma(.5)^2
cd<-c(2,pi)
if(d==2)A<-1.77
if(d==3)A<-2.78
if(d>2){
for(j in 3:d)cd[j]<-2*pi*cd[j-2]/j  # p. 76
}
if(d>3)A<-(8*d*(d+2)*(d+4)*(2*sqrt(pi))^d)/((2*d+1)*cd[d])  # p. 87
hval<-A*(1/n)^(1/(d+4))  # p. 86
for(j in 1:d){
sig<-sqrt(var(x[,j]))
temp<-idealf(x[,j])
iqr<-(temp$qu-temp$ql)/1.34
A<-min(c(sig,iqr))
x[,j]<-x[,j]/A
}
xx<-cbind(rep(1,nrow(x)),x)
yhat<-NA
for(j in 1:n){
yhat[j]<-NA
temp1<-t(t(x)-x[j,])/(hval)
temp1<-temp1^2
temp1<-apply(temp1,1,FUN="sum")
temp<-.5*(d+2)*(1-temp1)/cd[d]
epan<-ifelse(temp1<1,temp,0) # Epanechnikov kernel, p. 76
chkit<-sum(epan!=0)
if(chkit >= np1){
vals<-lsfit(x,y,wt=epan)$coef
yhat[j]<-xx[j,]%*%vals
}}
if(plotit  && d==2){
if(pr){
if(!scale){
print("scale=F is specified")
print("If there is dependence, might use scale=T")
}}
m<-elimna(cbind(xrem,yhat))
xrem<-m[,1:d]
yhat<-m[,np1]
fitr<-yhat
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(xrem[i,]==xrem[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1]
mkeep<-xrem[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr)
persp(fit,theta=theta,phi=phi,expand=expand,xlab=xlab,ylab=ylab,zlab=zlab,
scale=scale,ticktype=ticktype)
}}
if(d==1){
yhat<-locreg(x[,1],y,pyhat=TRUE,np=np,plotit=plotit,pts=pts,
xlab=xlab,ylab=ylab)
yhat2<-locreg(x[,1],y,pyhat=TRUE,np=0,plotit=FALSE)
}
if(d>1)yhat2<-yhat
m<-NULL
#E.pow<-varfun(yhat2[!is.na(yhat2)])/varfun(y)
# Estimate of explanatory power performs poorly.
if(pyhat)m<-yhat
#list(Strength.Assoc=sqrt(E.pow),Explanatory.Power=E.pow,yhat=m)
m
}


attract<-function(X, Y, k = 5)
{
# Works in Splus but not in R.
# For simple linear regression: plots k elemental starts and
# their domains of attraction.  Calls conc2.
	l1coef <- l1fit(X, Y)$coef
	X <- as.matrix(X)
	nr <- dim(X)[1]
	nc <- dim(X)[2] + 1
	J <- 1:nc
	dom <- matrix(nrow = k, ncol = nc)
	par(mfrow = c(1, 2))
	plot(X, Y)
	title("a) 5 Elemental Starts")
	for(i in 1:k) {
## get J
		J <- sample(nr, nc)	## get bJ, the elem fit
		if(abs(X[J[1]] - X[J[2]]) < 1/100000000) {
			slope <- 0
		}
		else {
			slope <- (Y[J[1]] - Y[J[2]])/(X[J[1]] - X[J[2]])
		}
		int <- Y[J[1]] - slope * X[J[1]]
		fit <- c(int, slope)
		yhat <- X %*% fit[2:nc] + fit[1]
		lines(X, yhat)	
	## get the domain of attraction for LTA concentration
		dom[i,  ] <- conc2(X, Y, start = fit)$coef
	}
	plot(X, Y)
	for(i in 1:k) {
		fit <- dom[i,  ]
		yhat <- X %*% fit[2:nc] + fit[1]
		lines(X, yhat)
	}
	title("b) The Corresponding Attractors")
}

bg2ci<-function(x, alpha = 0.05)
{
#gets BGse with middle n^0.8 cases for sample median and
#the corresponding robust  100 (1-alpha)% CI. This is optimal
#for estimating the SE but is not resistant.
	n <- length(x)
	up <- 1 - alpha/2
	med <- median(x)
	ln <- max(1,floor(n/2) - ceiling(0.5 * n^0.8))
	un <- n - ln
	rdf <- un - ln - 1
	cut <- qt(up, rdf)
	d <- sort(x)
	se2 <- (d[un] - d[ln])/(2 * n^0.3)
	rval <- cut * se2
	rlo2 <- med - rval
	rhi2 <- med + rval	
	#got low and high endpoints of robust CI
	list(int = c(rlo2, rhi2), med = med, se2 = se2)
}
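#
# Example (illustrative, with simulated data):
#   set.seed(1)
#   bg2ci(rnorm(100))  # robust 95% CI for the median
#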

cav<-function(alpha = 0.01, k = 5)
{
#gets n(asy var) for the alpha trimmed mean
#and T_(A,n)(k) if errors are Cauchy(0,1)
	z <- tan(pi * (alpha - 0.5))
	val <- (z - atan(z))/((1 - 2 * alpha) * atan(z))
	ntmav <- val + (2 * alpha * (tan(pi * (alpha - 0.5)))^2)/(1 - 2 * alpha
		)^2
	zj <- k
	alphaj <- 0.5 + atan( - k)/pi
	alphaj <- ceiling(100 * alphaj)/100
	zj <- tan(pi * (alphaj - 0.5))
	val <- (zj - atan(zj))/((1 - 2 * alphaj) * atan(zj))
	natmav <- val + (2 * alphaj * (tan(pi * (alphaj - 0.5)))^2)/(1 - 2 *
		alphaj)^2
	list(ntmav = ntmav, natmav = natmav)
}

cci<-function(x, alpha = 0.05)
{
#gets classical  100 (1-alpha)% CI
#defaults are alpha = .05
	n <- length(x)
	up <- 1 - alpha/2
	mn <- mean(x)
	v <- var(x)
	se <- sqrt(v/n)
	val <- qt(up, n - 1) * se
	lo <- mn - val
	hi <- mn + val
	list(int = c(lo, hi), mean = mn, se = se)
}
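#
# Example (illustrative, with simulated data):
#   set.seed(1)
#   cci(rnorm(25))               # classical 95% CI for the mean
#   cci(rnorm(25), alpha = .01)  # 99% CI
#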

cgci<-function(x, alpha = 0.05, ks = 3.5)
{
#gets T_S,n with a coarse grid
# and the corresponding robust  100 (1-alpha)% CI
	n <- length(x)
	up <- 1 - alpha/2
	med <- median(x)
	madd <- mad(x, constant = 1)
	d <- sort(x)	##get robust T_S,n CI
	lo <- sum(x < (med - ks * madd))
	hi <- sum(x > (med + ks * madd))
	tp <- max(hi, lo)/n
	if(tp == 0)
		tp <- 0
	if(tp > 0 && tp <= 0.01)
		tp <- 0.01
	if(tp > 0.01 && tp <= 0.1)
		tp <- 0.1
	if(tp > 0.1 && tp <= 0.25)
		tp <- 0.25
	if(tp > 0.25 && tp <= 0.4)
		tp <- 0.4
	if(tp > 0.4)
		tp <- 0.49
	tstmn <- mean(x, trim = tp)	
	#have obtained the two stage trimmed mean
	ln <- floor(n * tp)
	un <- n - ln
	if(ln > 0) {
		d[1:ln] <- d[(ln + 1)]
		d[(un + 1):n] <- d[un]
	}
	den <- ((un - ln)/n)^2
	swv <- var(d)/den	
	#got the scaled Winsorized variance
	rdf <- un - ln - 1
	rval <- qt(up, rdf) * sqrt(swv/n)
	tslo <- tstmn - rval
	tshi <- tstmn + rval	
	##got low and high endpoints of robust T_S,n CI
	list(int = c(tslo, tshi), tp = tp)
}


cltv<-
function(gam = 0.5)
{
# Gets asy var for lts(h) and lta(h) at Cauchy C(0,1)
# where h/n -> gam.
	k <- tan((pi * gam)/2)
	num <- 2 * k - pi * gam
	den <- pi * (gam - (2 * k)/(pi * (1 + k^2)))^2
	ltsv <- num/den
	num <- gam
	den <- 4 * (1/pi - 1/(pi * (1 + k^2)))^2
	ltav <- num/den
	list(ltsv = ltsv, ltav = ltav)
}

cmba2<-
function(x, csteps = 5, ii = 1)
{
# gets the covmba estimator using 98, 95, 90, 80, 70, 60 and 50% trimming
	n <- dim(x)[1]
	p <- dim(x)[2]
	mds <- matrix(nrow = n, ncol = 8, 0)	##get the DGK estimator
	covs <- var(x)
	mns <- apply(x, 2, mean)
	cmd <- sqrt(mahalanobis(x, mns, covs))	## concentrate
	for(i in 1:csteps) {
		md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
		mns <- apply(x[md2 <= medd2,  ], 2, mean)
		covs <- var(x[md2 <= medd2,  ])
	}
	mds[, 8] <- sqrt(mahalanobis(x, mns, covs))
	covb <- covs
	mnb <- mns	##get the square root of det(covb)
	critb <- prod(diag(chol(covb)))	##get the resistant estimator
	covv <- diag(p)
	med <- apply(x, 2, median)
	md2 <- mahalanobis(x, center = med, covv)
	smd2 <- sort(md2)
	val <- p + 3
	tem <- 1:7
	tem[1] <- smd2[val + floor(0.02 * n)]
	tem[2] <- smd2[val + floor(0.05 * n)]
	tem[3] <- smd2[val + floor(0.1 * n)]
	tem[4] <- smd2[val + floor(0.2 * n)]
	tem[5] <- smd2[val + floor(0.3 * n)]
	tem[6] <- smd2[val + floor(0.4 * n)]
	tem[7] <- median(md2)
	medd2 <- tem[7]
	for(j in ii:7) {
## get the start
		val2 <- tem[j]
		mns <- apply(x[md2 <= val2,  ], 2, mean)
		covs <- var(x[md2 <= val2,  ])	## concentrate
		for(i in 1:csteps) {
			md2 <- mahalanobis(x, mns, covs)
			medd2 <- median(md2)
			mns <- apply(x[md2 <= medd2,  ], 2, mean)
			covs <- var(x[md2 <= medd2,  ])
		}
		mds[, j] <- sqrt(mahalanobis(x, mns, covs))
		plot(cmd, mds[, j])
		identify(cmd, mds[, j])
		crit <- prod(diag(chol(covs)))
		if(crit < critb) {
			critb <- crit
			covb <- covs
			mnb <- mns
		}
	}
	pairs(mds)	##scale for better performance at MVN
	rd2 <- mahalanobis(x, mnb, covb)
	const <- median(rd2)/(qchisq(0.5, p))
	covb <- const * covb
	list(center = mnb, cov = covb, mds = mds)
}

conc2<-
function(x, y, start = l1fit(x, y)$coef)
{   #Finds that LTA attractor of the start.
	nc <- dim(x)[2] + 1
	res <- y - (x %*% start[2:nc] + start[1])
	ares <- abs(res)
	cov <- ceiling(length(y)/2)
	m <- sort(ares, partial = cov)[cov]
	old <- sum(ares[ares <= m])
	new <- old - 1
	ct <- 0
	while(new < old) {
		ct <- ct + 1
		start <- l1fit(x[ares <= m,  ], y[ares <=
			m])$coef
		res <- y - (x %*% start[2:nc] + start[1
			])
		ares <- abs(res)
		m <- sort(ares, partial = cov)[cov]
		new <- sum(ares[ares <= m])	#print(old)
		if(new < old) {
			old <- new
			new <- new - 1
		}
	}
	list(coef = start, ct = ct)
}

concmv<-
function(n = 100, csteps = 5, gam = 0.4, outliers = TRUE, start = 2)
{
#Shows how concentration works when p = 2.
# Use start = 1 for DGK, start = 2 for MBA sphere, start = 3 for MBA MAD
	p <- 2	#A <- cbind(c(1, 0.9), c(0.9, 1))
	x <- matrix(rnorm(n * p), ncol = p, nrow = n)	#A <- diag(sqrt(1:p))
#if(outliers == T) {
# val <- floor(gam * n)
# tem <- 10 + 0 * 1:p
# x[1:val,  ] <- x[1:val,  ] + tem
#}
#x <- x %*% A
	A <- cbind(c(1, 0.4), c(0.4, 1))
	B <- cbind(c(0.5, 0), c(0, 0.5))
	if(outliers == T) {
		val <- floor(gam * n)
		x[(val + 1):n,  ] <- x[(val + 1):n,  ] %*% A
		x[1:val,  ] <- x[1:val,  ] %*% B
		x[1:val, 1] <- x[1:val, 1] + 0
		x[1:val, 2] <- x[1:val, 2] + 6
	}
	else {
		x <- x %*% A
	}
	if(start == 1) {
		covs <- var(x)
		mns <- apply(x, 2, mean)
	}
	if(start == 2) {
		covv <- diag(p)
		med <- apply(x, 2, median)
		md2 <- mahalanobis(x, center = med, covv)
		medd2 <- median(md2)	## get the start
		mns <- apply(x[md2 <= medd2,  ], 2, mean)
		covs <- var(x[md2 <= medd2,  ])
	}
	if(start >= 2) {
		tem <- apply(x, 2, mad)^2
		covv <- diag(tem)
		med <- apply(x, 2, median)
		md2 <- mahalanobis(x, center = med, covv)
		medd2 <- median(md2)	## get the start
		mns <- apply(x[md2 <= medd2,  ], 2, mean)
		covs <- var(x[md2 <= medd2,  ])
	}
## concentrate
	for(i in 1:csteps) {
		md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
		mns <- apply(x[md2 <= medd2,  ], 2, mean)
		covs <- var(x[md2 <= medd2,  ])
		plot(x[, 1], x[, 2])
		points(x[md2 <= medd2, 1], x[md2 <= medd2, 2], pch = 15)
		identify(x[, 1], x[, 2])
	}
}

concsim<-
function(n = 100, p = 2, steps = 5, gam = 0.4, runs = 20)
{
# This Splus function is used to determine when the DD
# plot separates outliers from non-outliers for various starts.
	A <- sqrt(diag(1:p))
	mbact <- 0
	fmcdct <- 0
	mbct <- 0
	madct <- 0
	dgkct <- 0
	for(i in 1:runs) {
		x <- matrix(rnorm(n * p), ncol = p, nrow = n)	
	## outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T
		val <- floor(gam * n)
		tem <- 10 + 0 * 1:p
		x[1:val,  ] <- x[1:val,  ] + tem
		x <- x %*% A	#MBA
		out <- covmba(x, csteps = steps)
		center <- out$center
		cov <- out$cov
		rd2 <- mahalanobis(x, center, cov)
		if(min(rd2[1:val]) > max(rd2[(val + 1):n]))
mbact <- mbact + 1	
	#DGK
		covs <- var(x)
		mns <- apply(x, 2, mean)	## concentrate
		for(i in 1:steps) {
			md2 <- mahalanobis(x, mns, covs)
			medd2 <- median(md2)
			mns <- apply(x[md2 <= medd2,  ], 2, mean)
			covs <- var(x[md2 <= medd2,  ])
		}
		rd2 <- mahalanobis(x, mns, covs)
		if(min(rd2[1:val]) > max(rd2[(val + 1):n])) dgkct <- dgkct + 1	
	#Median Ball start
		covv <- diag(p)
		med <- apply(x, 2, median)
		md2 <- mahalanobis(x, center = med, covv)
		medd2 <- median(md2)	## get the start
		mns <- apply(x[md2 <= medd2,  ], 2, mean)
		covs <- var(x[md2 <= medd2,  ])	## concentrate
		for(i in 1:steps) {
			md2 <- mahalanobis(x, mns, covs)
			medd2 <- median(md2)
			mns <- apply(x[md2 <= medd2,  ], 2, mean)
			covs <- var(x[md2 <= medd2,  ])
		}
		rd2 <- mahalanobis(x, mns, covs)
		if(min(rd2[1:val]) > max(rd2[(val + 1):n])) mbct <- mbct + 1	
	#MAD start
		tem <- apply(x, 2, mad)^2
		covv <- diag(tem)
		md2 <- mahalanobis(x, center = med, covv)
		medd2 <- median(md2)	## get the start
		mns <- apply(x[md2 <= medd2,  ], 2, mean)
		covs <- var(x[md2 <= medd2,  ])	## concentrate
		for(i in 1:steps) {
			md2 <- mahalanobis(x, mns, covs)
			medd2 <- median(md2)
			mns <- apply(x[md2 <= medd2,  ], 2, mean)
			covs <- var(x[md2 <= medd2,  ])
		}
		rd2 <- mahalanobis(x, mns, covs)
		if(min(rd2[1:val]) > max(rd2[(val + 1):n])) madct <- madct + 1	
	#FMCD
		out <- cov.mcd(x)
		center <- out$center
		cov <- out$cov
		rd2 <- mahalanobis(x, center, cov)
		if(min(rd2[1:val]) > max(rd2[(val + 1):n]))
			fmcdct <- fmcdct + 1
	}
	list(mbact = mbact, fmcdct = fmcdct, dgkct = dgkct, mbct = mbct, madct
		 = madct)
}

corrsim<-
function(n = 100, p = 3, eps = 0.4, nruns = 100, type = 1)
{
#For R, first type "library(lqs)" before using this function
# This function generates 100 n by p matrices x.
# The output is the 100 sample correlations between the MDi and RDi
# RDi uses covmba for type = 1, rmba for type = 2, cov.mcd for type = 3
# mahalanobis gives squared Maha distances
	corrs <- 1:nruns
	for(i in 1:nruns) {
		wt <- 0 * (1:n)
		x <- matrix(rnorm(n * p), ncol = p, nrow = n)	
	#The following 3 commands make x elliptically contoured.
#zu <- runif(n)
#x[zu < eps,] <- x[zu < eps,]*5
#x <- x^2
# To make marginals of x lognormal, use
#x <- exp(x)
		center <- apply(x, 2, mean)
		cov <- var(x)
		md2 <- mahalanobis(x, center, cov)
		if(type == 1) {
			out <- covmba(x)
		}
		if(type == 2) {
			out <- rmba(x)
		}
		if(type == 3) {
			out <- cov.mcd(x)
		}
		center <- out$center
		cov <- out$cov
		rd2 <- mahalanobis(x, center, cov)	
	# need square roots for the usual distances
		md <- sqrt(md2)
		rd <- sqrt(rd2)
		const <- sqrt(qchisq(0.5, p))/median(rd)
		rd <- const * rd	
	# wt[rd < sqrt(qchisq(0.975, p))] <- 1
#  corrs[i] <- cor(md[wt > 0], rd[wt > 0])}
		corrs[i] <- cor(md, rd)
	}
	cmean <- mean(corrs)
	cmin <- min(corrs)
	clt95 <- sum(corrs < 0.95)
	clt80 <- sum(corrs < 0.8)
	list(cmean = cmean, cmin = cmin, clt95 = clt95, clt80 = clt80,
		corrs = corrs)
}


covdgk<-
function(x, csteps = 10)
{
#computes the scaled DGK multivariate estimator
	p <- dim(x)[2]
	covs <- var(x)
	mns <- apply(x, 2, mean)	## concentrate
	for(i in 1:csteps) {
		md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
		mns <- apply(x[md2 <= medd2,  ], 2,
			mean)
		covs <- var(x[md2 <= medd2,  ])
	}
##scale for consistency at MVN
	rd2 <- mahalanobis(x, mns, covs)
	const <- median(rd2)/(qchisq(0.5, p))
	covs <- const * covs
	list(center = mns, cov = covs)
}

covmba <- function(x, csteps = 5)
{  # gets the MBA estimator
        zx <- x
        x <- as.matrix(x)
	p <- dim(x)[2]	
	##get the DGK estimator
	covs <- var(x)
	mns <- apply(x, 2, mean)	## concentrate
	for(i in 1:csteps) {
              	md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
             if(p > 1){
		mns <- apply(x[md2 <= medd2,  ], 2,
			mean)
		covs <- var(x[md2 <= medd2,  ])
             }
             if(p == 1){
		mns <- mean(x[md2 <= medd2])
		covs <- var(x[md2 <= medd2])
             }
	}
	covb <- covs
	mnb <- mns	##get the square root of det(covb)
	critb <- prod(diag(chol(covb)))	
	##get the resistant estimator
	covv <- diag(p)
	med <- apply(x, 2, median)
	md2 <- mahalanobis(x, center = med, covv)
	medd2 <- median(md2)	## get the start
        if(p > 1){
	mns <- apply(x[md2 <= medd2,  ], 2, mean)
	covs <- var(x[md2 <= medd2,  ])	
        }
        if(p == 1){
	mns <- mean(zx[md2 <= medd2])
	covs <- var(zx[md2 <= medd2])	
        }
        ## concentrate
	for(i in 1:csteps) {
		md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
              if(p > 1){
		mns <- apply(x[md2 <= medd2,  ], 2,
			mean)
		covs <- var(x[md2 <= medd2,  ])
              }
              if(p == 1){
	        mns <- mean(zx[md2 <= medd2])
	        covs <- var(zx[md2 <= medd2])	
               }
	}
	crit <- prod(diag(chol(covs)))
	if(crit < critb) {
		critb <- crit
		covb <- covs
		mnb <- mns
	}
##scale for better performance at MVN
	rd2 <- mahalanobis(x, mnb, covb)
	const <- median(rd2)/(qchisq(0.5, p))
	covb <- const * covb
	list(center = mnb, cov = covb)
}

covmba2<-
function(x, csteps = 5)
{  # gets the MBA estimator, use covmba2 instead of covmba if p > 1
	p <- dim(x)[2]	
	##get the DGK estimator
	covs <- var(x)
	mns <- apply(x, 2, mean)	## concentrate
	for(i in 1:csteps) {
		md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
		mns <- apply(x[md2 <= medd2,  ], 2,
			mean)
		covs <- var(x[md2 <= medd2,  ])
	}
	covb <- covs
	mnb <- mns	##get the square root of det(covb)
	critb <- prod(diag(chol(covb)))	
	##get the resistant estimator
	covv <- diag(p)
	med <- apply(x, 2, median)
	md2 <- mahalanobis(x, center = med, covv)
	medd2 <- median(md2)	## get the start
	mns <- apply(x[md2 <= medd2,  ], 2, mean)
	covs <- var(x[md2 <= medd2,  ])	## concentrate
	for(i in 1:csteps) {
		md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
		mns <- apply(x[md2 <= medd2,  ], 2,
			mean)
		covs <- var(x[md2 <= medd2,  ])
	}
	crit <- prod(diag(chol(covs)))
	if(crit < critb) {
		critb <- crit
		covb <- covs
		mnb <- mns
	}
##scale for better performance at MVN
	rd2 <- mahalanobis(x, mnb, covb)
	const <- median(rd2)/(qchisq(0.5, p))
	covb <- const * covb
	list(center = mnb, cov = covb)
}
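
# Added sketch (hypothetical helper covmbademo): with a cluster of shifted
# outliers the MBA center stays near the origin while the classical mean
# is pulled toward the cluster.
covmbademo<-function(n = 100, p = 2, gam = 0.2){
set.seed(1)
x <- matrix(rnorm(n * p), ncol = p)
val <- floor(gam * n)
x[1:val, ] <- x[1:val, ] + 10
list(mba.center = covmba(x)$center, classical.center = apply(x, 2, mean))
}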

covsim2<-
function(n=100, p = 2, steps = 5, gam = 0.4, runs = 20)
{
# This Splus function is used to determine when the DD
# plot separates outliers from non-outliers.
	A <- sqrt(diag(1:p))
	mbact <- 0
	for(i in 1:runs) {
		x <- matrix(rnorm(n * p), ncol = p, nrow = n)	
	## outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T
		val <- floor(gam * n)
		tem <- 10 + 0 * 1:p
		x[1:val,  ] <- x[1:val,  ] + tem
		x <- x %*% A
		out <- covmba(x, csteps = steps)
		center <- out$center
		cov <- out$cov
		rd2 <- mahalanobis(x, center, cov)
		if(min(rd2[1:val]) > max(rd2[(val + 1):n]))
			mbact <- mbact + 1
	}
	list(mbact = mbact)
}

ctrviews<-
function(x, Y, ii = 1)
{
# Uses classical distances instead of robust distances.
# Trimmed views for 90, 80, ... 0 percent
# trimming. Allows visualization of m
# and crude estimation of c beta in models
# of the form y = m(x^T beta) + e.
# Workstation: activate a graphics
# device with command "X11()" or "motif()."
# R needs command "library(lqs)."
# Advance the view with the right mouse button.
# In R, highlight "stop."
	x <- as.matrix(x)
	center <- apply(x, 2, mean)
	cov <- var(x)
	rd2 <- mahalanobis(x, center, cov)
	labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%",
		"0%")
	tem <- seq(0.1, 1, 0.1)
	for(i in ii:10) {
		val <- quantile(rd2, tem[i])
		bhat <- lsfit(x[rd2 <= val,  ], Y[rd2 <= val])$coef
		ESP <- x %*% bhat[-1]
		plot(ESP, Y)
		title(labs[i])
		identify(ESP, Y)
		print(bhat)
	}
}

ddcomp<-
function(x, steps = 5)
{
# Makes 4 DD plots using the FMCD and MBA estimators.
# Click left mouse button to identify points.
# Click right mouse button to end the function.
# Unix systems turn on graphics device eg enter
#  command "X11()" or "motif()" before using.
# R users need to type "library(lqs)" before using.
	p <- dim(x)[2]
	par(mfrow = c(2, 2))
	center <- apply(x, 2, mean)
	cov <- var(x)
	md2 <- mahalanobis(x, center, cov)	
	# MD is the classical and RD the robust distance
	MD <- sqrt(md2)	#DGK start
	md2 <- mahalanobis(x, center, cov)
	medd2 <- median(md2)	## get the start
	mns <- center
	covs <- cov	## concentrate
	for(i in 1:steps) {
		md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
		mns <- apply(x[md2 <= medd2,  ], 2, mean)
		covs <- var(x[md2 <= medd2,  ])
	}
	rd2 <- mahalanobis(x, mns, covs)
	rd <- sqrt(rd2)	#Scale the RD so the plot follows the 0-1 line
#if the data is multivariate normal.
	const <- sqrt(qchisq(0.5, p))/median(rd)
	RDdgk <- const * rd
	plot(MD, RDdgk)
	abline(0, 1)
	identify(MD, RDdgk)
	title("DGK DD Plot")	#MBA
	out <- covmba(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	rd <- sqrt(rd2)	#Scale the RD so the plot follows the identity line
#if the data is multivariate normal.
	const <- sqrt(qchisq(0.5, p))/median(rd)
	RDm <- const * rd
	plot(MD, RDm)
	abline(0, 1)
	identify(MD, RDm)
	title("MBA DD Plot")	#FMCD
	out <- cov.mcd(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	rd <- sqrt(rd2)	#Scale the RD so the plot follows the 0-1 line
#if the data is multivariate normal.
	const <- sqrt(qchisq(0.5, p))/median(rd)
	RDf <- const * rd
	plot(MD, RDf)
	abline(0, 1)
	identify(MD, RDf)
	title("FMCD DD Plot")	#Median Ball start
	covv <- diag(p)
	med <- apply(x, 2, median)
	md2 <- mahalanobis(x, center = med, covv)
	medd2 <- median(md2)	## get the start
	mns <- apply(x[md2 <= medd2,  ], 2, mean)
	covs <- var(x[md2 <= medd2,  ])	## concentrate
	for(i in 1:steps) {
		md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
		mns <- apply(x[md2 <= medd2,  ], 2, mean)
		covs <- var(x[md2 <= medd2,  ])
	}
	rd2 <- mahalanobis(x, mns, covs)
	rd <- sqrt(rd2)	#Scale the RD so the plot follows the 0-1 line
#if the data is multivariate normal.
	const <- sqrt(qchisq(0.5, p))/median(rd)
	RDmb <- const * rd
	plot(MD, RDmb)
	abline(0, 1)
	identify(MD, RDmb)
	title("Med Ball DD Plot")
}

ddmv<-
function(n = 100, p = 2, steps = 5, gam = 0.4,
	outtype = 2, est = 1)
{
# This Splus function is used to determine when the DD
# plot separates outliers from non-outliers for various starts.
# Workstation needs to activate a graphics
# device with the command "X11()" or "motif()."
# Advance the view with the right mouse button.
## est = 1  for DGK, 2 for median ball, 3 for MAD
	A <- sqrt(diag(1:p))
	x <- matrix(rnorm(n * p), ncol = p, nrow
		 = n)
	val <- floor(gam * n)
	tem <- 10 + 0 * 1:p
	x[1:val,  ] <- x[1:val,  ] + tem	
	#if outtype = 1, outliers are Np(10 1, Ip) nonoutliers Np(0,Ip)
	if(outtype == 2) x <- x %*% A	
	## outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T
## get the start
	if(est == 1) {
#DGK classical start
		covs <- var(x)
		mns <- apply(x, 2, mean)
	}
	if(est == 2) {
#Median Ball high breakdown start
		covv <- diag(p)
		med <- apply(x, 2, median)
		md2 <- mahalanobis(x, center =
			med, covv)
		medd2 <- median(md2)	
	## get the start
		mns <- apply(x[md2 <= medd2,  ],
			2, mean)
		covs <- var(x[md2 <= medd2,  ])
	}
	if(est == 3) {
#MAD high breakdown start
		tem <- apply(x, 2, mad)^2
		covv <- diag(tem)
		med <- apply(x, 2, median)
		md2 <- mahalanobis(x, center =
			med, covv)
		medd2 <- median(md2)	
	## get the start
		mns <- apply(x[md2 <= medd2,  ],
			2, mean)
		covs <- var(x[md2 <= medd2,  ])
	}
## concentrate and plot, highlighting outliers
	MD <- sqrt(mahalanobis(x, mns, covs))
	for(i in 1:steps) {
		md <- sqrt(mahalanobis(x, mns,
			covs))
		medd <- median(md)
		mns <- apply(x[md <= medd,  ], 2,
			mean)
		covs <- var(x[md <= medd,  ])
		rd <- sqrt(mahalanobis(x, mns,
			covs))
		plot(MD, rd)
		points(MD[1:val], rd[1:val], pch
			 = 15)
		identify(MD, rd)
	}
}


ddplot<-
function(x)
{
# Makes a DD plot. cov.mcd is used for the RDi.
# Click left mouse button to identify points.
# Click right mouse button to end the function.
# Unix systems turn on graphics device eg enter
#  command "X11()" or "motif()" before using.
# R users need to type "library(lqs)" before using.
	p <- dim(x)[2]
	center <- apply(x, 2, mean)
	cov <- var(x)
	md2 <- mahalanobis(x, center, cov)
	out <- cov.mcd(x)	# or use out <- cov.mve(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)	
	# md is the classical and rd the robust distance
	MD <- sqrt(md2)
	rd <- sqrt(rd2)	
	#Scale the RD so the plot follows the 0-1 line
#if the data is multivariate normal.
	const <- sqrt(qchisq(0.5, p))/median(rd)
	RD <- const * rd
	plot(MD, RD)
	abline(0, 1)
	identify(MD, RD)	#  list(MD = MD, RD = RD)
}


ddsim<-
function(n = 100, p = 3, eps = 0.4, type = 1)
{
# R: type "library(lqs)" before using if type = 3.
# Rapidly plots 20 DD plots in a row.
# Unix: type "X11()" or "motif()" to
# turn on a graphics device.
# RDi uses covmba for type = 1, rmba for type = 2, cov.mcd for type = 3
	med <- 1:20
	for(i in 1:20) {
		x <- matrix(rnorm(n * p), ncol = p, nrow = n)	
	## For elliptically contoured data, use:
#zu <- runif(n)
#x[zu < eps,] <- x[zu < eps,]*5
#x <- x^2
##For lognormal marginals, add:
#x <- exp(x)
		center <- apply(x, 2, mean)
		cov <- var(x)
		md2 <- mahalanobis(x, center, cov)
		if(type == 1) {
			out <- covmba(x)
		}
		if(type == 2) {
			out <- rmba(x)
		}
		if(type == 3) {
			out <- cov.mcd(x)
		}
		center <- out$center
		cov <- out$cov
		rd2 <- mahalanobis(x, center, cov)
		md <- sqrt(md2)
		rd <- sqrt(rd2)	#Scale the RDi so plot follows 0-1 line
#if the data is multivariate normal.
		const <- sqrt(qchisq(0.5, p))/median(rd)
		rd <- const * rd
		plot(md, rd)
		abline(0, 1)
		med[i] <- median(md)	#The following command can be inserted
#to slow down the plots "identify(md,rd)"
	}
	list(med = med)
}


deav<-
function(alpha = 0.01, k = 5)
{
#gets n(asy var) for the alpha trimmed mean
#and T_(A,n)(k) if errors are DE(0,1)
	z <-  - log(2 * alpha)
	num <- 2 - (2 + 2 * z + z^2) * exp( - z)
	den <- (1 - exp( - z)) * (1 - 2 * alpha)
	val1 <- num/den
	num <- 2 * alpha * z^2
	den <- (1 - 2 * alpha)^2
	ntmav <- val1 + num/den
	zj <- k * log(2)
	alphaj <- 0.5 * exp( - zj)
	alphaj <- ceiling(100 * alphaj)/100
	zj <-  - log(2 * alphaj)
	num <- 2 - (2 + 2 * zj + zj^2) * exp( - zj)
	den <- (1 - exp( - zj)) * (1 - 2 * alphaj)
	val1 <- num/den
	num <- 2 * alphaj * zj^2
	den <- (1 - 2 * alphaj)^2
	natmav <- val1 + num/den
	list(ntmav = ntmav, natmav = natmav)
}


deltv<-
function(gam = 0.5)
{
# Gets asy var for lts(h) and lta(h) at standard double exp
# where h/n -> gam.
	k <- -1 * log(1 - gam)
	num <- 2 - (2 + 2 * k + k^2) * exp( - k)
	den <- (gam - k * exp( - k))^2
	ltsv <- num/den
	ltav <- 1/gam
	list(ltsv = ltsv, ltav = ltav)
}
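
# Added worked check (hypothetical helper deltvdemo): at the double
# exponential, ltav = 1/gam exactly (e.g. 2 at gam = 0.5); this tabulates
# both asymptotic variances over a grid of gam values.
deltvdemo<-function(gams = c(0.5, 0.75, 0.9)){
t(sapply(gams, function(g) unlist(deltv(g))))
}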

diagplot<-
function(x, Y)
{
# Scatterplot matrix of OLS diagnostics.
# Workstations need to activate a graphics
# device with command "X11()" or "motif()."
	n <- length(Y)
	rmat <- matrix(nrow = n, ncol = 7)
	out <- lsfit(x, Y)
	tem <- ls.diag(out)
	rmat[, 1] <- tem$cooks
	rmat[, 2] <- tem$hat
	rmat[, 3] <- tem$std.res
	rmat[, 4] <- tem$stud.res
	rmat[, 5] <- tem$dfits
	rmat[, 6] <- Y - out$resid
	rmat[, 7] <- Y
	pairs(rmat, labels = c("Cook's CD", "leverages", "stand resid",
		"stud resid", "DFFITS", "YHAT", "Y"))
}

ellipse<-
function(x, center = apply(x, 2, mean), cov = var(x),
	alph = 0.95)
{# Makes a covering ellipse. The x should have 2 columns.
	mu1 <- center[1]
	mu2 <- center[2]
	w <- solve(cov)
	w11 <- w[1, 1]
	w12 <- w[1, 2]
	w22 <- w[2, 2]
	tem <- x[, 2] - mu2
	y2 <- seq(min(tem), max(tem), length = 100)
	xc <- qchisq(alph, 2)
	el <- matrix(0, 2, 2)
	ind <- 0
	for(i in 1:100) {
		j1 <- (y2[i] * w12)^2
		j2 <- w11 * ((y2[i])^2 * w22 - xc)	
	# print(i)
# print(j1 - j2)
		if((j1 - j2) >= 0) {
			ind <- ind + 2
			tem <- (y2[i] * w12)^2
			tem <- tem - w11 * ((y2[i])^2 *
				w22 - xc)
			tem <- sqrt(tem)
			term <- ( - y2[i] * w12 + tem)/
				w11
			el <- rbind(el, c((term + mu1), (
				y2[i] + mu2)))
			term <- ( - y2[i] * w12 - tem)/
				w11
			el <- rbind(el, c((term + mu1), (
				y2[i] + mu2)))
		}
	}
	el <- el[3:ind,  ]
	nn <- dim(x)[1]
	if((ind - 2) > nn) {
		tem <- sample((ind - 2), nn)
		el <- el[tem,  ]
	}
	xt <- cbind(x[, 1], el[, 1])
	yt <- cbind(x[, 2], el[, 2])
	matplot(xt, yt)
}

essp<-
function(x, Y, M = 50)
{
# Trimmed view or ESSP for M percent
# trimming. Allows visualization of g
# and crude estimation of c beta in models
# of the form y = g(x^T beta,e).
# Workstations need to activate a graphics
# device with command "X11()" or "motif()."
# R needs command "library(lqs)."
# Click on the right mouse button to finish.
# In R, highlight "stop."
	x <- as.matrix(x)
        tval <- M/100
	out <- cov.mcd(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	val <- quantile(rd2, (1 - tval))
	bhat <- lsfit(x[rd2 <= val,  ], Y[rd2 <= val])$
		coef
	ESP <- x %*% bhat[-1]
	plot(ESP, Y)
	identify(ESP, Y)
	return(bhat[-1])
}

ffL<-
function(x, y)
{
# for unix, use X11() to turn on the graphics device before using this function
# this function makes an FF lambda plot where the competing models are Y^L
	n <- length(y)
	rmat <- matrix(nrow = n, ncol = 5)
	rmat[, 1] <- y - lsfit(x, y)$resid
	ytem <- (y^(0.5) - 1)/0.5
	rmat[, 2] <- ytem - lsfit(x, ytem)$resid
	rmat[, 3] <- log(y) - lsfit(x, log(y))$resid
	ytem <- (y^(-0.5) - 1)/-0.5
	rmat[, 4] <- ytem - lsfit(x, ytem)$resid
	ytem <- (y^(-1) - 1)/-1
	rmat[, 5] <- ytem - lsfit(x, ytem)$resid
	pairs(rmat, labels = c("YHAT", "YHAT^(0.5)", "YHAT^(0)", "YHAT^(-0.5)",
		"YHAT^(-1)"))
	min(cor(rmat))
}

fflynx<-function(){
# R users need to type library(ts) and data(lynx)
Y <- log10(lynx)
FAR2 <- 1:114
FAR11 <- 1:114
FAR12 <- 1:114
SETAR272 <- 1:114
SETAR252 <- 1:114
for(i in 3:114){
FAR2[i ] <- 1.05 + 1.41*Y[i-1] -0.77*Y[i-2]}
for(i in 12:114){
# trailing operators keep each multi-line sum parsed as a single statement
FAR11[i ] <-  1.13*Y[i-1] -0.51*Y[i-2] + .23*Y[i-3] -0.29*Y[i-4] +
 .14*Y[i-5] -0.14*Y[i-6] + 0.08*Y[i-7] -0.04*Y[i-8] +
 .13*Y[i-9] + 0.19*Y[i-10] - .31*Y[i-11] }
for(i in 13:114){
FAR12[i ] <-  1.123 + 1.084*Y[i-1] -0.477*Y[i-2] + .265*Y[i-3] -0.218*Y[i-4] +
 .180*Y[i-9]  - .224*Y[i-12] }
for(i in 13:114){
if( Y[i-2] <= 3.116){
SETAR272[i ] <-  0.546  + 1.032*Y[i-1] -0.173*Y[i-2] + .171*Y[i-3] -0.431*Y[i-4] +
 .332*Y[i-5]  - .284*Y[i-6] + .210*Y[i-7]}
else {SETAR272[i ] <-  2.632  + 1.492*Y[i-1] -1.324*Y[i-2]}
}
for(i in 13:114){
if( Y[i-2] <= 3.05){
SETAR252[i ] <-  0.768  + 1.064*Y[i-1] -0.200*Y[i-2] + .164*Y[i-3] -0.428*Y[i-4] +
 .181*Y[i-5] }
else {SETAR252[i ] <-  2.254  + 1.474*Y[i-1] -1.202*Y[i-2]}
}
x <- cbind(Y,FAR2,FAR11,FAR12,SETAR272,SETAR252)
x <- x[13:114,]
print(cor(x))
pairs(x)
}


ffplot<-
function(x, y, nsamps = 7)
{
# For Unix, use X11() to turn on the graphics device before
# using this function. For R, first type library(lqs).
# Makes an FF plot with several resistant estimators.
# Needs the program mbareg.
	n <- length(y)
	rmat <- matrix(nrow = n, ncol = 6)
	lsfit <- y - lsfit(x, y)$residuals
	print("got OLS")
	l1fit <- y - l1fit(x, y)$residuals
	print("got L1")
	almsfit <- y - lmsreg(x, y)$resid
	print("got ALMS")
	altsfit <- y - ltsreg(x, y)$residuals
	print("got ALTS")
	mbacoef <- mbareg(x, y, nsamp = nsamps)$coef
	MBAFIT <- mbacoef[1] + x %*% mbacoef[-1]
	print("got MBA")
	rmat[, 1] <- y
	rmat[, 2] <- lsfit
	rmat[, 3] <- l1fit
	rmat[, 4] <- almsfit
	rmat[, 5] <- altsfit
	rmat[, 6] <- MBAFIT
	pairs(rmat, labels = c("Y", "OLS Fit", "L1 Fit", "ALMS Fit",
	                       "ALTS Fit", "MBAREG Fit"))
}

ffplot2<-
function(x, y, nsamps = 7)
{
# For Unix, use X11() to turn on the graphics device before
# using this function. For R, first type library(lqs).
# Makes an FF plot with several resistant estimators.
# Needs the program mbareg.
	n <- length(y)
	rmat <- matrix(nrow = n, ncol = 5)
	lsfit <- y - lsfit(x, y)$residuals
	print("got OLS")
	almsfit <- y - lmsreg(x, y)$resid
	print("got ALMS")
	altsfit <- y - ltsreg(x, y)$residuals
	print("got ALTS")
	mbacoef <- mbareg(x, y, nsamp = nsamps)$coef
	MBAFIT <- mbacoef[1] + x %*% mbacoef[-1]
	print("got MBA")
	rmat[, 1] <- y
	rmat[, 2] <- lsfit
	rmat[, 3] <- almsfit
	rmat[, 4] <- altsfit
	rmat[, 5] <- MBAFIT
	pairs(rmat, labels = c("Y", "OLS Fit", "ALMS Fit", "ALTS Fit", "MBAREG Fit"))
}

fysim<-function( runs = 20)
{
# 20 FY plots for simulated AR(2) time series data
fycorr <- 1:runs
for(i in 1: runs){
Y <- ardata()$arts
out <- ar.yw(Y)
Yts <- Y[10:200]
FIT <- Yts - out$resid[10:200]
plot(FIT,Yts)
abline(0,1)
fycorr[i] <- cor(FIT,Yts)
}
list(fycorr=fycorr)
}

gamper<-
function(h, k=500)
{
##estimates the amount of contamination an algorithm estimator
##using k starts of h cases each can tolerate (cf. gamper2 below)
	n <- 10000
	c <- 5000
	gam0 <- min((n - c)/n, (1 - (1 - 0.2^(1/k))^(1/
		h))) * 100
	print(gam0)
}

gamper2<-
function(p, k = 500)
{
##estimates the amount of contamination fmcd can tolerate
	n <- 10000
	c <- 5000
	h <- p + 1
	gam0 <- min((n - c)/n, (1 - (1 - 0.2^(1/k))^(1/h))) * 100
	print(gam0)
}
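
# Added usage note (hypothetical helper gamperdemo): prints the estimated
# percentage of contamination tolerated for p = 10 predictors with the
# default k = 500 starts.
gamperdemo<-function(){
gamper2(p = 10)
}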


llrdata <- function(n = 100,  q=5)
{
# Generates data for loglinear regression.
#
	y <- 0 * 1:n
	beta <- 0 * 1:q
        beta[1:3] <- 1
        alpha <- -2.5
	x <- matrix(rnorm(n * q), nrow = n,
			ncol = q)
        x <- 0.5*x + 1
        SP <- alpha + x%*%beta
	y <- rpois(n,lambda=exp(SP))
        list(x=x,y=y)
}
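
# Added check (hypothetical helper llrdatademo): glm with a log link should
# roughly recover alpha = -2.5 and beta = (1,1,1,0,0) from llrdata output.
llrdatademo<-function(n = 200){
set.seed(1)
out <- llrdata(n, q = 5)
coef(glm(out$y ~ out$x, family = poisson))
}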

llressp <- function(x,y)
{
# Makes the ESSP for loglinear regression.
# Workstation: need to activate a graphics
# device with command "X11()" or "motif()."
#
#   If q is changed, change the formula in the glm statement.
	q <- 5
# change formula to x[,1]+ ... + x[,q] with q
	out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] +
			x[, 4] + x[,5], family = poisson)
	ESP <- x %*% out$coef[-1] + out$coef[1]
        Y <- y
        plot(ESP,Y)	
        abline(mean(y),0)
        fit <- y
        fit <- exp(ESP)
        indx <- sort.list(ESP)
        lines(ESP[indx],fit[indx])
        lines(lowess(ESP,y),type="s")
                        }

llrplot<-
function(x, y)
{
# Makes ESSP, the weighted forward response and residual plots for loglinear regression.
#
#   If q is changed, change the formula in the glm statement.
	q <- 5	# change formula to x[,1]+ ... + x[,q] with q
	out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[, 5], family =
		poisson)
	ESP <- x %*% out$coef[-1] + out$coef[1]
	Y <- y
	par(mfrow = c(2, 2))
	plot(ESP, Y)
	abline(mean(y), 0)
	Ehat <- exp(ESP)
	indx <- sort.list(ESP)
	lines(ESP[indx], Ehat[indx])
	lines(lowess(ESP, y), type = "s")
	title("a) ESSP")
	Vhat <- (y - Ehat)^2
	plot(Ehat, Vhat)
	abline(0, 1)
	#abline(lsfit(Ehat, Vhat)$coef)
	title("b)")
	Z <- y
	Z[y < 1] <- Z[y < 1] + 0.5
	MWRES <- sqrt(Z) * (log(Z) - x %*% out$coef[-1] - out$coef[1])
	MWFIT <- sqrt(Z) * log(Z) - MWRES
	plot(MWFIT, sqrt(Z) * log(Z))
	abline(0, 1)
	#abline(lsfit(MWFIT, sqrt(Z) * log(Z))$coef)
	title("c) WFRP Based on MLE")
	plot(MWFIT, MWRES)
	title("d) WRP Based on MLE")
}

llrsim<-
function(n = 100, nruns = 1, type = 1)
{
# Runs llrplot nruns times on simulated LLR data.
# Type = 1 for Poisson data, Type = 2 for negative binomial data
# Calls llrdata, oddata, llrplot.
	q <- 5
	for(i in 1:nruns) {
		if(type == 1)
			out <- llrdata(n, q)
		else out <- oddata(n, q)
		x <- out$x
		y <- out$y
		llrplot(x, y)	#identify(MWFIT, MWRES)
	}
}

llrwtfrp <- function(x,y)
{
# Makes the weighted forward response and residual plots for loglinear regression.
# Workstation: need to activate a graphics
# device with command "X11()" or "motif()."

#
#   If q is changed, change the formula in the glm statement.
	q <- 5
# change formula to x[,1]+ ... + x[,q] with q
	out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] +
			x[, 4] + x[,5], family = poisson)
	ESP <- x %*% out$coef[-1] + out$coef[1]
        Z <- y
        Z[y<1] <- Z[y<1] + 0.5
        out2<-lsfit(x,y=log(Z),wt=Z)
        #WRES <- sqrt(Z)*(log(Z) - x%*%out2$coef[-1] - out2$coef[1])
        WRES <- out2$res
        WFIT <- sqrt(Z)*log(Z) - WRES
        MWRES <- sqrt(Z)*(log(Z) - x%*%out$coef[-1] - out$coef[1])
        MWFIT <- sqrt(Z)*log(Z) - MWRES
        par(mfrow=c(2,2))
        plot(WFIT,sqrt(Z)*log(Z))
        abline(0,1)
        title("a) Weighted Forward Response Plot")
        plot(WFIT,WRES)
        title("b) Weighted Residual Plot")
        plot(MWFIT,sqrt(Z)*log(Z))
        abline(0,1)
        title("c) WFRP Based on MLE")
        plot(MWFIT,MWRES)
        title("d) WRP Based on MLE")
                                }

lmsviews<-
function(x, Y, ii = 1)
{
# Trimmed views using lmsreg for 90, 80, ... 0 percent
# trimming.   Allows visualization of  m
# and crude estimation of  c beta in models
# of the form y = m(x^T beta) + e.
# Workstation: activate a graphics device
# with commands "X11()" or "motif()."
# R needs command "library(lqs)."
# Advance the view with the right mouse button and
# in R, highlight "stop."
	x <- as.matrix(x)
	out <- cov.mcd(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%",
		"0%")
	tem <- seq(0.1, 1, 0.1)
	for(i in ii:10) {
		val <- quantile(rd2, tem[i])
		b <- lmsreg(x[rd2 <= val,  ], Y[rd2 <= val])$coef
		ESP <- x %*% b[-1]
		plot(ESP, Y)
		title(labs[i])
		identify(ESP, Y)
		print(b)
	}
}

lrdata <- function(n = 200,  type = 3)
{
# Generates data for logistic regression.
# If X|y=1 ~ N(mu_1,I) and X|Y=0 ~ N(0,I) then beta = mu_1 and alpha = -0.5 ||mu_1||^2.
#
#   If q is changed, change the formula in the glm statement.
	q <- 5
	y <- 0 * 1:n
	y[(n/2 + 1):n] <- y[(n/2 + 1):n] + 1
	beta <- 0 * 1:q
        if(type == 1) {
		beta[1] <- 1
                alpha <- -0.5
	}
	if(type == 2) {
		beta <- beta + 1
                alpha <- -q/2
	}
        if(type == 3) {
		beta[1:3] <-  1
                alpha <- -1.5
	}
        x <- matrix(rnorm(n * q), nrow = n,
			ncol = q)
	if(type == 1) {
		x[(n/2 + 1):n, 1] <- x[(n/2 + 1
				):n, 1] + 1
		}
	if(type == 2) {
		x[(n/2 + 1):n,  ] <- x[(n/2 + 1
				):n,  ] + 1
		}
        if(type == 3) {
		x[(n/2 + 1):n, 1:3 ] <- x[(n/2 + 1
				):n, 1:3 ] + 1
		}
        #X|y=0 ~ N(0, I) and X|y=1 ~ N(beta,I)
	# change formula to x[,1]+ ... + x[,q] with q
	out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] +
			x[, 4] + x[,5], family = binomial)
	list(alpha = alpha, beta = beta, lrcoef = out$coef,x=x,y=y)
}
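
# Added check (hypothetical helper lrdatademo): compares the fitted logistic
# regression coefficients returned by lrdata with the population alpha, beta.
lrdatademo<-function(n = 400){
set.seed(1)
out <- lrdata(n, type = 3)
list(pop = c(out$alpha, out$beta), est = out$lrcoef)
}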

lressp <- function(x,y,slices=10)
{
# Makes the ESSP for logistic regression.
# If X|y=1 ~ N(mu_1,I) and X|Y=0 ~ N(0,I) then beta = mu_1 and alpha = -0.5 ||mu_1||^2.
# Workstations need to activate a graphics
# device with command "X11()" or "motif()."
# R needs command "library(lqs)."
# Advance the view with the right mouse button.
# In R, highlight "stop."
#
#   If q is changed, change the formula in the glm statement.
	q <- 5
# change formula to x[,1]+ ... + x[,q] with q
	out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] +
			x[, 4] + x[,5], family = binomial)
	ESP <- x %*% out$coef[-1] + out$coef[1]
        Y <- y
        plot(ESP,Y)	
        abline(mean(y),0)
        fit <- y
        fit <- exp(ESP)/(1 + exp(ESP))
      #  lines(sort(ESP),sort(fit))
        indx <- sort.list(ESP)
        lines(ESP[indx],fit[indx])
        fit2 <- fit
        n <- length(y)
        val <- as.integer(n/slices)
        for(i in 1: (slices-1)){
          fit2[((i-1)*val+1):(i*val)] <- mean(y[indx[((i-1)*val+1):(i*val)]])
        }
        fit2[((slices-1)*val+1):n] <- mean(y[indx[((slices-1)*val+1):n]])
# fit2 is already sorted in order corresponding to indx
        lines(ESP[indx],fit2)
#list(fit2=fit2,n=n,slices=slices,val=val)
                }


lsviews<-
function(x, Y, ii = 1)
{
# This function is the same as tvreg except that the untrimmed
# cases are highlighted. It compares the LS fits for 90, 80,
# ..., 0 percent trimming. Used to visualize g if y = g(beta^T x,e).
# Workstation: activate a graphics
# device with command "X11()" or "motif()."
# R needs command "library(lqs)."
# Advance the view with the right mouse button.
# In R, highlight ``stop."
	x <- as.matrix(x)
	out <- cov.mcd(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%",
		"0%")
	tem <- seq(0.1, 1, 0.1)
	for(i in ii:10) {
		val <- quantile(rd2, tem[i])
		bhat <- lsfit(x[rd2 <= val,  ], Y[rd2 <= val])$coef
		ESP <- bhat[1] + x %*% bhat[-1]
		plot(ESP, Y)
		points(ESP[rd2 <= val], Y[rd2 <= val], pch = 15, cex = 1.4)
		abline(0, 1)
		title(labs[i])
		identify(ESP, Y)
		print(bhat)
	}
}

maha<-
function(x)
{
# Generates the classical mahalanobis distances.
	center <- apply(x, 2, mean)
	cov <- var(x)
	return(sqrt(mahalanobis(x, center, cov)))
}
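
# Added sketch (hypothetical helper mahademo): under normality, cases with
# maha(x) > sqrt(qchisq(.975, p)) are the usual outlier candidates; here
# case 1 is shifted and should be flagged.
mahademo<-function(n = 100, p = 3){
set.seed(1)
x <- matrix(rnorm(n * p), ncol = p)
x[1, ] <- x[1, ] + 10
which(maha(x) > sqrt(qchisq(0.975, p)))
}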

mbalata<-
function(x, y, k=6, nsamp = 7)
{
#gets the median ball fit with 7 centers, the LATA criterion, and 7 ball sizes
        x <- as.matrix(x)
	n <- dim(x)[1]
	q <- dim(x)[2]	
	# q + 1 is number of predictors including intercept
	vals <- c(q + 3 + floor(n/100), q + 3 + floor(n/40), q + 3 +
		floor(n/20), q + 3 + floor(n/10), q + 3 + floor(n/5), q +
		3 + floor(n/3), q + 3 + floor(n/2))
	covv <- diag(q)
	centers <- sample(n, nsamp)
	temp <- lsfit(x, y)
	mbaf <- temp$coef	## get LATA criterion
	res <- temp$residuals
	crit <- k^2*median(res^2)
	cn <- sum(res^2 <= crit)
	absres <- sort(abs(res))
	critf <- sum(absres[1:cn])	##
	for(i in 1:nsamp) {
		md2 <- mahalanobis(x, center = x[centers[i],  ], covv)
		smd2 <- sort(md2)
		for(j in 1:7) {
			temp <- lsfit(x[md2 <= smd2[vals[j]],  ], y[md2 <=
				smd2[vals[j]]])	
	#Use OLS on rows with md2 <= cutoff = smd2[vals[j]]
			res <- y - temp$coef[1] - x %*% temp$coef[-1]	
	## get LATA criterion
			crit <- k^2*median(res^2)
			cn <- sum(res^2 <= crit)
			absres <- sort(abs(res))
			crit <- sum(absres[1:cn])	##
			if(crit < critf) {
				critf <- crit
				mbaf <- temp$coef
			}
		}
	}
	list(coef = mbaf, critf = critf)
}

mbamv<-
function(x, y, nsamp = 7)
{
# This function is for simple linear regression. The
# highlighted boxes get weight 1. Click on right
# mouse button to advance plot. Only uses 50% trimming.
        x <- as.matrix(x)
	n <- dim(x)[1]
	q <- dim(x)[2]
	covv <- diag(q)
	centers <- sample(n, nsamp)
	for(i in 1:nsamp) {
		md2 <- mahalanobis(x, center = x[centers[i],  ], covv)
		med <- median(md2)
		plot(x, y)
		points(x[md2 < med], y[md2 < med], pch = 15)
                abline(lsfit(x[md2 < med],y[md2 < med]))
		identify(x, y)
	}
}

mbamv2<-
function(x, Y, nsamp = 7)
{
# This function is for multiple linear regression. The
# highlighted boxes get weight 1. Click on right
# mouse button to advance plot. Only uses 50% trimming.
        x <- as.matrix(x)
       	n <- dim(x)[1]
	q <- dim(x)[2]
	covv <- diag(q)
	centers <- sample(n, nsamp)
	for(i in 1:nsamp) {
		md2 <- mahalanobis(x, center = x[centers[i],  ], covv)
		med <- median(md2)
                if(q ==1){out <- lsfit(x[md2 < med],Y[md2 < med])}
                else{out <- lsfit(x[md2 < med,],Y[md2 < med])}
                FIT <- out$coef[1] + x%*%out$coef[-1]
		RES <- Y - FIT
                par(mfrow=c(2,1))
                plot(FIT,Y)
		points(FIT[md2 < med], Y[md2 < med], pch = 15)
                abline(0,1)
		identify(FIT, Y)
                plot(FIT,RES)
                points(FIT[md2 < med], RES[md2 < med], pch = 15)
                abline(0,0)
		identify(FIT, RES)
	}
}

mbareg<-
function(x, y, nsamp = 7)
{
#gets the mbareg fit with 7 centers, med resid crit, 7 ball sizes
        x <- as.matrix(x)
	n <- dim(x)[1]
	q <- dim(x)[2]	# q + 1 is number of predictors including intercept
	vals <- c(q + 3 + floor(n/100), q + 3 + floor(n/40), q + 3 + floor(n/20
		), q + 3 + floor(n/10), q + 3 + floor(n/5), q + 3 + floor(n/3),
		q + 3 + floor(n/2))
	covv <- diag(q)
	centers <- sample(n, nsamp)
	temp <- lsfit(x, y)
	mbaf <- temp$coef
	critf <- median(temp$residuals^2)
	for(i in 1:nsamp) {
		md2 <- mahalanobis(x, center = x[centers[i],  ], covv)
		smd2 <- sort(md2)
		for(j in 1:7) {
			temp <- lsfit(x[md2 <= smd2[vals[j]],  ], y[md2 <= smd2[
				vals[j]]])	
	#Use OLS on rows with md2 <= cutoff = smd2[vals[j]]
			res <- y - temp$coef[1] - x %*% temp$coef[-1]
			crit <- median(res^2)
			if(crit < critf) {
				critf <- crit
				mbaf <- temp$coef
			}
		}
	}
	list(coef = mbaf, critf = critf)
}
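
# Added sketch (hypothetical helper mbaregdemo): with a cluster of bad
# leverage points, OLS is pulled toward the outliers while the median
# squared residual criterion used by mbareg largely resists them.
mbaregdemo<-function(n = 100){
set.seed(1)
x <- rnorm(n)
y <- 2 + 3 * x + rnorm(n)
x[1:10] <- x[1:10] + 10
y[1:10] <- 0
list(ols = lsfit(x, y)$coef, mba = mbareg(x, y)$coef)
}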

med2ci<-
function(x, cc = 4, alpha = 0.05)
{
#gets an SE for the sample median based on ~50% trimming and the corresponding robust 100 (1-alpha)% CI
#defaults are alpha = .05 and cc = 4; cc = 5 may work better than the default
	up <- 1 - alpha/2
	n <- length(x)
	med <- median(x)
	ln <- floor(n/2) - ceiling(sqrt(n/cc))
	un <- n - ln
	low <- ln + 1
	d <- sort(x)
	if(ln > 0) {
		d[1:ln] <- d[(low)]
		d[(un + 1):n] <- d[un]
	}
	den <- ((un - ln)/n)^2
	swv <- var(d)/den	
	#got the scaled Winsorized variance
	rdf <- un - low
	rval <- qt(up, rdf) * sqrt(swv/n)
	rlo <- med - rval
	rhi <- med + rval
	list(int = c(rlo, rhi), med = med, swv = swv)
}

medci<-
function(x, alpha = 0.05)
{
#gets Bloch and Gastwirth SE for sample median and the corresponding resistant  100 (1-alpha)% CI
#defaults are alpha = .05
	n <- length(x)
	up <- 1 - alpha/2
	med <- median(x)
	ln <- floor(n/2) - ceiling(sqrt(n/4))
	un <- n - ln
	d <- sort(x)
	rdf <- un - ln - 1
	cut <- qt(up, rdf)
	sebg <- 0.5 * (d[un] - d[ln + 1])
	rval <- cut * sebg
	rlo <- med - rval
	rhi <- med + rval
	list(int = c(rlo, rhi), med = med, sebg = sebg)
}
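
# Added comparison (hypothetical helper medcidemo): med2ci and medci give
# similar resistant intervals for the median; both stay anchored at the
# sample median even for skewed data.
medcidemo<-function(n = 50){
set.seed(1)
x <- rexp(n)
list(med2ci = med2ci(x)$int, medci = medci(x)$int)
}
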
MLRplot<-function(x, Y)
{
# Forward response plot and residual plot.
# R needs command "library(lqs)" if a robust estimator replaces lsfit.
# Advance the view with the right mouse button.
	x <- as.matrix(x)
	out <- lsfit(x, Y)
	cook <- ls.diag(out)$cooks
	n <- dim(x)[1]
	p <- dim(x)[2] + 1
	tem <- cook > min(0.5, (2 * p)/n)
	bhat <- out$coef
	FIT <- bhat[1] + x %*% bhat[-1]
	par(mfrow = c(2, 1))
	plot(FIT, Y)
	abline(0, 1)
	points(FIT[tem], Y[tem], pch = 15)
	identify(FIT, Y)
	title("Forward Response Plot")
	RES <- Y - FIT
	plot(FIT, RES)
	points(FIT[tem], RES[tem], pch = 15)
	identify(FIT, RES)
	title("Residual Plot")
}

mlrplot2 <- function(x, Y)
{
# Forward response plot and residual plot for two mbareg estimators.
# Workstations need to activate a graphics
# device with command "X11()" or "motif()."
# R needs command "library(lqs)" if a robust estimator replaces lsfit.
# Advance the view with the right mouse button.
	x <- as.matrix(x)
	out <- mbareg(x, Y)
	bhat <- out$coef
	FIT <- bhat[1] + x %*% bhat[-1]
	par(mfrow = c(2, 2))
	plot(FIT, Y)
	abline(0, 1)
	identify(FIT, Y)
	title("MBA Forward Response Plot")
	RES <- Y - FIT
	plot(FIT, RES)
	identify(FIT, RES)
	title("MBA Residual Plot")
#
        out <- mbalata(x, Y)
	bhat <- out$coef
	FIT <- bhat[1] + x %*% bhat[-1]
	plot(FIT, Y)
	abline(0, 1)
	identify(FIT, Y)
	title("MBALATA Forward Response Plot")
	RES <- Y - FIT
	plot(FIT, RES)
	identify(FIT, RES)
	title("MBALATA Residual Plot")
}


mplot<-
function(x)
{
# Makes a DD plot only using the MDi, the RDi are not used.
	p <- dim(x)[2]
	center <- apply(x, 2, mean)
	cov <- var(x)
	md2 <- mahalanobis(x, center, cov)
	md <- sqrt(md2)
	rd <- md
	const <- sqrt(qchisq(0.5, p))/median(rd)
	rd <- const * rd
	plot(md, rd)
	abline(0, 1)
	identify(md, rd)
}

nav<-
function(alpha = 0.01, k = 5)
{
#gets n(asy var) for the alpha trimmed mean
#and T_(A,n)(k) if errors are N(0,1)
	z <-  - qnorm(alpha)
	den <- 1 - (2 * z * dnorm(z))/(2 * pnorm(z) - 1
		)
	val <- den/(1 - 2 * alpha)
	ntmav <- val + (2 * alpha * z^2)/(1 - 2 * alpha
		)^2
	zj <- k * qnorm(0.75)
	alphaj <- pnorm( - zj)
	alphaj <- ceiling(100 * alphaj)/100
	zj <-  - qnorm(alphaj)
	den <- 1 - (2 * zj * dnorm(zj))/(2 * pnorm(zj) -
		1)
	val <- den/(1 - 2 * alphaj)
	natmav <- val + (2 * alphaj * zj^2)/(1 - 2 *
		alphaj)^2
	list(ntmav = ntmav, natmav = natmav)
}

nltv<-
function(gam = 0.5)
{
# Gets asy var for lts(h) and lta(h) at standard normal
# where h/n -> gam.
	k <- qnorm(0.5 + gam/2)
	den <- gam - 2 * k * dnorm(k)
	ltsv <- 1/den
	tem <- (1 - exp( - (k^2)/2))^2
	ltav <- (2 * pi * gam)/(4 * tem)
	list(ltsv = ltsv, ltav = ltav)
}
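
# Added worked check (hypothetical helper nltvdemo): tabulates the normal
# case LTS and LTA asymptotic variances over a grid of gam values.
nltvdemo<-function(gams = c(0.5, 0.75, 0.9)){
t(sapply(gams, function(g) unlist(nltv(g))))
}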

oddata<-
function(n = 100, q = 5, theta = 1)
{
# Generates overdispersion (negative binomial) data for loglinear regression.
#
	y <- 1:n
	pr <- 1/(1 + theta)
	beta <- 0 * 1:q
	beta[1:3] <- 1
	alpha <- -2.5
	x <- matrix(rnorm(n * q), nrow = n, ncol = q)
	x <- 0.5 * x + 1
	SP <- alpha + x %*% beta
	y <- rnbinom(n, size = ceiling(exp(SP)), pr)
	list(x = x, y = y)
}

pifclean<-
function(k, gam)
{
	p <- floor(log(3/k)/log(1 - gam))
	list(p = p)
}
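
# Added worked example (hypothetical helper pifcleandemo):
# pifclean(k = 500, gam = 0.4) gives p = floor(log(3/500)/log(0.6)) =
# floor(10.02) = 10.
pifcleandemo<-function(){
pifclean(k = 500, gam = 0.4)
}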

piplot<-function(x, y, alpha = 0.05)
{
# For Unix, use X11() to turn on the graphics device before
# using this function.
# Makes an FY plot with prediction limits added.
	x <- as.matrix(x)
	p <- dim(x)[2] + 1
	n <- length(y)
	up <- 1:n
	low <- up
	out <- lsfit(x, y)
	tem <- ls.diag(out)
	lev <- tem$hat
	res <- out$residuals
	FIT <- y - res
	Y <- y
	corfac <- (1 + 15/n)*sqrt(n/(n - p))
	val2 <- quantile(res, c(alpha/2, 1 - alpha/2))	
	#get lower and upper PI limits for each case
	for(i in 1:n) {
		val <- sqrt(1 + lev[i])
		val3 <- as.single(corfac * val2[1] * val)
		val4 <- as.single(corfac * val2[2] * val)
		up[i] <- FIT[i] + val4
		low[i] <- FIT[i] + val3
	}
	zy <- c(min(low), Y, max(up))
	zx <- c(min(FIT), FIT, max(FIT))
        #change labels so plot labels are good
        ff <- FIT
        yy <- Y
        Y <- zy
        FIT <- zx
	plot(FIT, Y, type = "n")
	points(ff, yy)
	abline(0, 1)
	points(ff, up, pch = 17)
	points(ff, low, pch = 17)
}

pisim<-function(n = 100, q = 7, nruns = 100, alpha = 0.05, eps = 0.1, shift = 9, type
	 = 1)
{
# compares new and classical PIs for multiple linear regression
# if type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors
# 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2) errors
# constant = 1 so there are p = q+1 coefficients
	b <- 0 * 1:q + 1
	cpicov <- 0
	npicov <- 0
	acpicov <- 0
	opicov <- 0
	val3 <- 1:nruns
	val4 <- val3
	val5 <- val3
	pilen <- matrix(0, nrow = nruns, ncol = 4)
	coef <- matrix(0, nrow = nruns, ncol = q + 1)
	corfac <- (1 + 15/n) * sqrt(n/(n - q - 1))
	corfac2 <- sqrt(n/(n - q - 1))
	for(i in 1:nruns) {
		x <- matrix(rnorm(n * q), nrow = n, ncol = q)
		if(type == 1) {
			y <- 1 + x %*% b + rnorm(n)
			xf <- rnorm(q)
			yf <- 1 + xf %*% b + rnorm(1)
		}
		if(type == 2) {
			y <- 1 + x %*% b + rt(n, df = 3)
			xf <- rnorm(q)
			yf <- 1 + xf %*% b + rt(1, df = 3)
		}
		if(type == 3) {
			y <- 1 + x %*% b + rexp(n) - 1
			xf <- rnorm(q)
			yf <- 1 + xf %*% b + rexp(1) - 1
		}
		if(type == 4) {
			y <- 1 + x %*% b + runif(n, min = -1, max = 1)
			xf <- rnorm(q)
			yf <- 1 + xf %*% b + runif(1, min = -1, max = 1)
		}
		if(type == 5) {
			err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift)
			y <- 1 + x %*% b + err
			xf <- rnorm(q)
			yf <- 1 + xf %*% b + rnorm(1, sd = 1 + rbinom(1, 1, eps
				) * shift)
		}
		out <- lsfit(x, y)
		fres <- out$resid
		coef[i,  ] <- out$coef
		yfhat <- out$coef[1] + xf %*% out$coef[-1]
		w <- cbind(1, x)
		xtxinv <- solve(t(w) %*% w)
		xf <- c(1, xf)
		hf <- xf %*% xtxinv
		hf <- hf %*% xf
		val <- sqrt(1 + hf)	#get classical PI
		mse <- sum(fres^2)/(n - q - 1)
		val2 <- qt(1 - alpha/2, n - q - 1) * sqrt(mse) * val
		up <- yfhat + val2
		low <- yfhat - val2
		pilen[i, 1] <- up - low
		if(low < yf && up > yf) cpicov <- cpicov + 1	
	#get semiparametric PI
		val2 <- quantile(fres, c(alpha/2, 1 - alpha/2))
		val3[i] <- as.single(corfac * val2[1] * val)
		val4[i] <- as.single(corfac * val2[2] * val)
		up <- yfhat + val4[i]
		low <- yfhat + val3[i]
		pilen[i, 2] <- up - low
		if(low < yf && up > yf) npicov <- npicov + 1	
	# asymptotically conservative PI
		val6 <- corfac2 * max(abs(val2))
		val5[i] <- val6 * val
		up <- yfhat + val5[i]
		low <- yfhat - val5[i]
		pilen[i, 3] <- up - low
		if(low < yf && up > yf) acpicov <- acpicov + 1	
	# asymptotically optimal PI
		sres <- sort(fres)
		cc <- ceiling(n * (1 - alpha))
		rup <- sres[cc]
		rlow <- sres[1]
		olen <- rup - rlow
		if(cc < n) {
			for(j in (cc + 1):n) {
				zlen <- sres[j] - sres[j - cc + 1]
				if(zlen < olen) {
				  olen <- zlen
				  rup <- sres[j]
				  rlow <- sres[j - cc + 1]
				}
			}
		}
		up <- yfhat + corfac * val * rup
		low <- yfhat + corfac * val * rlow
		pilen[i, 4] <- up - low
		if(low < yf && up > yf)
			opicov <- opicov + 1
	}
	pimnlen <- apply(pilen, 2, mean)
	mnbhat <- apply(coef, 2, mean)
	lcut <- mean(val3)
	hcut <- mean(val4)
	accut <- mean(val5)
	cpicov <- cpicov/nruns
	npicov <- npicov/nruns
	acpicov <- acpicov/nruns
	opicov <- opicov/nruns
	list(mnbhat = mnbhat, pimnlen = pimnlen, cpicov = cpicov, npicov =
		npicov, acpicov = acpicov, opicov = opicov, lcut = lcut, hcut
		 = hcut, accut = accut)
}

ratmn<-
function(x, k1 = 6, k2 = 6)
{
#robust 2 stage asymmetrically trimmed mean
	madd <- mad(x, constant = 1)
	med <- median(x)
	LM <- sum(x < (med - k1 * madd))
	nmUM <- sum(x > (med + k2 * madd))
	n <- length(x)	
	# ll (hh) is the percentage to be trimmed to the left (right)
	ll <- ceiling((100 * LM)/n)
	hh <- ceiling((100 * (nmUM))/n)
	tem <- sort(x)
	ln <- floor((ll * n)/100)
	un <- floor((n * (100 - hh))/100)
	low <- ln + 1
	val1 <- tem[low]
	val2 <- tem[un]
	rtmn <- mean(x[(x >= val1) & (x <= val2)])
	rtmn
}

rmaha<-
function(x)
{
# Produces robust Mahalanobis distances (scaled for normal data).
	p <- dim(x)[2]
	out <- cov.mcd(x)
	center <- out$center
	cov <- out$cov
	rd <- mahalanobis(x, center, cov)
	const <- sqrt(qchisq(0.5, p))/median(rd)
	return(const * sqrt(rd))
}

robci <- function(x, alpha = 0.05, trmp = 0.25, ka = 6, ks = 3.5
	)
{
#Gets several robust  100 (1-alpha)% CI's for data x.
#defaults are alpha = .05
   n <- length(x)
   up <- 1 - alpha/2
   	med <- median(x)
	madd <- mad(x, constant = 1)
	d <- sort(x)
	dtem <- d	## get the CI for T_A,n
	LM <- sum(x < (med - ka * madd))
	nmUM <- sum(x > (med + ka * madd))	
	# ll (hh) is the percentage to be trimmed to the left (right)
	ll <- ceiling((100 * LM)/n)
	hh <- ceiling((100 * (nmUM))/n)
	ln <- floor((ll * n)/100)
	un <- floor((n * (100 - hh))/100)
	low <- ln + 1
	val1 <- dtem[low]
	val2 <- dtem[un]
	tstmn <- mean(x[(x >= val1) & (x <= val2)])	
	#have obtained the two stage asymmetrically trimmed mean
	if(ln > 0) {
		d[1:ln] <- d[low]
	}
	if(un < n) {
		d[(un + 1):n] <- d[un]
	}
	den <- ((un - ln)/n)^2
	swv <- var(d)/den	
	#got the scaled Winsorized variance
	rdf <- un - low
	rval <- qt(up, rdf) * sqrt(swv/n)
	talo <- tstmn - rval
	tahi <- tstmn + rval	
	##got low and high endpoints of robust T_A,n CI
##get robust T_S,n CI
	d <- dtem
	lo <- sum(x < (med - ks * madd))
	hi <- sum(x > (med + ks * madd))
	low <- ceiling((100 * lo)/n)
	high <- ceiling((100 * hi)/n)
	tp <- min(max(low, high)/100, 0.5)
	tstmn <- mean(x, trim = tp)	
	#have obtained the two stage symmetrically trimmed mean
	ln <- floor(n * tp)
	un <- n - ln
	if(ln > 0) {
		d[1:ln] <- d[(ln + 1)]
	}
	if(un < n) {
		d[(un + 1):n] <- d[un]
	}
	den <- ((un - ln)/n)^2
	swv <- var(d)/den	
	#got the scaled Winsorized variance
	rdf <- un - ln - 1
	rval <- qt(up, rdf) * sqrt(swv/n)
	tslo <- tstmn - rval
	tshi <- tstmn + rval	
	##got low and high endpoints of robust T_S,n CI
##get median CI that uses a scaled Winsorized variance
	d <- dtem
	lnbg <- floor(n/2) - ceiling(sqrt(n/4))
	unbg <- n - lnbg
	lowbg <- lnbg + 1
	if(lnbg > 0) {
		d[1:lnbg] <- d[(lowbg)]
	}
	if(unbg < n) {
		d[(unbg + 1):n] <- d[unbg]
	}
	den <- ((unbg - lnbg)/n)^2
	swv <- var(d)/den	
	#got the scaled Winsorized variance
	rdf <- unbg - lnbg - 1
	cut <- qt(up, rdf)
	rval <- cut * sqrt(swv/n)
	rlo <- med - rval
	rhi <- med + rval	
	##got median CI that uses a scaled Winsorized variance
##get BG CI
	se2 <- 0.5 * (d[unbg] - d[lowbg])
	rval <- cut * se2
	rlo2 <- med - rval
	rhi2 <- med + rval	
	#got low and high endpoints of BG CI
## get classical CI
	mn <- mean(x)
	v <- var(x)
	se <- sqrt(v/n)
	val <- qt(up, n - 1) * se
	lo <- mn - val
	hi <- mn + val	##got classical CI endpoints
## get trimmed mean CI
	d <- dtem
	ln <- floor(n * trmp)
	un <- n - ln
	trmn <- mean(x, trim = trmp)
	if(ln > 0) {
		d[1:ln] <- d[(ln + 1)]
	}
	if(un < n) {
		d[(un + 1):n] <- d[un]
	}
	den <- ((un - ln)/n)^2
	swv <- var(d)/den	
	#got the scaled Winsorized variance
	rdf <- un - ln - 1
	rval <- qt(up, rdf) * sqrt(swv/n)
	trlo <- trmn - rval
	trhi <- trmn + rval	
	##got trimmed mean CI endpoints
	list(tint = c(lo, hi), taint = c(talo, tahi),
		tsint = c(tslo, tshi), bgint = c(rlo2,
		rhi2), mint = c(rlo, rhi), trint = c(
		trlo, trhi))
}
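
# Added usage sketch (hypothetical helper robcidemo): computes all six
# intervals returned by robci on heavy-tailed data.
robcidemo<-function(n = 50){
set.seed(1)
robci(rt(n, df = 3))
}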


rrplot<-
function(x, y, nsamps = 7)
{
# Makes an RR plot. Needs the mbareg function.
	n <- length(y)
	rmat <- matrix(nrow = n, ncol = 5)
	lsres <- lsfit(x, y)$residuals
	print("got OLS")
	l1res <- l1fit(x, y)$residuals
	print("got L1")
	almsres <- lmsreg(x, y)$resid
	print("got ALMS")
	altsres <- ltsreg(x, y)$residuals
	print("got ALTS")
	mbacoef <- mbareg(x, y, nsamp = nsamps)$coef
	MBARES <- y - mbacoef[1] - x %*% mbacoef[-1]
	print("got MBA")
	rmat[, 1] <- lsres
	rmat[, 2] <- l1res
	rmat[, 3] <- almsres
	rmat[, 4] <- altsres	
	rmat[, 5] <- MBARES
	pairs(rmat, labels = c("OLS residuals",
		"L1 residuals", "ALMS residuals",
		"ALTS residuals", "MBA residuals"))
}

rrplot2<-
function(x, y, nsamps = 7)
{
# Makes an RR plot. Needs the mbareg function.
	n <- length(y)
	rmat <- matrix(nrow = n, ncol = 4)
	lsres <- lsfit(x, y)$residuals
	print("got OLS")
	almsres <- lmsreg(x, y)$resid
	print("got ALMS")
	altsres <- ltsreg(x, y)$residuals
	print("got ALTS")
	mbacoef <- mbareg(x, y, nsamp = nsamps)$coef
	MBARES <- y - mbacoef[1] - x %*% mbacoef[-1]
	print("got MBA")
	rmat[, 1] <- lsres
	rmat[, 2] <- almsres
	rmat[, 3] <- altsres
	rmat[, 4] <- MBARES
	pairs(rmat, labels = c("OLS residuals",
		 "ALMS residuals",
		"ALTS residuals", "MBA residuals"))
}

rstmn<-
function(x, k1 = 5, k2=5)
{
#robust symmetrically trimmed 2 stage mean
#truncates too many cases when the contamination is asymmetric
	madd <- mad(x, constant = 1)
	med <- median(x)
	LM <- sum(x < (med - k1 * madd))
	nmUM <- sum(x > (med + k2 * madd))
	n <- length(x)	#ll (hh) is the percentage trimmed to the left (right)
# tp is the trimming proportion
	ll <- ceiling((100 * LM)/n)
	hh <- ceiling((100 * nmUM)/n)
	tp <- min(max(ll, hh)/100, 0.5)
	mean(x, trim = tp)
}

sir<-
function(x, y, h)
{
#   Obtained from STATLIB. Contributed by Thomas Koetter.
#   Calculates the effective dimension-reduction (e.d.r.)
#   directions by Sliced Inverse Regression (K.C. Li 1991, JASA 86, 316-327)
#
#  Input:   x     n x p matrix, explanatory variable
#           y     n x 1 vector, dependent variable
#           h     scalar:  if h >=  2   number of slices
#                          if h <= -2   number of elements within a slice
#                          0 < h < 1    width of a slice:  h = slicewidth /
# range
#
#  Output:  list(edr, evalues)
#           edr      p x p matrix, estimates for the e.d.r. directions
#           evalues  p x 1 vector, the eigenvalues to the directions
#
# written by Thomas Koetter (thomas@wiwi.hu-berlin.de) 1995
# last modification: 7/18/95
# based on the implementation in XploRe
# a full description of the XploRe program can be found in (chapter 11)
# 'XploRe: An interactive statistical computing environment',
#  W. Haerdle, S. Klinke, B.A. Turlach, Springer, 1995
#
# This software can be freely used for non-commercial purposes and freely
# distributed.
#+-----------------------------------------------------------------------------+
#|  Thomas Koetter                                                             |
#|  Institut fuer Statistik und Oekonometrie                                   |
#|  Fakultaet Wirtschaftswissenschaften                                        |
#|  Humboldt-Universitaet zu Berlin, 10178 Berlin, GERMANY                     |
#+-----------------------------------------------------------------------------+
#|  Tel. voice:   +49 30  2468-321                                             |
#|  Tel. FAX:     +49 30  2468-249                                             |
#|  E-mail:       thomas@wiwi.hu-berlin.de                                     |
#+-----------------------------------------------------------------------------+
	n <- nrow(x)
	ndim <- ncol(x)
	if(n != length(c(y))) {
		stop("length of y doesn't match to number of rows of x !!")
	}
	if( - h > n) {
		stop("Number of elements within slices can't exceed number of data !!"
			)
	}
# standardize the x variable to z (mean 0 and cov I)
	xb <- apply(x, 2, mean)
	si2 <- solve(chol(var(x)))
	xt <- (x - matrix(xb, nrow(x), ncol(x), byrow = T)) %*% si2	
	# sort the data regarding y. x values are now packed into slices
	ord1 <- order(y)
	data <- cbind(y[ord1], xt[ord1,  ])	# determine slicing strategy
	if(h <= -2) {
# abs(h) is number of elements per slice
		h <- abs(h)
		ns <- floor(n/h)
		condit <- 1:n
		choice <- (1:ns) * h	
	# if there are observations left, add them to the first and last slice
		if(h * ns != n) {
			hk <- floor((n - h * ns)/2)
			choice <- choice + hk
			choice[ns] <- n	# to avoid numerical problems
		}
	}
	else if(h >= 2) {
# h is number of slices
		ns <- h
		slwidth <- (data[n, 1] - data[1, 1])/ns
		slend <- seq(data[1, 1] + slwidth, length = ns, by = slwidth)
		slend[ns] <- data[n, 1]
		condit <- c(data[, 1])
		choice <- slend
	}
	else if((0 < h) && (h < 1)) {
# h is the width of a slice divided by the range of y
		ns <- floor(1/h)
		slwidth <- (data[n, 1] - data[1, 1]) * h
		slend <- seq(data[1, 1] + slwidth, length = ns, by = slwidth)
		slend[ns] <- data[n, 1]	# to avoid numerical problems
		condit <- c(data[, 1])
		choice <- slend
	}
	else stop("values of third parameter not valid")
	v <- matrix(0, ndim, ndim)	# estimate for Cov(E[z|y])
	ind <- rep(T, n)	# index for already sliced elements
	ndim <- ndim + 1
	j <- 1	# loop counter
	while(j <= ns) {
		sborder <- (condit <= choice[j]) & ind	# index of slice j
		if(any(sborder)) {
# are there elements in slice j ?
			ind <- ind - sborder
			xslice <- data[sborder, 2:ndim]
			if(sum(sborder) == 1) {
# xslice is a vector !
				xmean <- xslice
				v <- v + outer(xmean, xmean, "*")
			}
			else {
				xmean <- apply(xslice, 2, mean)
				v <- v + outer(xmean, xmean, "*") * nrow(xslice
				  )
			}
		}
		j <- j + 1
	}
	if(any(ind)) {
		print("Error:  elements unused !!")
		print(ind)
	}
	v <- (v + t(v))/(2 * n)	# to prevent numerical errors (v is symmetric)
	eig <- eigen(v)
	b <- si2 %*% eig$vectors	# estimates for e.d.r. directions
	data <- sqrt(apply(b * b, 2, sum))
	b <- t(b)/data
	return(list(edr = t(b), evalues = eig$values))
}

sirviews<-
function(x, Y, ii = 1)
{
# Uses the function "sir" from STATLIB.
# Trimmed views for 90, 80, ... 0 percent
# trimming. Allows visualization of m
# and crude estimation of c beta in models
# of the form y = m(x^T beta) + e.
# beta is obtained from SIR.
# Workstations need to activate a graphics
# device with command "X11()" or "motif()."
# R needs command "library(lqs)."
# Advance the view with the right mouse button.
# In R, highlight "stop."
	x <- as.matrix(x)
        q <- dim(x)[2]
	out <- cov.mcd(x)	# or use out <- cov.mve(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%",
		"0%")
	tem <- seq(0.1, 1, 0.1)
	h <- q + 7
	for(i in ii:10) {
		val <- quantile(rd2, tem[i])
		b <- sir(x[rd2 <= val,  ], Y[rd2 <= val], h)$edr[, 1]
		ESP <- x %*% b
		plot(ESP, Y)
		title(labs[i])
		identify(ESP, Y)
		print(b)
	}
}

stmci<-
function(x, alpha = 0.05, ks = 3.5)
{
#gets the SE for the two-stage symmetrically trimmed mean and the corresponding robust 100 (1-alpha)% CI
#defaults are alpha = .05, ks = 3.5
	n <- length(x)
	up <- 1 - alpha/2
	med <- median(x)
	madd <- mad(x, constant = 1)
	lo <- sum(x < (med - ks * madd))
	hi <- sum(x > (med + ks * madd))
	low <- ceiling((100 * lo)/n)
	high <- ceiling((100 * hi)/n)
	tp <- min(max(low, high)/100, 0.5)
	tstmn <- mean(x, trim = tp)	
	#have obtained the two stage symmetrically trimmed mean
	ln <- floor(n * tp)
	un <- n - ln
	d <- sort(x)
	if(ln > 0) {
		d[1:ln] <- d[(ln + 1)]
		d[(un + 1):n] <- d[un]
	}
	den <- ((un - ln)/n)^2
	swv <- var(d)/den	
	#got the scaled Winsorized variance
	rdf <- un - ln - 1
	rval <- qt(up, rdf) * sqrt(swv/n)
	tslo <- tstmn - rval
	tshi <- tstmn + rval
	list(int = c(tslo, tshi), tp = tp)
}

symviews<-
function(x, Y)
{
# Makes trimmed views for 90, 80, ..., 0
# percent trimming and sometimes works even if m
# is symmetric about E(x^t beta) where
# y = m(x^T beta ) + e.
# For work stations, activate a graphics
# device with command "X11()" or "motif()."
# For R, use "library(lqs)."
# Use the rightmost mouse button to advance
# the view. In R, highlight ``stop."
	x <- as.matrix(x)
	tem <- seq(0.1, 1, 0.1)
	bols <- lsfit(x, Y)$coef
	fit <- x %*% bols[-1]
	temx <- x[fit > median(fit),  ]
	temy <- Y[fit > median(fit)]
	out <- cov.mcd(temx)	# or use out <- cov.mve(temx)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(temx, center, cov)
	for(i in 1:10) {
		val <- quantile(rd2, tem[i])
		bhat <- lsfit(temx[rd2 <= val,  ], temy[rd2 <= val])$coef
		ESP <- x %*% bhat[-1]
		plot(ESP, Y)
		identify(ESP, Y)
		print(bhat)
	}
}


tmci<-
function(x, alpha = 0.05, tp = 0.25)
{
#gets se for the tp trimmed mean and the corresponding robust  100 (1-alpha)% CI
#defaults are alpha = .05
	n <- length(x)
	up <- 1 - alpha/2
	tmn <- mean(x, trim = tp)
	ln <- floor(n * tp)
	un <- n - ln
	d <- sort(x)
	if(ln > 0) {
		d[1:ln] <- d[(ln + 1)]
			d[(un + 1):n] <- d[un]
	}
	den <- ((un - ln)/n)^2
	swv <- var(d)/den	
	#got the scaled Winsorized variance
	rdf <- un - ln - 1
	rval <- qt(up, rdf) * sqrt(swv/n)
	tmlo <- tmn - rval
	tmhi <- tmn + rval
	list(int = c(tmlo, tmhi), tp = tp)
}
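
# Added comparison (hypothetical helper trimcidemo): contrasts the two-stage
# interval from stmci with the fixed-proportion interval from tmci under
# asymmetric contamination.
trimcidemo<-function(n = 100){
set.seed(1)
x <- c(rnorm(n - 10), rnorm(10, mean = 8))
list(stm = stmci(x), tm = tmci(x))
}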

Tplt<-
function(x, y)
{
# For Unix, use X11() to turn on the graphics device before using this function.
# This function plots y^L vs OLS fit. If plot is linear for L, use y^L instead of y.
# This is a graphical method for a response transform.
	olsfit <- y - lsfit(x, y)$resid
	lam <- c(-1, -2/3, -1/2, -1/3, -1/4, 0, 1/4, 1/
		3, 1/2, 2/3, 1)
	xl <- c("Y**(-1)", "Y**(-2/3)", "Y**(-0.5)",
		"Y**(-1/3)", "Y**(-1/4)", "LOG(Y)",
		"Y**(1/4)", "Y**(1/3)", "Y**(1/2)",
		"Y**(2/3)", "Y")
	for(i in 1:length(lam)) {
		if(lam[i] == 0)
			ytem <- log(y)
		else if(lam[i] == 1)
			ytem <- y
		else ytem <- (y^lam[i] - 1)/lam[i]
		plot(olsfit, ytem, xlab = "YHAT", ylab
			 = xl[i])
		abline(lsfit(olsfit, ytem)$coef)
		identify(olsfit, ytem)
	}
}

trviews<-
function(x, Y, ii = 1)
{
# Trimmed views for 90, 80, ... 0 percent
# trimming.   Increase ii if 90% trimming is too harsh.
# Allows visualization of  m and crude estimation of
# c beta in models of the form y = m(x^T beta) + e.
# Workstation: activate a graphics device
# with commands "X11()" or "motif()."
# R needs command "library(lqs)."
# Advance the view with the right mouse button and
# in R, highlight "stop."
        x <- as.matrix(x)
	out <- cov.mcd(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%",
     "20%","10%","0%")
	tem <- seq(0.1, 1, 0.1)
	for(i in ii:10) {
		val <- quantile(rd2, tem[i])
		b <- lsfit(x[rd2 <= val,  ], Y[rd2 <= val])$coef
		ESP <- x %*% b[-1]
		plot(ESP, Y)
		title(labs[i])
		identify(ESP, Y)
		print(b)
	}
}

tvreg<-
function(x, Y, ii = 1)
{
# Trimmed views (TV) regression for 90, 80, ..., 0 percent
# trimming.  Increase ii if 90% trimming is too harsh.
# Workstation: activate a graphics device
# with commands "X11()" or "motif()."
# R needs command "library(lqs)."
# Advance the view with the right mouse button and
# in R, highlight "stop."
        x <- as.matrix(x)
	out <- cov.mcd(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	labs <- c("90%", "80%", "70%", "60%", "50%",
		"40%", "30%", "20%", "10%", "0%")
	tem <- seq(0.1, 1, 0.1)
	for(i in ii:10) {
		val <- quantile(rd2, tem[i])
		b <- lsfit(x[rd2 <= val,  ], Y[rd2 <=
			val])$coef
		FIT <- x %*% b[-1] + b[1]
		plot(FIT, Y)
		abline(0, 1)
		title(labs[i])
		identify(FIT, Y)
		print(b)
	}
}

tvreg2<-
function(X, Y, M = 0)
{
# Trimmed views regression for M percent trimming.
# Workstation: activate a graphics device
# with commands "X11()" or "motif()."
# R needs command "library(lqs)."
        X <- as.matrix(X)
	out <- cov.mcd(X)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(X, center, cov)
	tem <- (100 - M)/100
	val <- quantile(rd2, tem)
	b <- lsfit(X[rd2 <= val,  ], Y[rd2 <= val])$coef
	FIT <- X %*% b[-1] + b[1]
	plot(FIT, Y)
	abline(0, 1)
	identify(FIT, Y)
	list(coef = b)
}


wddplot<-
function(x)
{# Shows the southwest corner of the DD plot.
	n <- dim(x)[1]
	wt <- 0 * (1:n)
	p <- dim(x)[2]
	center <- apply(x, 2, mean)
	cov <- var(x)
	md2 <- mahalanobis(x, center, cov)
	out <- cov.mcd(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	md <- sqrt(md2)
	rd <- sqrt(rd2)
	const <- sqrt(qchisq(0.5, p))/median(rd)
	rd <- const * rd
	wt[rd < sqrt(qchisq(0.975, p))] <- 1
	MD <- md[wt > 0]
	RD <- rd[wt > 0]
	plot(MD, RD)
}

skipcov<-function(m,cop=6,MM=FALSE,op=1,mgv.op=0,outpro.cop=3,STAND=FALSE){
#
# m is an n by p matrix
#
# Compute skipped covariance matrix
#
# op=1:
# Eliminate outliers using a projection method
# That is, first determine center of data using:
#
# cop=1 Donoho-Gasko median,
# cop=2 MCD,
# cop=3 marginal medians.
#  cop=4 uses MVE center
#  cop=5 uses TBS
#  cop=6 uses rmba (Olive's median ball algorithm)
#
# For each point
# consider the line between it and the center,
# project all points onto this line, and
# check for outliers using
#
# MM=F, a boxplot rule.
# MM=T, rule based on MAD and median
#
# Repeat this for all points. A point is declared
# an outlier if for any projection it is an outlier
#
# op=2 use mgv (function outmgv) method to eliminate outliers
#
# Eliminate any outliers and compute means
#  using remaining data.
# mgv.op=0, mgv uses all pairwise distances to determine center of the data
# mgv.op=1 uses MVE
# mgv.op=2 uses MCD
#
temp<-NA
m<-elimna(m)
m<-as.matrix(m)
if(op==2)temp<-outmgv(m,plotit=FALSE,op=mgv.op)$keep
if(op==1)temp<-outpro(m,plotit=FALSE,MM=MM,cop=outpro.cop,STAND=STAND)$keep
val<-var(m[temp,])
val
}
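
# Added usage sketch (hypothetical helper skipcovdemo): skipcov depends on
# elimna and the outlier-detection functions defined elsewhere in this file,
# so source the full file first; op = 2 uses the MGV method via outmgv.
skipcovdemo<-function(n = 100, p = 3){
set.seed(1)
x <- matrix(rnorm(n * p), ncol = p)
skipcov(x, op = 2)
}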

hc4wtest<-function(x,y,nboot=500,SEED=TRUE,RAD=TRUE,xout=FALSE,outfun=outpro,...){
#
# Test the hypothesis that all OLS slopes are zero
# using a wild bootstrap version of the HC4 Wald test.
#
# This function calls the functions
# olshc4 and
# lstest4
#
if(SEED)set.seed(2)
x<-as.matrix(x)
# First, eliminate any rows of data with missing values.
temp <- cbind(x, y)
        temp <- elimna(temp)
        pval<-ncol(temp)-1
        x <- temp[,1:pval]
        y <- temp[, pval+1]
if(xout){
flag<-outfun(x,...)$keep
x<-as.matrix(x)
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
x<-as.matrix(x)
p<-ncol(x)
pp<-p+1
temp<-lsfit(x,y)
yhat<-mean(y) # fit under the null hypothesis that all slopes are zero
res<-y-yhat # null-model residuals used to generate the wild bootstrap samples
#s<-lsfitNci4(x, y)$cov[-1, -1]
s<-olshc4(x, y)$cov[-1, -1]
si<-solve(s)
b<-temp$coef[2:pp]
wtest<-t(b)%*%si%*%b
print("Taking boostrap samples. Please wait.")
if(RAD)data<-matrix(ifelse(rbinom(length(y)*nboot,1,0.5)==1,-1,1),nrow=nboot)
if(!RAD){
data<-matrix(runif(length(y)*nboot),nrow=nboot)
data<-(data-.5)*sqrt(12) # standardize the random numbers.
}
rvalb<-apply(data,1,lstest4,yhat,res,x)
sum<-sum(rvalb>= wtest[1,1])
p.val<-sum/nboot
list(p.value=p.val)
}
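
# Added usage sketch (hypothetical helper hc4wtestdemo): under the null of
# zero slopes the returned p.value should be roughly uniform; one call on
# null data is shown with a reduced bootstrap size.
hc4wtestdemo<-function(n = 50, p = 2){
set.seed(1)
x <- matrix(rnorm(n * p), ncol = p)
y <- rnorm(n)
hc4wtest(x, y, nboot = 100)
}
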
lscale<-function(x,m,q)
{
#
# Compute the L-scale as used by Maronna
# Technometrics, 2005, 47, 264-273
#
# so it is assumed that values in x have been centered
# (a measure of location has been subtracted from each value)
# and the results squared.
#
#  q is defined in Maronna. For principal components, to reduce
#  to p dimensional data, take q=ncol(x)-p
#
hval<-floor((length(x)+m-q+2)/2)
flag<-(x<0)
if(sum(flag)>0)stop("For lscale, all values must be nonnegative")
x<-sort(x)
val<-sum(x[1:hval])
val
}
ortho<-function(x){
# Orthonormalize x
#
y<-qr(x)
y<-qr.Q(y)
y
}
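
# Added check (hypothetical helper orthodemo): the columns returned by ortho
# are orthonormal, so t(Q) %*% Q is the identity up to rounding error.
orthodemo<-function(n = 10, p = 3){
set.seed(1)
q <- ortho(matrix(rnorm(n * p), ncol = p))
round(t(q) %*% q, 10)
}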

Mpca<-function(x,N1=3,N2=2,tol=.001,N2p=10,Nran=50,
Nkeep=10,SEED=TRUE,op.pro=.1,SCORES=FALSE,pval=NULL){
#
# Robust PCA using Marrona's method (2005, Technometrics)
#
# x is an N by m matrix containing data
# N1, N2, N2p, Nran and Nkeep indicate how many
# iterations are used in the various portions of the
# Marrona robust PCA; see Marrona's paper.
#
# op.pro is the maximum  proportion of unexplained
#   variance that is desired. If pval is not specified, will
#  add variables until this proportion is less than op.pro.
#
# pval, if specified, will use p=pval of the m variables only and report
#  the proportion of unexplained variance.
#  The weighted covariance matrix is returned as well.
#
# SCORES=T, scores are reported and return based on the number of
# variables indicated by pval. pval must be specified.
#
# pval not specified, computes proportion of unexplained  variance
# using p=1, 2 ... variables; results returned in
#
scores<-NULL
wt.cov<-NULL
x<-elimna(x)
if(SEED)set.seed(2)
m<-ncol(x)
n<-nrow(x)
bot<-marpca(x,p=0,N1=N1,N2=N2,tol=tol,N2p=N2p,Nran=Nran,Nkeep=Nkeep,SEED=SEED)
bot<-bot$var.op
mn1<-m-1
rat<-1
it<-0
ratval<-NULL
if(is.null(pval)){
ratval<-matrix(nrow=mn1,ncol=2)
dimnames(ratval)<-list(NULL,c("p","pro.unex.var"))
ratval[,1]<-c(1:mn1)
for(it in 1:mn1){
if(rat>op.pro){
temp<-marpca(x,p=it,N1=N1,N2=N2,tol=tol,N2p=N2p,Nran=Nran,Nkeep=Nkeep,
SEED=SEED)
rat<-temp$var.op/bot
ratval[it,2]<-rat
}}}
if(!is.null(pval)){
if(pval>=m)stop("This method assumes pval<ncol(x)")
temp<-marpca(x,p=pval,N1=N1,N2=N2,tol=tol,N2p=N2p,Nran=Nran,Nkeep=Nkeep,
SEED=SEED)
wt.cov<-temp$wt.cov
ratval<-temp$var.op/bot
}
if(SCORES){
if(is.null(pval))stop("When computing scores, need to specify pval")
temp2<-marpca(x,ncol(x))
ev<-eigen(temp2$wt.cov)
ord.val<-order(ev$values)
mn1<-m-pval+1
Bp<-ev$vectors[,ord.val[mn1:m]] #m by m
xmmu<-x
for(j in 1:m)xmmu[,j]<-x[,j]-temp2$wt.mu[j]
scores<-matrix(ncol=pval,nrow=n)
for(i in 1:n)scores[i,]<-t(Bp)%*%as.matrix(xmmu[i,])
}
list(B=temp$B,a=temp$a,var.op=temp$var.op,unexplained.pro.var=ratval,
scores=scores,wt.cov=wt.cov)
}
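#
# A minimal usage sketch for Mpca (hypothetical data; small values of Nran
# and Nkeep chosen only so the sketch runs quickly):
#
#   set.seed(1)
#   z <- matrix(rnorm(200), ncol=4)
#   Mpca(z, Nran=10, Nkeep=5)   # unexplained variance for p=1, 2, 3
#   Mpca(z, pval=2, SCORES=TRUE, Nran=10, Nkeep=5)   # scores based on p=2
#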
mgvcov<-function(m,op=1,cov.fun=rmba,plotit=FALSE){
#
# m is an n by p matrix
#
# Compute skipped covariance matrix
# using the MGV method
#
# Eliminate any outliers and compute covariance matrix
#  using remaining data.
# op=0, mgv uses all pairwise distances to determine center of the data
# op=1 uses the function indicated by the argument
# cov.fun to determine center of the data cloud.
# default is Olive's median ball
#
#
temp<-NA
m<-elimna(m)
temp<-outmgv(m,plotit=plotit,op=op,cov.fun=cov.fun)$keep
val<-var(m[temp,])
val
}

spca<-function(x,var.fun=pbvar,SCORES=FALSE,pval=1){
#
# Spherical PCA (Locantore et al., 1999, Test, 8, 1-28)
# cf. Maronna, 2005, Technometrics
#
scores<-NULL
mvec<-spat(x)
n<-nrow(x)
m<-ncol(x)
y<-matrix(ncol=m,nrow=n)
xdif<-matrix(ncol=m,nrow=n)
for(i in 1:n){
y[i,]<-x[i,]-mvec
xdif[i,]<-y[i,]
y[i,]<-y[i,]/sqrt(sum(y[i,]^2))
}
e.val<-eigen(var(y))
b<-e.val$vectors
lam<-NA
for(j in 1:ncol(x)){
val<-NA
for(i in 1:n)val[i]<-sum(b[,j]*x[i,])
lam[j]<-var.fun(val)
}
if(SCORES){
ord.val<-order(lam)
mn1<-m-pval+1
Bp<-e.val$vectors[,ord.val[mn1:m]] #m by pval
scores<-matrix(ncol=pval,nrow=n)
for(i in 1:n)scores[i,]<-t(as.matrix(Bp))%*%as.matrix(xdif[i,])
}
list(eigen.val=lam,B=b,spat.mu=mvec,scores=scores)
}
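#
# A minimal usage sketch for spca (hypothetical data):
#
#   set.seed(1)
#   z <- matrix(rnorm(200), ncol=4)
#   spca(z)                        # robust eigenvalues based on pbvar
#   spca(z, SCORES=TRUE, pval=2)   # also return scores on 2 components
#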
sqmad<-function(x){
# Compute the squared value of MAD (R's mad, which rescales for consistency
# with the standard deviation under normality)
val<-mad(x)^2
val
}
mscale<-function(x,del){
#
# Compute the M-scale as used by Maronna
# Technometrics, 2005, 47, 264-273
#
# so it is assumed that values in x have been centered
# (a measure of location has been subtracted from each value)
# and the results squared.
#
#  del is as defined in Maronna. For principal components, to reduce
#  to p dimensional data, q=ncol(x)-p, and del is a function of n and q
#
START<-mad(x)
val<-nelderv2(x,1,mscale.sub,START=START,del=del)
val
}

mscale.sub<-function(x,theta,del){
chival<-x/theta[1]
ones<-rep(1,length(x))
vals<-1-(1-chival)^3
chival<-apply(cbind(ones,vals),1,FUN="min")
val<-abs(mean(chival)-del)
val
}
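#
# The M-scale s solves mean(chi(x/s))=del, where chi(t)=min(1,1-(1-t)^3);
# mscale.sub is the objective |mean(chi(x/theta))-del| that nelderv2
# minimizes. A minimal sketch (hypothetical values; del=.5 is illustrative):
#
#   set.seed(1)
#   r <- rnorm(30)^2
#   s <- mscale(r, del=.5)
#   mscale.sub(r, s, del=.5)   # approximately 0 at the solution
#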

rmba<-function(x, csteps = 5,na.rm=TRUE)
{
# computes the reweighted MBA estimator
# Code supplied by David Olive
#
#       x is assumed to be a matrix
x=as.matrix(x)
if(na.rm)x=elimna(x)
	p <- dim(x)[2]
	n <- dim(x)[1]	##get the DGK estimator
	covs <- var(x)
	mns <- apply(x, 2, mean)	## concentrate
	for(i in 1:csteps) {
		md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
		mns <- apply(as.matrix(x[md2 <= medd2,  ]), 2, mean)
		covs <- var(x[md2 <= medd2,  ])
	}
	covb <- covs
	mnb <- mns	##get the square root of det(covb)
	critb <- prod(diag(chol(covb)))	##get the resistant estimator
	covv <- diag(p)
	med <- apply(x, 2, median)
	md2 <- mahalanobis(x, center = med, covv)
	medd2 <- median(md2)	## get the start
	mns <- apply(as.matrix(x[md2 <= medd2,  ]), 2, mean)
	covs <- var(x[md2 <= medd2,  ])	## concentrate
	for(i in 1:csteps) {
		md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
#		mns <- apply(as.matrix(x[md2 <= medd2,  ]), 2, mean)
		mns <- apply(as.matrix(x[md2 <= medd2,  ]), 2, mean)
		covs <- var(x[md2 <= medd2,  ])
	}
	crit <- prod(diag(chol(covs)))
	if(crit < critb) {
		critb <- crit
		covb <- covs
		mnb <- mns
	}
##scale for better performance at MVN
	rd2 <- mahalanobis(x, mnb, covb)
	const <- median(rd2)/(qchisq(0.5, p))
	covb <- const * covb	
	##reweight the above MBA estimator (mnb,covb) for efficiency
	rd2 <- mahalanobis(x, mnb, covb)
	up <- qchisq(0.975, p)
	rmnb <- apply(as.matrix(x[rd2 <= up,  ]), 2, mean)
	rcovb <- var(x[rd2 <= up,  ])
	rd2 <- mahalanobis(x, rmnb, rcovb)
	const <- median(rd2)/(qchisq(0.5, p))
	rcovb <- const * rcovb	## reweight again
	rd2 <- mahalanobis(x, rmnb, rcovb)
	up <- qchisq(0.975, p)
	rmnb <- apply(as.matrix(x[rd2 <= up,  ]), 2, mean)
	rcovb <- var(x[rd2 <= up,  ])
	rd2 <- mahalanobis(x, rmnb, rcovb)
	const <- median(rd2)/(qchisq(0.5, p))
	rcovb <- const * rcovb
cor.b=NULL
temp=outer(sqrt(diag(rcovb)),sqrt(diag(rcovb)),'*')
if(min(diag(rcovb))>0)cor.b=rcovb/temp
	list(center = rmnb, cov = rcovb, cor=cor.b)
}
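#
# A minimal usage sketch for rmba (hypothetical data):
#
#   set.seed(1)
#   z <- matrix(rnorm(200), ncol=4)
#   fit <- rmba(z)
#   fit$center   # robust multivariate location estimate
#   fit$cov      # reweighted covariance, rescaled for consistency at the normal
#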
tbscov <- function(x,eps=1e-3,maxiter=20,r=.45,alpha=.05){
#        Rocke's constrained S-estimator
#   returns the covariance matrix only. For both location and scatter, use tbs
#
#      r=.45 is the breakdown point
#      alpha=.05 is the asymptotic rejection probability.
#
if(!is.matrix(x))stop("x should be a matrix with two or more columns")
x<-elimna(x)
library(MASS)
temp<-cov.mve(x)
t1<-temp$center
s<-temp$cov
    n <- nrow(x)
    p <- ncol(x)
if(p==1)stop("x should be a matrix with two or more columns")
c1M<-cgen.bt(n,p,r,alpha,asymp=FALSE)
c1<-c1M$c1
if(c1==0)c1<-.001 #Otherwise get division by zero
M<-c1M$M
    b0 <- erho.bt(p,c1,M)
    crit <- 100
    iter <- 1
    w1d <- rep(1,n)
    w2d <- w1d
    while ((crit > eps)&(iter <= maxiter))
    {
        t.old <- t1
        s.old <- s
        wt.old <- w1d
        v.old <- w2d
        d2 <- mahalanobis(x,center=t1,cov=s)
        d <- sqrt(d2)
        k <- ksolve.bt(d,p,c1,M,b0)
        d <- d/k
        w1d <- wt.bt(d,c1,M)
        w2d <- v.bt(d,c1,M)
        t1 <- (w1d %*% x)/sum(w1d)
        s <- s*0
        for (i in 1:n)
        {
            xc <- as.vector(x[i,]-t1)
            s <- s + as.numeric(w1d[i])*(xc %o% xc)
        }
        s <- p*s/sum(w2d)
        mnorm <- sqrt(as.vector(t.old) %*% as.vector(t.old))
        snorm <- eigen(s.old)$values[1]
        crit1 <- max(abs(t1 - t.old))
#        crit <- max(crit1,crit2)
        crit <- max(abs(w1d-wt.old))/max(w1d)
        iter <- iter+1
    }
#    mnorm <- sqrt(as.vector(t1) %*% as.vector(t1))
#    snorm <- eigen(s)$values[1]
#    return(list(t1=t1,s=s))
s
}
erho.bt <- function(p,c1,M)
#   expectation of rho(d) under chi-squared p
    return(chi.int(p,2,M)/2
        +(M^2/2+c1*(5*c1+16*M)/30)*chi.int2(p,0,M+c1)
        +(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4))*(
chi.int(p,0,M+c1)-chi.int(p,0,M))
        +(1/2+M^4/(2*c1^4)-M^2/c1^2)*(chi.int(p,2,M+c1)-chi.int(p,2,M))
        +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*(chi.int(p,3,M+c1)-chi.int(p,3,M))
        +(3*M^2/(2*c1^4)-1/(2*c1^2))*(chi.int(p,4,M+c1)-chi.int(p,4,M))
        -(4*M/(5*c1^4))*(chi.int(p,5,M+c1)-chi.int(p,5,M))
        +(1/(6*c1^4))*(chi.int(p,6,M+c1)-chi.int(p,6,M)))
chi.int <- function(p,a,c1)
#   partial expectation d in (0,c1) of d^a under chi-squared p
  return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*pchisq(c1^2,p+a) )
chi.int2 <- function(p,a,c1)
#   partial expectation d in (c1,\infty) of d^a under chi-squared p
 return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*(1-pchisq(c1^2,p+a)))
cgen.bt <- function(n,p,r,alpha,asymp=FALSE){
#   find constants c1 and M that gives a specified breakdown r
#   and rejection point alpha
if (asymp == FALSE){if (r > (n-p)/(2*n) ) r <- (n-p)/(2*n)}
# maximum achievable breakdown
#
#   if rejection is not achievable, use c1=0 and best rejection
#
    limvec <- rejpt.bt.lim(p,r)
    if (1-limvec[2] <= alpha)
    {
        c1 <- 0
        M <- sqrt(qchisq(1-alpha,p))
    }
    else
    {
    c1.plus.M <- sqrt(qchisq(1-alpha,p))
    M <- sqrt(p)
    c1 <- c1.plus.M - M
    iter <- 1
    crit <- 100
    eps <- 1e-5
    while ((crit > eps)&(iter<100))
    {
        deps <- 1e-4
        M.old <- M
        c1.old <- c1
        er <- erho.bt(p,c1,M)
        fc <- er - r*(M^2/2+c1*(5*c1+16*M)/30)
        fcc1 <- (erho.bt(p,c1+deps,M)-er)/deps
        fcM  <- (erho.bt(p,c1,M+deps)-er)/deps
        fcp <- fcM - fcc1 - r*(M-(5*c1+16*M)/30+c1*9/30)
        M <- M - fc/fcp
        if (M >= c1.plus.M ){M <- (M.old + c1.plus.M)/2}
        c1 <- c1.plus.M - M
#        if (M-c1 < 0)  M <- c1.old+(M.old-c1.old)/2
        crit <- abs(fc)
        iter <- iter+1
    }
    }
list(c1=c1,M=M,r1=r)
}
erho.bt.lim <- function(p,c1)
#   expectation of rho(d) under chi-squared p
  return(chi.int(p,2,c1)+c1^2*chi.int2(p,0,c1))
erho.bt.lim.p <- function(p,c1)
#   derivative of erho.bt.lim wrt c1
  return(chi.int.p(p,2,c1)+c1^2*chi.int2.p(p,0,c1)+2*c1*chi.int2(p,0,c1))


rejpt.bt.lim <- function(p,r){
#   find p-value of translated biweight limit c
#   that gives a specified breakdown
    c1 <- 2*p
    iter <- 1
    crit <- 100
    eps <- 1e-5
    while ((crit > eps)&(iter<100))
    {
        c1.old <- c1
        fc <- erho.bt.lim(p,c1) - c1^2*r
        fcp <- erho.bt.lim.p(p,c1) - 2*c1*r
        c1 <- c1 - fc/fcp
        if (c1 < 0)  c1 <- c1.old/2
        crit <- abs(fc)
        iter <- iter+1
    }
    return(c(c1,pchisq(c1^2,p),log10(1-pchisq(c1^2,p))))
}
chi.int.p <- function(p,a,c1)
  return( exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 )
chi.int2.p <- function(p,a,c1)
  return( -exp(lgamma((p+a)/2)-lgamma(p/2))*2^{a/2}*dchisq(c1^2,p+a)*2*c1 )
ksolve.bt <- function(d,p,c1,M,b0){
#       find a constant k which satisfies the s-estimation constraint
#       for modified biweight
    k <- 1
    iter <- 1
    crit <- 100
    eps <- 1e-5
    while ((crit > eps)&(iter<100))
    {
        k.old <- k
        fk <- mean(rho.bt(d/k,c1,M))-b0
        fkp <- -mean(psi.bt(d/k,c1,M)*d/k^2)
        k <- k - fk/fkp
        if (k < k.old/2)  k <- k.old/2
        if (k > k.old*1.5) k <- k.old*1.5
        crit <- abs(fk)
        iter <- iter+1
    }
    return(k)
}
rho.bt <- function(x,c1,M)
{
    x1 <- (x-M)/c1
    ivec1 <- (x1 < 0)
    ivec2 <- (x1 >  1)
    return(ivec1*(x^2/2)
        +ivec2*(M^2/2+c1*(5*c1+16*M)/30)
        +(1-ivec1-ivec2)*(M^2/2-M^2*(M^4-5*M^2*c1^2+15*c1^4)/(30*c1^4)
            +(1/2+M^4/(2*c1^4)-M^2/c1^2)*x^2
            +(4*M/(3*c1^2)-4*M^3/(3*c1^4))*x^3
            +(3*M^2/(2*c1^4)-1/(2*c1^2))*x^4
            -4*M*x^5/(5*c1^4)+x^6/(6*c1^4)))
}
psi.bt <- function(x,c1,M)
{
    x1 <- (x-M)/c1
    ivec1 <- (x1 < 0)
    ivec2 <- (x1 >  1)
    return(ivec1*x+(1-ivec1-ivec2)*x*(1-x1^2)^2)
}
psip.bt <- function(x,c1,M)
{
    x1 <- (x-M)/c1
    ivec1 <- (x1 < 0)
    ivec2 <- (x1 >  1)
    return(ivec1+(1-ivec1-ivec2)*((1-x1^2)^2+4*x*x1*(1-x1^2)/c1))
}
wt.bt <- function(x,c1,M)
{
    x1 <- (x-M)/c1
    ivec1 <- (x1 < 0)
    ivec2 <- (x1 >  1)
    return(ivec1+(1-ivec1-ivec2)*(1-x1^2)^2)
}
v.bt <- function(x,c1,M) return(x*psi.bt(x,c1,M))

gvarg<-function(m,var.fun=cov.mba,...){
#
# Compute the generalized variance of a matrix m
# It is assumed that var.fun returns a covariance matrix only
#
# (Some functions return a covariance matrix in list mode: $cov
# These functions do not work here.)
#
# other possible choices for var.fun:
# skipcov
# tbscov
# covout
# covogk
# mgvcov
# mvecov
# mcdcov
#
m<-elimna(m)
m<-as.matrix(m)
temp<-var.fun(m,...)
gvar<-prod(eigen(temp)$values)
gvar
}
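#
# Because the product of the eigenvalues of a covariance matrix equals its
# determinant, gvarg with the ordinary covariance matches det(var(m)).
# A minimal check (hypothetical data):
#
#   set.seed(1)
#   z <- matrix(rnorm(200), ncol=4)
#   gvarg(z, var.fun=var)   # agrees with det(var(z))
#   det(var(z))
#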
marpca<-function(x,p=ncol(x)-1,N1=3,N2=2,tol=.001,N2p=10,Nran=50,
Nkeep=10,SEED=TRUE,LSCALE=TRUE,SCORES=F){
#
# Maronna (2005, Technometrics, 47, 264-273) robust PCA
#
# x is an n by m matrix, p<m
#
# Given p, find a p-dimensional manifold that minimizes orthogonal distances.
#  The equation of the manifold is Bx=a; this function returns B and a, plus
#
# var.op = optimal proportion of unexplained variance,  given p
#
x<-elimna(x)
Cmat<-NULL
if(Nkeep>Nran)stop("Must have Nkeep<=Nran")
if(SEED)set.seed(2)
n<-nrow(x)
m<-ncol(x)
q<-m-p
if(q<0)stop("p should have value between 0 and ncol(x)")
if(q>0){
bkeep<-array(dim=c(q,m,Nran))
akeep<-matrix(nrow=Nran,ncol=q)
sig.val<-NA
for(it in 1:Nran){
temp<-marpca.sub(x,p,N1=N1,N2=N2,tol=tol,LSCALE=LSCALE)
bkeep[,,it]<-temp$B
akeep[it,]<-temp$a
sig.val[it]<-temp$var.op
}
ord<-order(sig.val)
bkeep2<-array(dim=c(q,m,Nkeep))
cmatkeep<-array(dim=c(m,m,Nkeep))
akeep2<-matrix(nrow=Nkeep,ncol=q)
sig.val2<-NA
for(it in 1:Nkeep){
temp<-marpca.sub(x,p,N1=0,N2=N2p,tol=tol,B=bkeep[,,ord[it]],a=akeep[ord[it],],
LSCALE=LSCALE)
bkeep2[,,it]<-temp$B
akeep2[it,]<-temp$a
sig.val2[it]<-temp$var.op
cmatkeep[,,it]<-temp$wt.cov
}
ord<-order(sig.val2)
B<-bkeep2[,,ord[1]]
a<-akeep2[ord[1],]
var.op<-sig.val2[ord[1]]
Cmat<-cmatkeep[,,ord[1]]
}
wt.mu<-NULL
if(q==0){
output<-marpca.sub(x,0,LSCALE=LSCALE)
B<-output$B
a<-output$a
var.op<-output$var.op
wt.mu<-output$mu
Cmat<-output$wt.cov
}
scores<-NULL
if(SCORES){
ev<-eigen(Cmat)
ord.val<-order(ev$values)
mn1<-m-p+1
wt.mu<-marpca.sub(x,p=p)$mu
Bp<-ev$vectors[,ord.val[mn1:m]] #m by p
xmmu<-x
for(j in 1:m)xmmu[,j]<-x[,j]-wt.mu[j]
scores<-matrix(ncol=p,nrow=n)
for(i in 1:n)scores[i,]<-t(Bp)%*%as.matrix(xmmu[i,])
}
list(B=B,a=a,var.op=var.op,wt.cov=Cmat,wt.mu=wt.mu,scores=scores)
}



marpca.sub<-function(x,p=ncol(x)-1,N1=3,N2=2,tol=.001,B=NULL,a=NULL,
LSCALE=TRUE){
#
# Maronna (2005, Technometrics, 47, 264-273) robust PCA
#
# Note: setting
# p=0 causes B to be the identity matrix, which is used in the case
# p=ncol(x) to estimate proportion of unexplained variance.
#
wt.cov<-NULL
if(!is.null(B)){
B<-as.matrix(B)
if(ncol(B)==1)B<-t(B)
}
n<-nrow(x)
m<-ncol(x)
q<-m-p
if(q<0)stop("p and q should have values between 1 and ncol(x)")
hval<-floor((n + m - q + 2)/2)
DEL<-Inf
sig0<-Inf
if(is.null(B)){
if(p>0 && p<m){
B<-matrix(runif(q*m),nrow=q,ncol=m)
B<-t(ortho(t(B)))
}
if(p==0)B<-diag(rep(1,m))
}
it<-1
Bx<-matrix(NA,ncol=q,nrow=n) #n by q
for(i in 1:n)Bx[i,]<-B%*%as.matrix(x[i,]) # q by 1
#Bx<-as.matrix(elimna(Bx))
if(is.null(a))a<-apply(Bx,2,FUN="median") # Initial a (coordinatewise medians)
while(it<N1+N2 && DEL>tol){
r<-NA
for(i in 1:n)r[i]<-sum((Bx[i,]-a)^2) # squared orthogonal distance from the manifold
if(LSCALE)sig<-lscale(r,m,q)
if(!LSCALE){
delta<-(n-m+q-1)/(2*n)
sig<-mscale(r,delta)
}
DEL<-1-sig/sig0
sig0<-sig
ord.r<-order(r)
w<-rep(0,n)
w[ord.r[1:hval]]<-1
xx<-x
for(i in 1:n)xx[i,]<-x[i,]*w[i]
mu<-apply(xx,2,FUN="sum")/sum(w) #m by 1 locations
Cmat<-matrix(0,nrow=m,ncol=m)
for(i in 1:n){
temp<-w[i]*as.matrix(x[i,]-mu)%*%t(as.matrix(x[i,]-mu))
Cmat<-Cmat+temp
}
wt.cov<-Cmat/sum(w)
if(it>N1){
temp<-eigen(wt.cov)
ord.eig<-order(temp$values)
for(iq in 1:q)B[iq,]<-temp$vectors[,ord.eig[iq]]
}
a<-B%*%mu
it<-it+1
}
list(B=B,a=a,var.op=sig,mu=mu,wt.cov=wt.cov)
}

bwimcp<-function(J,K,x,tr=.2,JK=J*K,grp=c(1:JK),alpha=.05){
#
# Multiple comparisons for interactions
# in a split-plot design.
# The analysis is done by taking difference scores
# among all pairs of dependent groups and
# determining which of
# these differences differ across levels of Factor A
# using trimmed means.
#
# For MOM or M-estimators, use spmcpi which uses a bootstrap method
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                x <- y
}

JK<-J*K
if(JK!=length(x))stop("Something is wrong. Expected ",JK," groups but x contains ",length(x)," groups instead.")
MJ<-(J^2-J)/2
MK<-(K^2-K)/2
JMK<-J*MK
MJMK<-MJ*MK
Jm<-J-1
data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data
output<-matrix(0,MJMK,7)
dimnames(output)<-list(NULL,c("A","A","B","B","psihat","p.value","p.crit"))
jp<-1-K
kv<-0
kv2<-0
test<-NA
for(j in 1:J){
jp<-jp+K
xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]]))
for(k in 1:K){
kv<-kv+1
xmat[,k]<-x[[kv]]
}
xmat<-elimna(xmat)
for(k in 1:K){
kv2<-kv2+1
x[[kv2]]<-xmat[,k]
}}
m<-matrix(c(1:JK),J,K,byrow=T)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j<jj){
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
ic<-ic+1
output[ic,1]<-j
output[ic,2]<-jj
output[ic,3]<-k
output[ic,4]<-kk
x1<-x[[m[j,k]]]-x[[m[j,kk]]]
x2<-x[[m[jj,k]]]-x[[m[jj,kk]]]
temp<-yuen(x1,x2)
output[ic,5]<-mean(x1,tr)-mean(x2,tr)
test[ic]<-temp$p.value
output[ic,6]<-test[ic]
}}}}}}
ncon<-length(test)
if(alpha==.05){
dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
dvec[1]<-alpha/2
}
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output[temp2,7]<-zvec
output[,7]<-2*output[,7]
output
}
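#
# A minimal usage sketch for bwimcp in a 2-by-2 split-plot design
# (hypothetical data; columns 1-2 are level 1 of Factor A, columns 3-4
# are level 2):
#
#   set.seed(1)
#   z <- matrix(rnorm(80), ncol=4)
#   bwimcp(2, 2, z)   # tests the interaction: (x11-x12) versus (x21-x22)
#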


qregsm<-function(x, y,est=hd,qval=.5,sm=TRUE,plotit=TRUE,pyhat=FALSE,fr=0.8,nboot=40,xlab="X",
ylab="Y")
{
#
# Do a smooth of x versus the quantiles of y
#
# qval indicates quantiles of interest.
# Example: qval=c(.2,.8) will create two smooths, one for the
# .2 quantile and the other for the .8 quantile.
#
# est can be any quantile estimator having the argument qval, indicating
# the quantile to be used.
#
# est = hd uses the Harrell-Davis estimator,
# est = qest uses a single order statistic.
#
# sm=T, bagging will be used.
# pyhat=T returns the estimates
#
x<-as.matrix(x)
X<-cbind(x,y)
X<-elimna(X)
np<-ncol(X)
p<-np-1
x<-X[,1:p]
x<-as.matrix(x)
y<-X[,np]
vals<-matrix(NA,ncol=length(y),nrow=length(qval))
for(i in 1:length(qval)){
if(sm)vals[i,]<-rplotsm(x,y,est=est,q=qval[i],pyhat=TRUE,plotit=FALSE,fr=fr,nboot=nboot)$yhat
if(!sm)vals[i,]<-rungen(x,y,est=est,q=qval[i],pyhat=TRUE,plotit=FALSE,fr=fr)$output
}
if(p==1){
if(plotit){
plot(x,y,xlab=xlab,ylab=ylab)
for(i in 1:length(qval)){
sx <- sort(x)
xorder <- order(x)
sysm <- vals[i,]
#lines(sx, sysm)
lines(sx, sysm[xorder])
}}}
output <- "Done"
if(pyhat)output <- vals
output
}
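#
# A minimal usage sketch for qregsm (hypothetical data):
#
#   set.seed(1)
#   x <- rnorm(50)
#   y <- x + rnorm(50)
#   qregsm(x, y, qval=c(.2,.5,.8))   # smooths of three conditional quantiles
#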

L1median <- function(X, tol = 1e-08, maxit = 200, m.init = apply(X, 2, median),
                     trace = FALSE)
{
  ## L1MEDIAN calculates the multivariate L1 median
  ## I/O: mX=L1median(X,tol);
  ##
  ## X  : the data matrix
  ## tol: the convergence criterion:
  ##      the iterative process stops when ||m_k - m_{k+1}|| < tol.
  ## maxit: maximum number of iterations
  ## init.m: starting value for m; typically coordinatewise median
  ##
  ## Ref: Hossjer and Croux (1995)
  ##  "Generalizing Univariate Signed Rank Statistics for Testing
  ##   and Estimating a Multivariate Location Parameter";
  ##   Non-parametric Statistics, 4, 293-308.
  ##
  ## Implemented by Kristel Joossens
  ## Many thanks to Martin Maechler for improving the program!

  ## slightly faster version of 'sweep(x, 2, m)':
  centr <- function(X,m) X - rep(m, each = n)
  ## computes objective function in m based on X and a:
  mrobj <- function(X,m) sum(sqrt(rowSums(centr(X,m)^2)))

  d <- dim(X); n <- d[1]; p <- d[2]
  m <- m.init
  if(!is.numeric(m) || length(m) != p)
      stop("'m.init' must be numeric of length p =", p)
  k <- 1
  if(trace) nstps <- 0
  while (k <= maxit) {
    mold <- m
    obj.old <- if(k == 1) mrobj(X,mold) else obj
    X. <- centr(X, m)
    Xnorms <- sqrt(rowSums(X. ^ 2))
    inorms <- order(Xnorms)
    dx <- Xnorms[inorms] # smallest first, i.e., 0's if there are
    X  <- X [inorms,]
    X. <- X.[inorms,]
    ## using 1/x weighting {MM: should this be generalized?}
    w <- ## (0 norm -> 0 weight) :
        if (all(dn0 <- dx != 0))  1/dx
        else c(rep.int(0, length(dx)- sum(dn0)), 1/dx[dn0])
    delta <- colSums(X. * rep(w,p)) / sum(w)
    nd <- sqrt(sum(delta^2))

    maxhalf <- if (nd < tol) 0 else ceiling(log2(nd/tol))
    m <- mold + delta    # computation of a new estimate
    ## If step 'delta' is too far, we try halving the stepsize
    nstep <- 0
    while ((obj <- mrobj(X, m)) >= obj.old && nstep <= maxhalf) {
      nstep <- nstep+1
      m <- mold + delta/(2^nstep)
    }
    if(trace) {
        if(trace >= 2)
            cat(sprintf("k=%3d obj=%19.12g m=(",k,obj),
                paste(formatC(m),collapse=","),
                ")", if(nstep) sprintf(" nstep=%2d halvings",nstep) else "",
                "\n", sep="")
        nstps[k] <- nstep
    }
    if (nstep > maxhalf) { ## step halving failed; keep old
        m <- mold
        ## warning("step halving failed in ", maxhalf, " steps")
        break
      }
    k <- k+1
  }
  if (k > maxit) warning("iterations did not converge in ", maxit, " steps")
  if(trace == 1)
      cat("needed", k, "iterations with a total of",
          sum(nstps), "stepsize halvings\n")
  return(m)
}
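#
# A minimal usage sketch for L1median (hypothetical skewed data); the L1
# (spatial) median generally differs from the coordinatewise median used
# as its starting value:
#
#   set.seed(1)
#   z <- matrix(rexp(200), ncol=4)
#   L1median(z)
#   apply(z, 2, median)
#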
llocv2<-function(x,est=median,...){
if(!is.list(x))val<-est(x,...)
if(is.list(x)){
val<-NA
for(i in 1:length(x))val[i]<-est(x[[i]],...)
}
if(is.matrix(x))val<-apply(x,2,est,...)
list(center=val)
}
mcppb<-function(x,crit=NA,con=0,tr=.2,alpha=.05,nboot=2000,grp=NA,WIN=FALSE,
win=.1){
#
#   Compute a 1-alpha confidence interval for a set of d linear contrasts
#   involving trimmed means using the percentile bootstrap method.
#   Independent groups are assumed.
#
#   The data are assumed to be stored in x in list mode.  Thus,
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J, say.
#
#   Or the data can be stored in a matrix with J columns
#
#   By default, all pairwise comparisons are performed, but contrasts
#   can be specified with the argument con.
#   The columns of con indicate the contrast coefficients.
#   Con should have J rows, J=number of groups.
#   For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(1,-1,0,0,1,-1)
#   will test two contrasts: (1) the sum of the first two trimmed means is
#   equal to the sum of the second two, and (2) the difference between
#   the first two is equal to the difference between the trimmed means of
#   groups 5 and 6.
#
#   The default number of bootstrap samples is nboot=2000
#
#
con<-as.matrix(con)
if(is.matrix(x)){
xx<-list()
for(i in 1:ncol(x)){
xx[[i]]<-x[,i]
}
x<-xx
}
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
tempn<-0
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
}
Jm<-J-1
d<-ifelse(sum(con^2)==0,(J^2-J)/2,ncol(con))
if(is.na(crit) && tr != .2)stop("A critical value must be specified when
the amount of trimming differs from .2")
if(WIN){
if(tr < .2)warning("When Winsorizing, the amount of trimming should be at least
.2")
if(win > tr)stop("Amount of Winsorizing must <= amount of trimming")
if(min(tempn) < 15)warning("Winsorizing with sample sizes less than 15 can
 result in poor control over the probability of a Type I error")
for (j in 1:J){
x[[j]]<-winval(x[[j]],win)
}
}
if(is.na(crit)){
if(d==1)crit<-alpha/2
if(d==2 && alpha==.05 && nboot==1000)crit<-.014
if(d==2 && alpha==.05 && nboot==2000)crit<-.014
if(d==3 && alpha==.05 && nboot==1000)crit<-.009
if(d==3 && alpha==.05 && nboot==2000)crit<-.0085
if(d==3 && alpha==.025 && nboot==1000)crit<-.004
if(d==3 && alpha==.025 && nboot==2000)crit<-.004
if(d==3 && alpha==.01 && nboot==1000)crit<-.001
if(d==3 && alpha==.01 && nboot==2000)crit<-.001
if(d==4 && alpha==.05 && nboot==2000)crit<-.007
if(d==5 && alpha==.05 && nboot==2000)crit<-.006
if(d==6 && alpha==.05 && nboot==1000)crit<-.004
if(d==6 && alpha==.05 && nboot==2000)crit<-.0045
if(d==6 && alpha==.025 && nboot==1000)crit<-.002
if(d==6 && alpha==.025 && nboot==2000)crit<-.0015
if(d==6 && alpha==.01 && nboot==2000)crit<-.0005
if(d==10 && alpha==.05 && nboot<=2000)crit<-.002
if(d==10 && alpha==.05 && nboot==3000)crit<-.0023
if(d==10 && alpha==.025 && nboot<=2000)crit<-.0005
if(d==10 && alpha==.025 && nboot==3000)crit<-.001
if(d==15 && alpha==.05 && nboot==2000)crit<-.0016
if(d==15 && alpha==.025 && nboot==2000)crit<-.0005
if(d==15 && alpha==.05 && nboot==5000)crit<-.0026
if(d==15 && alpha==.025 && nboot==5000)crit<-.0006
}
if(is.na(crit) && alpha==.05)crit<-0.0268660714*(1/d)-0.0003321429
if(is.na(crit))crit<-alpha/(2*d)
if(d> 10 && nboot <5000)warning("Suggest using nboot=5000 when the number
of contrasts exceeds 10.")
icl<-round(crit*nboot)+1
icu<-round((1-crit)*nboot)
if(sum(con^2)==0){
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
psihat<-matrix(0,ncol(con),6)
dimnames(psihat)<-list(NULL,c("con.num","psihat","se","ci.lower",
"ci.upper","p.value"))
if(nrow(con)!=length(x))stop("The number of groups does not match the number
 of  contrast coefficients.")
bvec<-matrix(NA,nrow=J,ncol=nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
print(paste("Working on group ",j))
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,mean,tr) # Bootstrapped trimmed means for jth group
}
test<-NA
for (d in 1:ncol(con)){
top<-0
for (i in 1:J){
top<-top+con[i,d]*bvec[i,]
}
test[d]<-sum((top>0))/nboot
test[d]<-min(test[d],1-test[d])
top<-sort(top)
psihat[d,4]<-top[icl]
psihat[d,5]<-top[icu]
}
for (d in 1:ncol(con)){
psihat[d,1]<-d
testit<-lincon(x,con[,d],tr,pr=FALSE)
psihat[d,6]<-test[d]
psihat[d,2]<-testit$psihat[1,2]
psihat[d,3]<-testit$test[1,4]
}
print("Reminder: To control FWE, reject if the p-value is less than")
print("the crit.p.value listed in the output.")
list(psihat=psihat,crit.p.value=crit,con=con)
}
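#
# A minimal usage sketch for mcppb with an explicit contrast matrix
# (hypothetical data; four groups and two contrasts):
#
#   set.seed(1)
#   x <- list(rnorm(20), rnorm(20), rnorm(20), rnorm(20))
#   con <- matrix(c(1,-1,0,0,    # contrast 1: group 1 minus group 2
#                   0,0,1,-1),   # contrast 2: group 3 minus group 4
#                 nrow=4)
#   mcppb(x, con=con)
#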

NMpca<-function(x,B,...){
#
# Robust PCA using orthogonal matrices and
# robust generalized variance method
# This function is used by Ppca
#
n<-x[1]
m<-x[2]
p=x[3]
x=matrix(x[4:length(x)],ncol=m)
B=matrix(B,ncol=m)
vals<-NA
z<-matrix(nrow=n,ncol=p)
B <- t(ortho(t(B))) # so rows are orthogonal
for(i in 1:n)z[i,]<-B%*%as.matrix(x[i,])
vals<-0-gvarg(z)
vals
}

ancbbpb<-function(x1,y1,x2,y2,fr1=1,est=tmean,fr2=1,nboot=200,pts=NA,plotit=TRUE,
SEED=TRUE,alpha=.05,RNA=T,sm=FALSE,LP=TRUE,xout=FALSE,outfun=outpro,...){
#
# Compare two independent  groups using an ancova method.
# A running-interval smooth is used to estimate the regression lines and is
# based in part on bootstrap bagging.
#
#  This function is limited to two groups and one covariate.
#
# No assumption is made about the parametric form of the regression
# lines.
# Confidence intervals are computed using a percentile bootstrap
# method. Comparisons are made at five empirically chosen design points when
# pts=NA. To compare groups at specified x values, use pts.
# Example: pts=c(60,70,80) will compare groups at the three design points
# 60, 70 and 80.
#
#   xout=F, when plotting, keep leverage points
#   sm=F, when plotting, do not use bootstrap bagging
#
#  Assume data are in x1 y1 x2 and y2
#
# fr1 and fr2 are the spans used by the smooth.
#
# RNA=F, when computing bagged estimate, NA values are not removed
#  resulting in no estimate of Y at the specified design point,
#  RNA=T, missing values are removed and the remaining values are used.
#
xy=elimna(cbind(x1,y1))
x1=xy[,1]
y1=xy[,2]
xy=elimna(cbind(x2,y2))
x2=xy[,1]
y2=xy[,2]
#
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
if(SEED)set.seed(2)
if(is.na(pts[1])){
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,8)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","ci.low","ci.hi","p.value","p.crit"))
gv1<-vector("list")
for (i in 1:5){
j<-i+5
temp1<-y1[near(x1,x1[isub[i]],fr1)]
temp2<-y2[near(x2,x1[isub[i]],fr2)]
temp1<-temp1[!is.na(temp1)]
temp2<-temp2[!is.na(temp2)]
mat[i,1]<-x1[isub[i]]
mat[i,2]<-length(temp1)
mat[i,3]<-length(temp2)
mat[,4]<-runmbo(x1,y1,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,RNA=RNA)-
runmbo(x2,y2,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,RNA=RNA)
gv1[[i]]<-temp1
gv1[[j]]<-temp2
}
I1<-diag(5)
I2<-0-I1
con<-rbind(I1,I2)
estmat1<-matrix(nrow=nboot,ncol=length(isub))
estmat2<-matrix(nrow=nboot,ncol=length(isub))
data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot)
#
for(ib in 1:nboot){
estmat1[ib,]=runmbo(x1[data1[ib,]],y1[data1[ib,]],pts=x1[isub],
pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,...)
estmat2[ib,]=runmbo(x2[data2[ib,]],y2[data2[ib,]],pts=x1[isub],
pyhat=T,plotit=FALSE,SEED=FALSE,est=est,...)
}
dif<-(estmat1<estmat2)
dif0<-(estmat1==estmat2)
pvals=apply(dif,2,mean,na.rm=TRUE)+.5*apply(dif0,2,mean,na.rm=TRUE)
tmat<-rbind(pvals,1-pvals)
pvals=2*apply(tmat,2,min)
mat[,7]<-pvals
for(ij in 1:length(isub)){
dif<-estmat1[,ij]-estmat2[,ij]
dif<-elimna(dif)
nbad<-length(dif)
lo<-round(nbad*alpha/2)
hi<-nbad-lo
dif<-sort(dif)
mat[ij,5]<-dif[lo]
mat[ij,6]<-dif[hi]
}
}
if(!is.na(pts[1])){
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
if(n1[i]<=5)print(paste("Warning, there are",n1[i]," points corresponding to the design point X=",pts[i]))
if(n2[i]<=5)print(paste("Warning, there are",n2[i]," points corresponding to the design point X=",pts[i]))
}
mat<-matrix(NA,length(pts),8) # 8 columns so the p.crit values assigned below fit
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","ci.low","ci.hi",
"p.value","p.crit"))
gv<-vector("list",2*length(pts))
for (i in 1:length(pts)){
g1<-y1[near(x1,pts[i],fr1)]
g2<-y2[near(x2,pts[i],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
j<-i+length(pts)
gv[[i]]<-g1
gv[[j]]<-g2
}
I1<-diag(length(pts))
I2<-0-I1
con<-rbind(I1,I2)
isub=c(1:length(pts))
estmat1<-matrix(nrow=nboot,ncol=length(isub))
estmat2<-matrix(nrow=nboot,ncol=length(isub))
data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot)
est1=runmbo(x1,y1,pts=pts,pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,...)
est2=runmbo(x2,y2,pts=pts,pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=est,...)
mat[,4]<-est1-est2
for(ib in 1:nboot){
estmat1[ib,]=runmbo(x1[data1[ib,]],y1[data1[ib,]],pts=pts,pyhat=TRUE,plotit=FALSE,
SEED=FALSE,est=est,...)
estmat2[ib,]=runmbo(x2[data2[ib,]],y2[data2[ib,]],pts=pts,pyhat=TRUE,plotit=FALSE,
SEED=FALSE,est=est,...)
}
dif<-(estmat1<estmat2)
dif0<-(estmat1==estmat2)
pvals=apply(dif,2,mean,na.rm=TRUE)+.5*apply(dif0,2,mean,na.rm=TRUE)
tmat<-rbind(pvals,1-pvals)
pvals=2*apply(tmat,2,min)
#
mat[,1]<-pts
mat[,2]<-n1
mat[,3]<-n2
mat[,7]<-pvals
for(ij in 1:length(pts)){
dif<-sort(estmat1[,ij]-estmat2[,ij])
dif<-elimna(dif)
nbad<-length(dif)
lo<-round(nbad*alpha/2)
hi<-nbad-lo
mat[ij,5]<-dif[lo]
mat[ij,6]<-dif[hi]
}
}
temp2<-order(0-pvals)
zvec=alpha/c(1:length(pvals))
mat[temp2,8]=zvec
if(plotit)
runmean2g(x1,y1,x2,y2,fr=fr1,est=est,sm=sm,LP=LP,xout=FALSE,...) 
#                outliers already removed if argument xout=T
list(output=mat)
}
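#
# A minimal usage sketch for ancbbpb (hypothetical data; two groups with one
# covariate, compared at three specified design points; plotit=FALSE and a
# small nboot keep the sketch fast):
#
#   set.seed(1)
#   x1 <- rnorm(50); y1 <- x1 + rnorm(50)
#   x2 <- rnorm(50); y2 <- .5*x2 + rnorm(50)
#   ancbbpb(x1, y1, x2, y2, pts=c(-1,0,1), nboot=100, plotit=FALSE)
#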

L1medcen <- function(X, tol = 1e-08, maxit = 200, m.init = apply(X, 2, median),
                     trace = FALSE)
{
  ## L1MEDIAN calculates the multivariate L1 median
  ## I/O: mX=L1median(X,tol);
  ##
  ## X  : the data matrix
  ## tol: the convergence criterion:
  ##      the iterative process stops when ||m_k - m_{k+1}|| < tol.
  ## maxit: maximum number of iterations
  ## init.m: starting value for m; typically coordinatewise median
  ##
  ## Ref: Hossjer and Croux (1995)
  ##  "Generalizing Univariate Signed Rank Statistics for Testing
  ##   and Estimating a Multivariate Location Parameter";
  ##   Non-parametric Statistics, 4, 293-308.
  ##
  ## Implemented by Kristel Joossens
  ## Many thanks to Martin Maechler for improving the program!

  ## slightly faster version of 'sweep(x, 2, m)':
  centr <- function(X,m) X - rep(m, each = n)
  ## computes objective function in m based on X and a:
  mrobj <- function(X,m) sum(sqrt(rowSums(centr(X,m)^2)))
  d <- dim(X); n <- d[1]; p <- d[2]
  m <- m.init
  if(!is.numeric(m) || length(m) != p)
      stop("'m.init' must be numeric of length p =", p)
  k <- 1
  if(trace) nstps <- 0
  while (k <= maxit) {
    mold <- m
    obj.old <- if(k == 1) mrobj(X,mold) else obj
    X. <- centr(X, m)
    Xnorms <- sqrt(rowSums(X. ^ 2))
    inorms <- order(Xnorms)
    dx <- Xnorms[inorms] # smallest first, i.e., 0's if there are
    X  <- X [inorms,]
    X. <- X.[inorms,]
    ## using 1/x weighting {MM: should this be generalized?}
    w <- ## (0 norm -> 0 weight) :
        if (all(dn0 <- dx != 0))  1/dx
        else c(rep.int(0, length(dx)- sum(dn0)), 1/dx[dn0])
    delta <- colSums(X. * rep(w,p)) / sum(w)
    nd <- sqrt(sum(delta^2))

    maxhalf <- if (nd < tol) 0 else ceiling(log2(nd/tol))
    m <- mold + delta    # computation of a new estimate
    ## If step 'delta' is too far, we try halving the stepsize
    nstep <- 0
    while ((obj <- mrobj(X, m)) >= obj.old && nstep <= maxhalf) {
      nstep <- nstep+1
      m <- mold + delta/(2^nstep)
    }
    if(trace) {
        if(trace >= 2)
            cat(sprintf("k=%3d obj=%19.12g m=(",k,obj),
                paste(formatC(m),collapse=","),
                ")", if(nstep) sprintf(" nstep=%2d halvings",nstep) else "",
                "\n", sep="")
        nstps[k] <- nstep
    }
    if (nstep > maxhalf) { ## step halving failed; keep old
        m <- mold
        ## warning("step halving failed in ", maxhalf, " steps")
        break
      }
    k <- k+1
  }
  if (k > maxit) warning("iterations did not converge in ", maxit, " steps")
  if(trace == 1)
      cat("needed", k, "iterations with a total of",
          sum(nstps), "stepsize halvings\n")
#  return(m)
list(center=m)
}

matl<-function(x){
#
# take data in list mode and store it in a matrix
#
J=length(x)
nval=NA
for(j in 1:J)nval[j]=length(x[[j]])
temp<-matrix(NA,ncol=J,nrow=max(nval))
for(j in 1:J)temp[1:nval[j],j]<-x[[j]]
temp
}
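#
# A minimal sketch of matl (hypothetical data): ragged groups are padded
# with NA so each group occupies one column.
#
#   matl(list(c(1,2,3), c(4,5)))
#   #      [,1] [,2]
#   # [1,]    1    4
#   # [2,]    2    5
#   # [3,]    3   NA
#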

list2vec<-function(x){
if(!is.list(x))stop("x should have list mode")
res=as.vector(matl(x))
res
}


list2matrix<-function(x){
#
# take data in list mode and store it in a matrix
#
J=length(x)
nval=NA
for(j in 1:J)nval[j]=length(x[[j]])
temp<-matrix(NA,ncol=J,nrow=max(nval))
for(j in 1:J)temp[1:nval[j],j]<-x[[j]]
temp
}
Aband<-function(x,alpha=.05,plotit=TRUE,sm=TRUE,SEED=TRUE,nboot=500,grp=c(1:4),
xlab="X (First Factor)",ylab="Delta",crit=NA,print.all=FALSE,plot.op=FALSE){
#
# Apply the shift function when analyzing main effect in a
# 2 by 2 design.
#
# For variables x1, x2, x3 and x4,
# In effect, this function applies a shift function to the distributions
# d1=(x1+x2)/2 and d2=(x3+x4)/2
#  That is, focus on first factor.
#  For second factor, use Bband.
#
# grp indicates the groups to be compared. By default grp=c(1,2,3,4)
# meaning that the first level of factor A consists of groups 1 and 2
# and the 2nd level of factor A consists of groups 3 and 4.
# (So level 1 of factor B consists of groups 1 and 3.)
#
# print.all=F,
# returns number sig, meaning number of confidence intervals that do not
# contain zero,
# the critical value used as well as the KS test statistics.
# print.all=T reports all confidence intervals, the number of which can
# be large.
#
if(!is.list(x) && !is.matrix(x))stop("store data in list mode or a matrix")
if(SEED)set.seed(2)
if(is.matrix(x))x<-listm(x)
for(j in 1:length(x))x[[j]]=elimna(x[[j]])/2
if(length(x)<4)stop("There must be at least 4 groups")
if(length(grp)!=4)stop("The argument grp must have 4 values")
x<-x[grp]
n<-c(length(x[[1]]),length(x[[2]]),length(x[[3]]),length(x[[4]]))
# Approximate the critical value
#
vals<-NA
y<-list()
if(is.na(crit)){
print("Approximating critical value. Please wait.")
for(i in 1:nboot){
for(j in 1:4)
y[[j]]<-rnorm(n[j])
temp<-ks.test(outer(y[[1]],y[[2]],FUN="+"),outer(y[[3]],y[[4]],FUN="+"))
vals[i]<-temp[1]$statistic
}
vals<-sort(vals)
ic<-(1-alpha)*nboot
crit<-vals[ic]
}
if(plot.op){
plotit<-FALSE
# plot the two composite distributions that sband compares below
g2plot(outer(x[[1]],x[[2]],FUN="+"),outer(x[[3]],x[[4]],FUN="+"))
}
output<-sband(outer(x[[1]],x[[2]],FUN="+"),outer(x[[3]],x[[4]],FUN="+"),
plotit=plotit,crit=crit,flag=FALSE,sm=sm,xlab=xlab,ylab=ylab)
if(!print.all){
numsig<-output$numsig
ks.test.stat<-ks.test(outer(x[[1]],x[[2]],FUN="+"),
outer(x[[3]],x[[4]],FUN="+"))$statistic
output<-matrix(c(numsig,crit,ks.test.stat),ncol=1)
dimnames(output)<-list(c("number sig","critical value","KS test statistics"),
NULL)
}
output
}

Bband<-function(x,alpha=.05,plotit=TRUE,sm=TRUE,SEED=TRUE,nboot=500,grp=c(1:4),
xlab="X (First Level)",ylab="Delta",crit=NA,print.all=FALSE,plot.op=FALSE){
#
# Apply the shift function when analyzing main effect in a
# 2 by 2 design.
#
# For variables x1, x2, x3 and x4,
# In effect, this function applies a shift function to the distributions
# d1=(x1+x3)/2 and d2=(x2+x4)/2.
# That is, focus on main effects of Factor B.
#
# grp indicates the groups to be compared. By default grp=c(1,2,3,4)
# meaning that the first level of factor A consists of groups 1 and 2
# and the 2nd level of factor A consists of groups 3 and 4.
# (So level 1 of factor B consists of groups 1 and 3.)
#
# print.all=F,
# returns number sig, meaning number of confidence intervals that do not
# contain zero,
# the critical value used as well as the KS test statistics.
# print.all=T reports all confidence intervals, the number of which can
# be large.
#
if(!is.list(x) && !is.matrix(x))stop("store data in list mode or a matrix")
if(SEED)set.seed(2)
if(is.matrix(x))x<-listm(x)
for(j in 1:length(x))x[[j]]=elimna(x[[j]])/2
if(length(x)<4)stop("There must be at least 4 groups")
if(length(grp)!=4)stop("The argument grp must have 4 values")
x<-x[grp]
grp=c(1,3,2,4)
x<-x[grp]  # Arrange groups for main effects on factor B
n<-c(length(x[[1]]),length(x[[2]]),length(x[[3]]),length(x[[4]]))
# Approximate the critical value
#
vals<-NA
y<-list()
if(is.na(crit)){
print("Approximating critical value. Please wait.")
for(i in 1:nboot){
for(j in 1:4)
y[[j]]<-rnorm(n[j])
temp<-ks.test(outer(y[[1]],y[[2]],FUN="+"),outer(y[[3]],y[[4]],FUN="+"))
vals[i]<-temp[1]$statistic
}
vals<-sort(vals)
ic<-(1-alpha)*nboot
crit<-vals[ic]
}
if(plot.op){
plotit<-FALSE
# plot the two composite distributions that sband compares below
g2plot(outer(x[[1]],x[[2]],FUN="+"),outer(x[[3]],x[[4]],FUN="+"))
}
output<-sband(outer(x[[1]],x[[2]],FUN="+"),outer(x[[3]],x[[4]],FUN="+"),
plotit=plotit,crit=crit,flag=FALSE,sm=sm,xlab=xlab,ylab=ylab)
if(!print.all){
numsig<-output$numsig
ks.test.stat<-ks.test(outer(x[[1]],x[[2]],FUN="+"),
outer(x[[3]],x[[4]],FUN="+"))$statistic
output<-matrix(c(numsig,crit,ks.test.stat),ncol=1)
dimnames(output)<-list(c("number sig","critical value","KS test statistics"),
NULL)
}
output
}

iband<-function(x,alpha=.05,plotit=TRUE,sm=TRUE,SEED=TRUE,nboot=500,grp=c(1:4),
xlab="First Difference",ylab="Delta",crit=NA,print.all=FALSE,plot.op=FALSE){
#
# Apply the shift function when analyzing interactions in a
# 2 by 2 design.
#
# For variables x1, x2, x3 and x4,
# In effect, this function applies a shift function to the distributions
# d1=x1-x2 and d2=x3-x4
#
# grp indicates the groups to be compared. By default grp=c(1,2,3,4)
# meaning that the first four groups are used with the difference between
# the first two compared to the difference between the second two.
# (Rows are being compared in a 2 by 2 design.)
# To compare difference between groups 1 and 3 versus 2 and 4 (columns in
#  a 2 by 2 design), set grp=c(1,3,2,4).
#
# print.all=F,
# returns number sig, meaning number of confidence intervals that do not
# contain zero,
# the critical value used as well as the KS test statistics.
# print.all=T reports all confidence intervals, the number of which can
# be large.
#
if(!is.list(x) && !is.matrix(x))stop("store data in list mode or a matrix")
if(SEED)set.seed(2)
if(is.matrix(x))x<-listm(x)
if(length(x)<4)stop("There must be at least 4 groups")
for(j in 1:length(x))x[[j]]=elimna(x[[j]])
if(length(grp)!=4)stop("The argument grp must have 4 values")
x<-x[grp]
n<-c(length(x[[1]]),length(x[[2]]),length(x[[3]]),length(x[[4]]))
# Approximate the critical value
#
vals<-NA
y<-list()
if(is.na(crit)){
print("Approximating critical value. Please wait.")
for(i in 1:nboot){
for(j in 1:4)
y[[j]]<-rnorm(n[j])
temp<-ks.test(outer(y[[1]],y[[2]],FUN="-"),outer(y[[3]],y[[4]],FUN="-"))
vals[i]<-temp[1]$statistic
}
vals<-sort(vals)
ic<-(1-alpha)*nboot
crit<-vals[ic]
}
if(plot.op){
plotit<-FALSE
# plot the two difference distributions that sband compares below
g2plot(outer(x[[1]],x[[2]],FUN="-"),outer(x[[3]],x[[4]],FUN="-"))
}
output<-sband(outer(x[[1]],x[[2]],FUN="-"),outer(x[[3]],x[[4]],FUN="-"),
plotit=plotit,crit=crit,flag=FALSE,sm=sm,xlab=xlab,ylab=ylab)
if(!print.all){
numsig<-output$numsig
ks.test.stat<-ks.test(outer(x[[1]],x[[2]],FUN="-"),
outer(x[[3]],x[[4]],FUN="-"))$statistic
output<-matrix(c(numsig,crit,ks.test.stat),ncol=1)
dimnames(output)<-list(c("number sig","critical value","KS test statistics"),
NULL)
}
output
}
disband<-function(x,sm=TRUE,op=1,grp=c(1:4),xlab="First Group",
ylab="Delta"){
#
# A shift-type plot aimed at helping see any disordinal interactions
# in a  2 by 2 design.
#
#  x is assumed to be a matrix with columns corresponding to groups
#  or x can have list mode.
#
#  four groups are analyzed,
#
# grp indicates the groups to be compared. By default grp=c(1,2,3,4)
# meaning that the first four groups are used with the difference between
# the first two compared to the difference between the second two.
#
# For four variables stored in x,
# this function plots the shift function for the first two
# variables as well as the second two.
#
#  No disordinal interaction corresponds to the two shift functions being
#  identical. That is, the difference between the quantiles is always the same
#
#  When plotting, the median of x is marked with a + and the two
#  quartiles are marked with o.
#
#  sm=T, shift function is smoothed using:
#  op!=1, running interval smoother,
#  otherwise use lowess.
#
if(is.matrix(x))x=listm(x)
if(length(grp)!=4)stop("The argument grp must have 4 values")
x=x[grp]
for(j in 1:4)x[[j]]=elimna(x[[j]])
pc<-NA
crit= 1.36 * sqrt((length(x[[1]]) + length(x[[2]]))/(length(x[[1]]) *
    length(x[[2]])))
remx=x
for(iloop in 1:2){
if(iloop==1){
x=remx[[1]]
y=remx[[2]]
}
if(iloop==2){
x=remx[[3]]
y=remx[[4]]
}
xsort<-sort(x)
ysort<-c(NA,sort(y))
l<-0
u<-0
ysort[length(y)+1+1]<-NA
for(ivec in 1:length(x))
{
isub<-max(0,ceiling(length(y)*(ivec/length(x)-crit)))
l[ivec]<-ysort[isub+1]-xsort[ivec]
isub<-min(length(y)+1,floor(length(y)*(ivec/length(x)+crit))+1)
u[ivec]<-ysort[isub+1]-xsort[ivec]
}
num<-length(l[l>0 & !is.na(l)])+length(u[u<0 & !is.na(u)])
qhat<-c(1:length(x))/length(x)
m<-matrix(c(qhat,l,u),length(x),3)
dimnames(m)<-list(NULL,c("qhat","lower","upper"))
xsort<-sort(x)
ysort<-sort(y)
del<-0
for (i in 1:length(x)){
ival<-round(length(y)*i/length(x))
if(ival<=0)ival<-1
if(ival>length(y))ival<-length(y)
del[i]<-ysort[ival]-xsort[i]
}
if(iloop==1){
allx<-c(xsort,xsort,xsort)
ally<-c(del,m[,2],m[,3])
}
if(iloop==2){
allx<-c(allx,xsort,xsort,xsort)
ally<-c(ally,del,m[,2],m[,3])
plot(allx,ally,type="n",ylab=ylab,xlab=xlab)
}
ik<-rep(F,length(xsort))
if(sm){
if(op==1){
ik<-duplicated(xsort)
del<-lowess(xsort,del)$y
}
if(op!=1)del<-runmean(xsort,del,pyhat=TRUE)
}
if(iloop==1){
xsort1=xsort[!ik]
del1=del[!ik]
}
if(iloop==2){
lines(xsort1,del1,lty=iloop)
lines(xsort[!ik],del[!ik],lty=iloop)
}}
done="Done"
done
}

scor<-function(x,y=NULL,corfun=pcor,gval=NA,plotit=TRUE,op=TRUE,cop=3,xlab="VAR 1",
ylab="VAR 2",STAND=FALSE,pr=TRUE,SEED=TRUE){
#
# Compute a skipped correlation coefficient.
#
# Eliminate outliers using a projection method
# That is, compute Donoho-Gasko median, for each point
# consider the line between it and the median,
# project all points onto this line, and
# check for outliers using a boxplot rule.
# Repeat this for all points. A point is declared
# an outlier if for any projection it is an outlier
# using a modification of the usual boxplot rule.
#
# For information about the argument cop, see the function
# outpro.
#
# Eliminate any outliers and compute correlation using
# remaining data.
#
# corfun=pcor means Pearson's correlation is used.
# corfun=spear means Spearman's correlation is used.
if(SEED)set.seed(12) # So when using MVE or MCD, get consistent results
if(is.null(y[1]))m<-x
if(!is.null(y[1]))m<-cbind(x,y)
m<-elimna(m)
temp<-outpro(m,gval=gval,plotit=plotit,op=op,cop=cop,
xlab=xlab,ylab=ylab,STAND=STAND,pr=pr)$keep
tcor<-corfun(m[temp,])$cor
#if(ncol(m)==2)tcor<-tcor[1,2]
if(!is.null(dim((m))))tcor<-tcor[1,2]
test<-abs(tcor*sqrt((nrow(m)-2)/(1-tcor^2)))
if(ncol(m)!=2)diag(test)<-NA
crit<-6.947/nrow(m)+2.3197
list(cor.values=tcor,test.stat=test,crit.05=crit)
}
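#
# A minimal usage sketch for scor (hypothetical data):
#
#   set.seed(1)
#   x <- rnorm(50)
#   y <- x + rnorm(50)
#   scor(x, y, plotit=FALSE)   # skipped correlation, test statistic, .05 critical value
#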



cov.mba<-function(x,COR=F){
val<-covmba2(x)$cov
if(COR){
val=val/outer(sqrt(diag(val)),sqrt(diag(val)))
}
val
}
qregci<-function(x,y,nboot=100,alpha=.05,qval=.5,q=NULL,SEED=TRUE,pr=TRUE,xout=FALSE,outfun=outpro,...){
#
#  Test the hypothesis that the quantile regression slopes are zero.
#
#  qval=.5, i.e., the default is to
#  use the .5 quantile regression line only.
#
#  Suggest using only quantiles between .2 and .8.
#  When using both the .2 and .8 quantiles, or the .2, .5 and .8
#  quantile regression lines, FWE is controlled for alpha=.1, .05, .025 and .01.
#
if(!is.null(q))qval=q
xx<-elimna(cbind(x,y))
np<-ncol(xx)
p<-np-1
y<-xx[,np]
x<-xx[,1:p]
x<-as.matrix(x)
if(xout){
if(pr)print("Default for argument outfun is now outpro")
x<-as.matrix(x)
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
}
x<-as.matrix(x)
n<-length(y)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
# determine critical value.
crit<-NA
if(alpha==.1)crit<-1.645-1.19/sqrt(n)
if(alpha==.05)crit<-1.96-1.37/sqrt(n)
if(alpha==.025)crit<-2.24-1.18/sqrt(n)
if(alpha==.01)crit<-2.58-1.69/sqrt(n)
crit.fwe<-crit
if(length(qval)==2 || p==2){
if(alpha==.1)crit.fwe<-1.98-1.13/sqrt(n)
if(alpha==.05)crit.fwe<-2.37-1.56/sqrt(n)
if(alpha==.025)crit.fwe<-2.60-1.04/sqrt(n)
if(alpha==.01)crit.fwe<-3.02-1.35/sqrt(n)
}
if(length(qval)==3 || p==3){
if(alpha==.1)crit.fwe<-2.145-1.31/sqrt(n)
if(alpha==.05)crit.fwe<-2.49-1.49/sqrt(n)
if(alpha==.025)crit.fwe<-2.86-1.52/sqrt(n)
if(alpha==.01)crit.fwe<-3.42-1.85/sqrt(n)
}
if(is.na(crit.fwe)){
print("Could not determine a critical value")
print("Only alpha=.1, .05, .025 and .01 are allowed")
}
if(p==1){
bvec<-apply(data,1,qindbt.sub,x,y,qval=qval)
estsub<-NA
for(i in 1:length(qval)){
estsub[i]<-qreg(x,y,qval[i])$coef[2]
}
if(is.matrix(bvec))se.val<-sqrt(apply(bvec,1,FUN=var))
if(!is.matrix(bvec))se.val<-sqrt(var(bvec))
test<-abs(estsub)/se.val
ci.mat<-matrix(nrow=length(qval),ncol=3)
dimnames(ci.mat)<-list(NULL,c("Quantile","ci.lower","ci.upper"))
ci.mat[,1]<-qval
ci.mat[,2]<-estsub-crit*se.val
ci.mat[,3]<-estsub+crit*se.val
}
if(p>1){
if(length(qval)>1){
print("With p>1 predictors,only the first qval value is used")
}
bvec<-apply(data,1,regboot,x,y,regfun=qreg,qval=qval[1])
se.val<-sqrt(apply(bvec,1,FUN=var))
estsub<-qreg(x,y,qval=qval[1])$coef
test<-abs(estsub)/se.val
ci.mat<-matrix(nrow=np,ncol=3)
dimnames(ci.mat)<-list(NULL,c("Predictor","ci.lower","ci.upper"))
ci.mat[,1]<-c(0:p)
ci.mat[,2]<-estsub-crit*se.val
ci.mat[,3]<-estsub+crit*se.val
}
list(test=test,se.val=se.val,crit.val=crit,crit.fwe=crit.fwe,
slope.est=estsub,ci=ci.mat)
}




covmba2<-function(x, csteps = 5)
{
# Perform the median ball algorithm.
#
# It returns a measure of location and scatter for the
# multivariate data in x, which is assumed to have
# p>=2 columns and n rows.
#
# This code is based on a very slight modification of code originally
# written by David Olive
#
x<-as.matrix(x)
if(!is.matrix(x))stop("x should be a matrix")
         p <- dim(x)[2]
#if(p==1)stop("x should be a matrix with two or more columns of variables")
         ##get the DGK estimator
         covs <- var(x)
         mns <- apply(x, 2, mean)        ## concentrate
         for(i in 1:csteps) {
                 md2 <- mahalanobis(x, mns, covs)
                 medd2 <- median(md2)
#                 mns <- apply(x[md2 <= medd2,  ], 2,
                 mns <- apply(as.matrix(x[md2 <= medd2,  ]), 2,
                         mean)
                 covs <- var(x[md2 <= medd2,  ])
         }
         covb <- covs
         mnb <- mns      ##get the square root of det(covb)
         critb <- prod(diag(chol(covb)))
         ##get the resistant estimator
         covv <- diag(p)
         med <- apply(x, 2, median)
         md2 <- mahalanobis(x, center = med, covv)
         medd2 <- median(md2)    ## get the start
#         mns <- apply(x[md2 <= medd2,  ], 2, mean)
         mns <- apply(as.matrix(x[md2 <= medd2,  ]), 2, mean)
         covs <- var(x[md2 <= medd2,  ]) ## concentrate
         for(i in 1:csteps) {
                 md2 <- mahalanobis(x, mns, covs)
                 medd2 <- median(md2)
 #                mns <- apply(x[md2 <= medd2,  ], 2,mean)
       mns <- apply(as.matrix(x[md2 <= medd2,  ]), 2, mean)
                 covs <- var(x[md2 <= medd2,  ])
         }
         crit <- prod(diag(chol(covs)))
         if(crit < critb) {
                 critb <- crit
                 covb <- covs
                 mnb <- mns
         }
##scale for better performance at MVN
         rd2 <- mahalanobis(x, mnb, covb)
         const <- median(rd2)/(qchisq(0.5, p))
         covb <- const * covb
         list(center = mnb, cov = covb)
}
rmmcp<-function(x, con = 0, tr = 0.2, alpha = 0.05,dif=TRUE){
#
# MCP on trimmed means with FWE controlled with Rom's method
#
flagcon=F
if(!is.matrix(x))x<-matl(x)
if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
con<-as.matrix(con)
J<-ncol(x)
xbar<-vector("numeric",J)
x<-elimna(x)  # Remove missing values
nval<-nrow(x)
h1<-nrow(x)-2*floor(tr*nrow(x))
df<-h1-1
for(j in 1: J)xbar[j]<-mean(x[,j],tr)
if(sum(con^2!=0))CC<-ncol(con)
if(sum(con^2)==0)CC<-(J^2-J)/2
ncon<-CC
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon)
if(sum(con^2)==0){
flagcon<-T
psihat<-matrix(0,CC,5)
dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper"))
test<-matrix(NA,CC,6)
dimnames(test)<-list(NULL,c("Group","Group","test","p.value","p.crit","se"))
temp1<-0
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
jcom<-jcom+1
q1<-(nrow(x)-1)*winvar(x[,j],tr)
q2<-(nrow(x)-1)*winvar(x[,k],tr)
q3<-(nrow(x)-1)*wincor(x[,j],x[,k],tr)$cov
sejk<-sqrt((q1+q2-2*q3)/(h1*(h1-1)))
if(!dif){
test[jcom,6]<-sejk
test[jcom,3]<-(xbar[j]-xbar[k])/sejk
temp1[jcom]<-2 * (1 - pt(abs(test[jcom,3]), df))
test[jcom,4]<-temp1[jcom]
psihat[jcom,1]<-j
psihat[jcom,2]<-k
test[jcom,1]<-j
test[jcom,2]<-k
psihat[jcom,3]<-(xbar[j]-xbar[k])
}
if(dif){
dv<-x[,j]-x[,k]
test[jcom,6]<-trimse(dv,tr)
temp<-trimci(dv,alpha=alpha/CC,pr=FALSE,tr=tr)
test[jcom,3]<-temp$test.stat
temp1[jcom]<-temp$p.value
test[jcom,4]<-temp1[jcom]
psihat[jcom,1]<-j
psihat[jcom,2]<-k
test[jcom,1]<-j
test[jcom,2]<-k
psihat[jcom,3]<-mean(dv,tr=tr)
psihat[jcom,4]<-temp$ci[1]
psihat[jcom,5]<-temp$ci[2]
}
}}}
temp2<-order(0-temp1)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2,4]>=zvec)
if(sum(sigvec)<ncon){
dd<-ncon-sum(sigvec) #number that are sig.
ddd<-sum(sigvec)+1
zvec[ddd:ncon]<-dvec[ddd]
}
test[temp2,5]<-zvec
if(!dif){
psihat[,4]<-psihat[,3]-qt(1-alpha/(2*CC),df)*test[,6]
psihat[,5]<-psihat[,3]+qt(1-alpha/(2*CC),df)*test[,6]
}}
if(sum(con^2)>0){
if(nrow(con)!=ncol(x))warning("The number of groups does not match the number
 of contrast coefficients.")
ncon<-ncol(con)
psihat<-matrix(0,ncol(con),4)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
test<-matrix(0,ncol(con),5)
dimnames(test)<-list(NULL,c("con.num","test","p.value","p.crit","se"))
temp1<-NA
for (d in 1:ncol(con)){
psihat[d,1]<-d
if(!dif){
psihat[d,2]<-sum(con[,d]*xbar)
sejk<-0
for(j in 1:J){
for(k in 1:J){
djk<-(nval-1)*wincor(x[,j],x[,k], tr)$cov/(h1*(h1-1))
sejk<-sejk+con[j,d]*con[k,d]*djk
}}
sejk<-sqrt(sejk)
test[d,1]<-d
test[d,2]<-sum(con[,d]*xbar)/sejk
test[d,5]<-sejk
temp1[d]<-2 * (1 - pt(abs(test[d,2]), df))
}
if(dif){
for(j in 1:J){
if(j==1)dval<-con[j,d]*x[,j]
if(j>1)dval<-dval+con[j,d]*x[,j]
}
temp1[d]<-trimci(dval,tr=tr,pr=FALSE)$p.value
test[d,1]<-d
test[d,2]<-trimci(dval,tr=tr,pr=FALSE)$test.stat
test[d,5]<-trimse(dval,tr=tr)
psihat[d,2]<-mean(dval,tr=tr)
}}
test[,3]<-temp1
temp2<-order(0-temp1)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2,3]>=zvec)
if(sum(sigvec)<ncon){
dd<-ncon-sum(sigvec) #number that are sig.
ddd<-sum(sigvec)+1
zvec[ddd:ncon]<-dvec[ddd]
}
test[temp2,4]<-zvec
psihat[,3]<-psihat[,2]-qt(1-test[,4]/2,df)*test[,5]
psihat[,4]<-psihat[,2]+qt(1-test[,4]/2,df)*test[,5]
}
if(flagcon)num.sig<-sum(test[,4]<=test[,5])
if(!flagcon)num.sig<-sum(test[,3]<=test[,4])
list(n=nval,test=test,psihat=psihat,con=con,num.sig=num.sig)
}

# to be consistent with other function names, also store rmmcp as wmcp
wmcp<-rmmcp
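#
# A minimal usage sketch for rmmcp (hypothetical data; three dependent
# measures, all pairwise comparisons):
#
#   set.seed(1)
#   z <- matrix(rnorm(60), ncol=3)
#   rmmcp(z)             # dif=TRUE: inferences based on difference scores
#   rmmcp(z, dif=FALSE)  # compare the marginal trimmed means instead
#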

snmreg.sub<-function(X,theta){
np<-ncol(X)
p<-np-1
x<-X[,1:p]
y<-X[,np]
temp<-t(t(x)*theta)
yhat<-apply(temp,1,sum)
res<-y-yhat
val<-pbvar(res)
val
}

tstsreg<-function(x,y,sc=pbvar,xout=FALSE,outfun=out,plotit=FALSE,...){
#
# Compute a modified Theil-Sen regression estimator.
# Use s-type initial estimate, eliminate points with
# outlying residuals, then do regular Theil-Sen
#
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=plotit,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
res=stsreg(x,y,sc=sc)$res
chk<-abs(res-median(res))/mad(res)
xx<-x[chk<=2,]
yy<-y[chk<=2]
temp<-tsreg(xx,yy)
list(coef=temp$coef,residuals=temp$res)
}

tssnmreg<-function(x,y,sc=pbvar,xout=FALSE,outfun=out,plotit=FALSE,...){
#
# Compute a modified Theil-Sen regression estimator.
# Use s-type initial estimate, eliminate points with
# outlying residuals, then do regular Theil-Sen
#
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=plotit,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
res=snmreg(x,y)$res
chk<-abs(res-median(res))/mad(res)
xx<-x[chk<=2,]
yy<-y[chk<=2]
temp<-tsreg(xx,yy)
list(coef=temp$coef,residuals=temp$res)
}

gyreg<-function(x,y,rinit=lmsreg,K=2.5){
xy=elimna(cbind(x,y))
p=ncol(as.matrix(x))
p1=p+1
x=xy[,1:p]
y=xy[,p1]
library(MASS)
res<-rinit(y~x)$res
res.scale<-abs(res)/mad(res)
flag<-(res.scale >=K)
i0<-sum(flag)
il<-length(y)-i0+1
res.sort<-sort(res.scale)
if(i0>0){
dval<-pnorm(res.sort[il:length(y)])-c(il:length(y))/length(y)
}
if(i0<=0)dval<-0
dval<-max(dval)
ndval<-floor(length(y)*dval)
if(ndval<0)ndval<-0
iup<-length(y)-ndval
rord<-order(res.scale)
flag<-rord[1:iup]
x=as.matrix(x)
temp<-lsfit(x[flag,],y[flag])
list(coef=temp$coef,res=temp$residuals)
}



bwrmcp<-function(J,K,x,grp=NA,alpha=.05,bhop=F){
#
# Do all pairwise comparisons of
# main effects for Factor A and B and all interactions
# using a rank-based method that tests for equal distributions.
#
#  A between by within subjects design is assumed.
#  Levels of Factor A are assumed to be independent and
#  levels of Factor B are dependent.
#
#  The data are assumed to be stored in x in list mode or in a matrix.
#  If grp is unspecified, it is assumed x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second factor: level 1,2
#  x[[j+1]] is the data for level 2,1, etc.
#  If the data are in wrong order, grp can be used to rearrange the
#  groups. For example, for a two by two design, grp<-c(2,4,3,1)
#  indicates that the second group corresponds to level 1,1;
#  group 4 corresponds to level 1,2; group 3 is level 2,1;
#  and group 1 is level 2,2.
#
#   Missing values are automatically removed.
#
 if(is.list(x))xrem=matl(x)
        JK <- J * K
        if(is.matrix(x)){
                xrem=x
                x <- listm(x)
}

        if(!is.na(grp[1])) {
                yy <- x
                x<-list()
                for(j in 1:length(grp))
                        x[[j]] <- yy[[grp[j]]]
        }
        if(!is.list(x))
                stop("Data must be stored in list mode or a matrix.")
#        for(j in 1:JK) {
#                xx <- x[[j]]
#                x[[j]] <- xx[!is.na(xx)] # Remove missing values
#        }
        #
if(JK != length(x))warning("The number of groups in x does not match J*K.")
for(j in 1:JK){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
x[[j]]<-temp
}
#
CC<-(J^2-J)/2
# Determine critical values
ncon<-CC*(K^2-K)/2
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon)
}
if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon
Fac.A<-matrix(0,CC,5)
dimnames(Fac.A)<-list(NULL,c("Level","Level","test.stat","p-value","sig.crit"))
mat<-matrix(c(1:JK),ncol=K,byrow=T)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j < jj){
ic<-ic+1
Fac.A[ic,1]<-j
Fac.A[ic,2]<-jj
datsub=xrem[,c(mat[j,],mat[jj,])]
datsub=elimna(datsub)
#temp<-bwrank(2,K,elimna(x[,c(mat[j,],mat[jj,])]))
temp<-bwrank(2,K,datsub)
Fac.A[ic,3]<-temp$test.A
Fac.A[ic,4]<-temp$p.value.A
}}}
temp2<-order(0-Fac.A[,4])
Fac.A[temp2,5]<-dvec[1:length(temp2)]
CCB<-(K^2-K)/2
ic<-0
Fac.B<-matrix(0,CCB,5)
dimnames(Fac.B)<-list(NULL,c("Level","Level","test.stat","p-value","sig.crit"))
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
ic<-ic+1
Fac.B[ic,1]<-k
Fac.B[ic,2]<-kk
mat1<-cbind(mat[,k],mat[,kk])
rv=c(mat1[,1],mat1[,2])
datsub=elimna(xrem[,sort(c(mat1[,1],mat1[,2]))])
temp<-bwrank(J,2,datsub)
Fac.B[ic,3]<-temp$test.B
Fac.B[ic,4]<-temp$p.value.B
}}}
temp2<-order(0-Fac.B[,4])
Fac.B[temp2,5]<-dvec[1:length(temp2)]
CCI<-CC*CCB
Fac.AB<-matrix(0,CCI,7)
dimnames(Fac.AB)<-list(NULL,c("Lev.A","Lev.A","Lev.B","Lev.B","test.stat","p-value","sig.crit"))
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j < jj){
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
ic<-ic+1
Fac.AB[ic,1]<-j
Fac.AB[ic,2]<-jj
Fac.AB[ic,3]<-k
Fac.AB[ic,4]<-kk
val<-c(mat[j,k],mat[j,kk],mat[jj,k],mat[jj,kk])
val<-sort(val)
datsub=elimna(xrem[,val])
temp<-bwrank(2,2,datsub)
Fac.AB[ic,5]<-temp$test.AB
Fac.AB[ic,6]<-temp$p.value.AB
}}}}}}
temp2<-order(0-Fac.AB[,6])
Fac.AB[temp2,7]<-dvec[1:length(temp2)]
list(Factor.A=Fac.A,Factor.B=Fac.B,Factor.AB=Fac.AB)
}
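#
# Illustrative sketch (simulated data, not from the original file):
# a 2-by-3 between-by-within design stored in a matrix, columns 1-3
# holding the K=3 dependent measures for level 1 of Factor A.
#   set.seed(1)
#   dat<-matrix(rnorm(120),ncol=6)
#   bwrmcp(2,3,dat)
#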


ancbbmed<-function(x1,y1,x2,y2,fr1=1,fr2=1,nboot=100,pts=NA,plotit=TRUE,
SEED=TRUE,alpha=.05){
#
# Compare two independent groups using an ANCOVA method
# based in part on a bootstrap bagging estimate of the dependent variable.
# No assumption is made about the form of the regression
# lines--a running interval smoother is used.
# Confidence intervals are computed using a percentile bootstrap
# method. Comparisons are made at five empirically chosen design points.
#
#  Assume data are in x1 y1 x2 and y2
#
if(SEED)set.seed(2)
if(is.na(pts[1])){
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,7)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","ci.low","ci.hi","p.value"))
gv1<-vector("list")
for (i in 1:5){
j<-i+5
temp1<-y1[near(x1,x1[isub[i]],fr1)]
temp2<-y2[near(x2,x1[isub[i]],fr2)]
temp1<-temp1[!is.na(temp1)]
temp2<-temp2[!is.na(temp2)]
mat[i,1]<-x1[isub[i]]
mat[i,2]<-length(temp1)
mat[i,3]<-length(temp2)
mat[,4]<-runmbo(x1,y1,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=median)-
runmbo(x2,y2,pts=x1[isub],pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=median)
gv1[[i]]<-temp1
gv1[[j]]<-temp2
}
I1<-diag(5)
I2<-0-I1
con<-rbind(I1,I2)
estmat1<-matrix(nrow=nboot,ncol=length(isub))
estmat2<-matrix(nrow=nboot,ncol=length(isub))
data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot)
#
for(ib in 1:nboot){
estmat1[ib,]=runmbo(x1[data1[ib,]],y1[data1[ib,]],pts=x1[isub],
pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=median)
estmat2[ib,]=runmbo(x2[data2[ib,]],y2[data2[ib,]],pts=x1[isub],
pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=median)
}
dif<-(estmat1<estmat2)
dif0<-(estmat1==estmat2)
pvals=apply(dif,2,mean,na.rm=TRUE)+.5*apply(dif0,2,mean,na.rm=TRUE)
tmat<-rbind(pvals,1-pvals)
pvals=2*apply(tmat,2,min)
mat[,7]<-pvals
for(ij in 1:length(isub)){
dif<-estmat1[,ij]-estmat2[,ij]
dif<-elimna(dif)
nbad<-length(dif)
lo<-round(nbad*alpha/2)
hi<-nbad-lo
dif<-sort(dif)
mat[ij,5]<-dif[lo]
mat[ij,6]<-dif[hi]
}
}
if(!is.na(pts[1])){
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
if(n1[i]<=5)print(paste("Warning: there are only",n1[i],"points corresponding to the design point X=",pts[i]))
if(n2[i]<=5)print(paste("Warning: there are only",n2[i],"points corresponding to the design point X=",pts[i]))
}
mat<-matrix(NA,length(pts),7)
dimnames(mat)<-list(NULL,c("X","n1","n2","DIF","ci.low","ci.hi",
"p.value"))
gv<-vector("list",2*length(pts))
for (i in 1:length(pts)){
g1<-y1[near(x1,pts[i],fr1)]
g2<-y2[near(x2,pts[i],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
j<-i+length(pts)
gv[[i]]<-g1
gv[[j]]<-g2
}
I1<-diag(length(pts))
I2<-0-I1
con<-rbind(I1,I2)
isub=c(1:length(pts))
estmat1<-matrix(nrow=nboot,ncol=length(isub))
estmat2<-matrix(nrow=nboot,ncol=length(isub))
data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot)
est1=runmbo(x1,y1,pts=pts,pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=median)
est2=runmbo(x2,y2,pts=pts,pyhat=TRUE,plotit=FALSE,SEED=FALSE,est=median)
mat[,4]<-est1-est2
for(ib in 1:nboot){
estmat1[ib,]=runmbo(x1[data1[ib,]],y1[data1[ib,]],pts=pts,pyhat=TRUE,plotit=FALSE,
SEED=FALSE,est=median)
estmat2[ib,]=runmbo(x2[data2[ib,]],y2[data2[ib,]],pts=pts,pyhat=TRUE,plotit=FALSE,
SEED=FALSE,est=median)
}
dif<-(estmat1<estmat2)
dif0<-(estmat1==estmat2)
pvals=apply(dif,2,mean,na.rm=TRUE)+.5*apply(dif0,2,mean,na.rm=TRUE)
tmat<-rbind(pvals,1-pvals)
pvals=2*apply(tmat,2,min)
#
mat[,1]<-pts
mat[,2]<-n1
mat[,3]<-n2
mat[,7]<-pvals
for(ij in 1:length(pts)){
dif<-sort(estmat1[,ij]-estmat2[,ij])
dif<-elimna(dif)
nbad<-length(dif)
lo<-round(nbad*alpha/2)
hi<-nbad-lo
mat[ij,5]<-dif[lo]
mat[ij,6]<-dif[hi]
}
}
if(plotit)
runmean2g(x1,y1,x2,y2,fr=fr1,est=median,sm=T)
list(output=mat)
}
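#
# Illustrative sketch (simulated data, not from the original file):
# comparing two groups at five empirically chosen design points.
#   set.seed(1)
#   x1<-runif(60); y1<-x1+rnorm(60)
#   x2<-runif(60); y2<-rnorm(60)
#   ancbbmed(x1,y1,x2,y2)$output
#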






miss2na<-function(m,na.val=NULL){
#
# Convert any missing value, indicated by na.val,
# to NA.
#
#  Example: if 999 is the missing value, use miss2na(m,999)
#
if(is.null(na.val))stop("Specify a missing value")
if(is.vector(m)){
if(!is.list(m)){
flag=(m==na.val)
m[flag]=NA
}}
if(is.matrix(m)){
for(j in 1:ncol(m)){
x=m[,j]
flag=(x==na.val)
x[flag]=NA
m[,j]=x
}}
if(is.list(m)){
for(j in 1:length(m)){
x=m[[j]]
flag=(x==na.val)
x[flag]=NA
m[[j]]=x
}}
m
}

rm2mcp<-function(J,K,x,est=tmean,alpha=.05,grp=NA,dif=TRUE,nboot=NA,
plotit=FALSE,BA=FALSE,hoch=FALSE,...){
#
# This function performs multiple comparisons for
# dependent groups in a within-by-within design.
# It creates the linear contrasts and calls rmmcppb,
# assuming that main effects and interactions for a
# two-way design are to be tested.
#
        #   The data are assumed to be stored in x in list mode or in a matrix.
        #  If grp is unspecified, it is assumed x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second factor: level 1,2
        #  x[[j+1]] is the data for level 2,1, etc.
        #  If the data are in the wrong order, grp can be used to rearrange the
        #  groups. For example, for a two by two design, grp<-c(2,4,3,1)
        #  indicates that the second group corresponds to level 1,1;
        #  group 4 corresponds to level 1,2; group 3 is level 2,1;
        #  and group 1 is level 2,2.
        #
        #   Missing values are automatically removed.
        #
        JK <- J * K
        if(is.matrix(x))
                x <- listm(x)
        if(!is.na(grp[1])) {
                yy <- x
                for(j in 1:length(grp))
                        x[[j]] <- yy[[grp[j]]]
        }
        if(!is.list(x))
                stop("Data must be stored in list mode or a matrix.")
        for(j in 1:JK) {
                xx <- x[[j]]
               x[[j]] <- xx[!is.na(xx)]
        }
        #
        # Create the three contrast matrices
        #
temp<-con2way(J,K)
conA<-temp$conA
conB<-temp$conB
conAB<-temp$conAB
        ncon <- max(nrow(conA), nrow(conB), nrow(conAB))
FacA<-rmmcppb(x,con=conA,est=est,plotit=plotit,dif=dif,grp=grp,
nboot=nboot,BA=BA,hoch=hoch,...)
FacB<-rmmcppb(x,con=conB,est=est,plotit=plotit,dif=dif,grp=grp,
nboot=nboot,BA=BA,hoch=hoch,...)
FacAB<-rmmcppb(x,con=conAB,est=est,plotit=plotit,dif=dif,grp=grp,
nboot=nboot,BA=BA,hoch=hoch,...)
list(Factor.A=FacA,Factor.B=FacB,Factor.AB=FacAB)

}
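#
# Illustrative sketch (simulated data, not from the original file):
# multiple comparisons for a 2-by-2 within-by-within design using
# 20% trimmed means.
#   set.seed(1)
#   dat<-matrix(rnorm(80),ncol=4)  # columns: levels 1,1 1,2 2,1 2,2
#   rm2mcp(2,2,dat)
#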

dcov<-function(m,tr=.2,dop=1,cop=2,fast=FALSE,pr=TRUE){
#
# Compute multivariate measure of scatter
# using Donoho-Gasko method.
#
# dop=1, use fdepth to compute depths
# dop=2, use fdepthv2  to compute depths
# which is slower but more accurate.
#
# cop=1, use Donoho-Gasko median in fdepth
# cop=2, use MCD in fdepth
# cop=3, use marginal medians in fdepth
# cop=4, use MVE in fdepth
#
# fast=T reduces execution time tremendously.
# It requires the files exefdepth and com.depth;
# fast=T calls a FORTRAN function.
# This option runs on a UNIX machine that has FORTRAN.
# Whether it runs on a PC with FORTRAN installed has not
# been checked.
#
if(tr>=.5)stop("Amount of trimming must be less than .5")
if(is.list(m))m<-matl(m)
if(!is.matrix(m))stop("Data must be stored in a matrix or in list mode.")
if(ncol(m)==1){
if(tr<.5)val<-mean(m,tr)
}
if(ncol(m)>1){
temp<-NA
if(ncol(m)!=2){
# Use approximate depth
if(fast)temp<-fdepth.for(m,pr=FALSE,cop=cop)
if(!fast){
if(dop==1)temp<-fdepth(m,plotit=FALSE,cop=cop)
if(dop==2)temp<-fdepthv2(m)
}}
#  Use exact depth if ncol=2
if(ncol(m)==2){
if(fast)temp<-depth2.for(m,pr=FALSE,plotit=FALSE)
if(!fast){
for(i in 1:nrow(m))
temp[i]<-depth(m[i,1],m[i,2],m)
}}
mdep<-max(temp)
flag<-(temp==mdep)
flag2<-(temp>=tr)
if(sum(flag2)==0)stop("Trimmed all of the data")
if(sum(flag2)==1){
if(pr)print("Warning: Trimmed all but one point")
val<-0
}
if(sum(flag2)>1)val<-var(m[flag2,])
}
if(pr && fast)print(val)
val
}
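#
# Illustrative sketch (simulated data, not from the original file):
# the Donoho-Gasko scatter matrix for bivariate data; exact depth is
# used because ncol=2.
#   set.seed(1)
#   m<-matrix(rnorm(100),ncol=2)
#   dcov(m,tr=.2)
#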

medr<-function(x,est=median,alpha=.05,nboot=500,grp=NA,op=1,MM=FALSE,cop=3,pr=TRUE,
SEED=TRUE,...){
#
#   Test the hypothesis that the distribution for each pairwise
#   difference has a measure of location = 0
#   By default, the  median estimator is used
#
#   Independent groups are assumed.
#
#   The data are assumed to be stored in x in list mode or in a matrix.
#   If stored in list mode,
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J, say.
#   If stored in a matrix, columns correspond to groups.
#
#   All pairwise differences among the groups are compared.
#
#   The default number of bootstrap samples is nboot=500
#
#   op controls how depth is measured
#   op=1, Mahalanobis
#   op=2, Mahalanobis based on MCD covariance matrix
#   op=3, Projection distance
#   op=4, Projection distance using FORTRAN version
#
#   for arguments MM and cop, see pdis.
#
if(is.matrix(x)){
xx<-list()
for(i in 1:ncol(x)){
xx[[i]]<-x[,i]
}
x<-xx
}
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(grp[1])){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
mvec<-NA
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
x[[j]]<-temp
mvec[j]<-est(temp,...)
}
Jm<-J-1
d<-(J^2-J)/2
data<-list()
bvec<-matrix(NA,ncol=d,nrow=nboot)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
for(it in 1:nboot){
for(j in 1:J)data[[j]]<-sample(x[[j]],size=length(x[[j]]),replace=TRUE)
dval<-0
for(j in 1:J){
for(k in 1:J){
if(j<k){
dval<-dval+1
bvec[it,dval]<-loc2dif(data[[j]],data[[k]],est=est,...)
}}}}
output<-matrix(NA,nrow=d,ncol=3)
dimnames(output)<-list(NULL,c("Group","Group","psihat"))
tvec<-NA
dval<-0
for(j in 1:J){
for(k in 1:J){
if(j<k){
dval<-dval+1
output[dval,1]<-j
output[dval,2]<-k
tvec[dval]<-loc2dif(x[[j]],x[[k]],est=est,...)
output[dval,3]<-tvec[dval]
}}}
tempcen<-apply(bvec,1,mean)
vecz<-rep(0,d)
smat<-var(bvec-tempcen+tvec)
temp<-bvec-tempcen+tvec
bcon<-rbind(bvec,vecz)
if(op==1)dv<-mahalanobis(bcon,tvec,smat)
if(op==2){
smat<-cov.mcd(temp)$cov
dv<-mahalanobis(bcon,tvec,smat)
}
if(op==3){
print("Computing p-value. Might take a while with op=3")
dv<-pdis(bcon,MM=MM,cop=cop,center=tvec)
}
if(op==4)dv<-pdis.for(bcon,MM=MM,cop=cop,pr=FALSE,center=tvec)
bplus<-nboot+1
sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot
if(op==4)print(sig.level)
list(sig.level=sig.level,output=output)
}
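#
# Illustrative sketch (simulated data, not from the original file):
# testing that every pairwise difference among three independent
# groups has a median of zero.
#   set.seed(1)
#   x<-list(rnorm(30),rnorm(30),rnorm(30,.5))
#   medr(x)$sig.level
#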

medind<-function(x,y,qval=.5,nboot=1000,com.pval=FALSE,SEED=TRUE,alpha=.05,pr=TRUE,
xout=FALSE,outfun=out,...){
#
# Test the hypothesis that the regression surface is a flat
# horizontal plane.
# The method is based on a modification of a method derived by
#  He and Zhu 2003, JASA, 98, 1013-1022.
# Here, resampling is avoided using approximate critical values if
# com.pval=F
#
#  critical values are available for 10<=n<=400, p=1,...,8 and
#  quantiles
#  qval=.25,.5, .75.
#
#  To get a p-value, via simulations, set  com.pval=T
#  nboot is number of simulations used to determine the p-value.
#
if(pr){
if(!com.pval)print("To get a p-value, set com.pval=T")
print("Reject if the test statistic exceeds the critical value")
if(length(y)>400)print("If n>400, current version requires com.pval=TRUE, resulting in high execution time")
}
#store.it=F
x<-as.matrix(x)
p<-ncol(x)
pp1<-p+1
p.val<-NULL
crit.val<-NULL
yx<-elimna(cbind(y,x)) #Eliminate missing values.
y<-yx[,1]
x<-yx[,2:pp1]
x<-as.matrix(x)
if(xout){
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
}
n<-length(y)
if(n>400)com.pval=T
if(qval==.5){
resmat1=matrix(c( 0.0339384580, 0.044080032, 0.050923441, 0.064172557,
 0.0153224731, 0.021007108, 0.027687963, 0.032785044,
 0.0106482053, 0.014777728, 0.018249546, 0.023638611,
 0.0066190573, 0.009078091, 0.011690825, 0.014543009,
 0.0031558563, 0.004374515, 0.005519069, 0.007212951,
 0.0015448987, 0.002231473, 0.002748314, 0.003725916,
 0.0007724197, 0.001021767, 0.001370776, 0.001818037),ncol=4,nrow=7,byrow=T)
resmat2=matrix(c(
 0.052847794, 0.061918744, 0.071346969, 0.079163419,
 0.021103277, 0.027198076, 0.031926052, 0.035083610,
 0.013720585, 0.018454145, 0.022177381, 0.026051716,
 0.008389969, 0.010590374, 0.012169233, 0.015346065,
 0.004261627, 0.005514060, 0.007132021, 0.008416836,
 0.001894753, 0.002416311, 0.003085230, 0.003924706,
 0.001045346, 0.001347837, 0.001579373, 0.001864344),ncol=4,nrow=7,byrow=T)
resmat3=matrix(c(
0.071555715, 0.082937665, 0.089554679, 0.097538044,
0.031060795, 0.035798539, 0.043862556, 0.053712151,
0.019503635, 0.023776479, 0.027180121, 0.030991367,
0.011030001, 0.013419347, 0.015557409, 0.017979524,
0.005634478, 0.006804788, 0.007878358, 0.008807657,
0.002552182, 0.003603778, 0.004275965, 0.005021989,
0.001251044, 0.001531919, 0.001800608, 0.002037870),ncol=4,nrow=7,byrow=T)
resmat4=matrix(c(
0.093267532, 0.101584002, 0.108733965, 0.118340448,
0.038677863, 0.045519806, 0.051402903, 0.060097046,
0.024205231, 0.029360145, 0.034267265, 0.039381482,
0.013739157, 0.015856343, 0.018065898, 0.019956084,
0.006467562, 0.007781030, 0.009037972, 0.010127143,
0.003197162, 0.003933525, 0.004656625, 0.005929469,
0.001652690, 0.001926060, 0.002363874, 0.002657071),ncol=4,nrow=7,byrow=T)
resmat5=matrix(c(
0.117216934, 0.124714114, 0.129458602, 0.136456163,
0.048838630, 0.055608712, 0.060580045, 0.067943676,
0.030594644, 0.035003872, 0.040433885, 0.047648696,
0.016940240, 0.019527491, 0.022047442, 0.025313443,
0.008053039, 0.009778574, 0.011490394, 0.013383628,
0.003760567, 0.004376294, 0.005097890, 0.005866240,
0.001894616, 0.002253522, 0.002612405, 0.002938808),ncol=4,nrow=7,byrow=T)
resmat6=matrix(c(
0.136961531, 0.144120225, 0.149003907, 0.152667432,
0.055909481, 0.062627211, 0.069978086, 0.081189957,
0.034634825, 0.040740587, 0.044161376, 0.047722045,
0.020165417, 0.023074738, 0.025881208, 0.028479913,
0.009436297, 0.011246968, 0.013220963, 0.015100546,
0.004644596, 0.005334418, 0.006040595, 0.007237195,
0.002277590, 0.002635712, 0.002997398, 0.003669488),ncol=4,nrow=7,byrow=T)
resmat7=matrix(c(
 0.156184672, 0.163226643, 0.171754686, 0.177142753,
 0.070117003, 0.077052773, 0.082728047, 0.090410797,
 0.041774517, 0.047379662, 0.053101833, 0.057674454,
 0.023384451, 0.026014421, 0.029609042, 0.032619018,
 0.010856382, 0.012567043, 0.013747870, 0.016257014,
 0.005164004, 0.006131755, 0.006868101, 0.008351046,
 0.002537642, 0.003044154, 0.003623654, 0.003974469),ncol=4,nrow=7,byrow=T)
resmat8=matrix(c(
0.178399742, 0.180006714, 0.193799396, 0.199585892,
0.078032767, 0.085624186, 0.091511226, 0.102491785,
0.045997886, 0.052181615, 0.057362163, 0.062630424,
0.025895739, 0.029733034, 0.033764463, 0.037873655,
0.012195876, 0.013663248, 0.015487587, 0.017717864,
0.005892418, 0.006876488, 0.007893475, 0.008520783,
0.002839731, 0.003243909, 0.003738571, 0.004124057),ncol=4,nrow=7,byrow=T)
crit5=array(cbind(resmat1,resmat2,resmat3,resmat4,resmat5,resmat6,resmat7,
resmat8),c(7,4,8))
flag=T
crit.val=NULL
if(p > 8)flag=F
if(n<10 || n>=400)flag=F
aval<-c(.1,.05,.025,.01)
aokay<-duplicated(c(alpha,aval))
if(sum(aokay)==0)flag=F
if(flag){
nalpha=c(0:4)
asel=c(0,aval)
ialpha=nalpha[aokay]
critit=crit5[,ialpha,p]
nvec<-c(10,20,30,50,100,200,400)
nval<-duplicated(c(n,nvec))
nval<-nval[2:8]
if(sum(nval)>0)crit.val<-critit[nval]
if(sum(nval)==0){
loc<-rank(c(n,nvec))
xx<-c(1/nvec[loc[1]-1]^1.5,1/nvec[loc[1]]^1.5)
yy<-c(critit[loc[1]-1],critit[loc[1]])
icoef<-tsp1reg(xx,yy)$coef
crit.val<-icoef[1]+icoef[2]/n^1.5
}
}}
mqval<-min(c(qval,1-qval))
if(mqval==.25){
resmat1=matrix(c(
 0.029933486, 0.0395983678, 0.054087714, 0.062961453,
 0.011122294, 0.0149893431, 0.018154062, 0.022685244,
 0.009207200, 0.0113020766, 0.014872309, 0.019930730,
 0.004824185, 0.0070402246, 0.010356886, 0.013176896,
 0.002370379, 0.0033146605, 0.004428004, 0.005122988,
 0.001106460, 0.0016110185, 0.001984450, 0.002650256,
 0.000516646, 0.0006796144, 0.000868751, 0.001202042),ncol=4,nrow=7,byrow=T)
resmat2=matrix(c(
0.0448417783, 0.0602598211, 0.066001091, 0.087040667,
0.0173410522, 0.0224713157, 0.027370822, 0.033435727,
0.0121205549, 0.0150409465, 0.018938516, 0.022643559,
0.0064894201, 0.0084611518, 0.010700320, 0.013232000,
0.0029734778, 0.0040641310, 0.004911086, 0.005769038,
0.0015149104, 0.0020584993, 0.002582982, 0.003114029,
0.0007984207, 0.0009929547, 0.001182739, 0.001398774),ncol=4,nrow=7,byrow=T)
resmat3=matrix(c(
0.0636530860, 0.072974943, 0.083840562, 0.097222407,
0.0216586978, 0.027436566, 0.031875356, 0.036830302,
0.0152898678, 0.018964066, 0.021728817, 0.028959751,
0.0083568493, 0.010071525, 0.012712862, 0.015254576,
0.0039033578, 0.004764140, 0.005577071, 0.006660322,
0.0019139215, 0.002343152, 0.002833612, 0.003465269,
0.0009598105, 0.001146689, 0.001355930, 0.001547572),ncol=4,nrow=7,byrow=T)
resmat4=matrix(c(
 0.085071252, 0.095947936, 0.104197413, 0.118449765,
 0.029503024, 0.034198704, 0.039543410, 0.045043759,
 0.019203266, 0.022768842, 0.026886843, 0.033481535,
 0.011440493, 0.013555017, 0.016138970, 0.018297815,
 0.004863139, 0.005756305, 0.007385239, 0.009114958,
 0.002635144, 0.003111160, 0.003769051, 0.004215897,
 0.001188837, 0.001435179, 0.001727871, 0.001956372),ncol=4,nrow=7,byrow=T)
resmat5=matrix(c(
0.102893512, 0.114258558, 0.122545016, 0.130222265,
0.036733497, 0.042504996, 0.048663576, 0.055456582,
0.024192946, 0.028805967, 0.032924489, 0.038209545,
0.012663224, 0.014635216, 0.017275594, 0.019736410,
0.006105572, 0.007310803, 0.008960242, 0.009745320,
0.003067163, 0.003614637, 0.003997615, 0.004812373,
0.001441008, 0.001732819, 0.002078651, 0.002307551),ncol=4,nrow=7,byrow=T)
resmat6=matrix(c(
0.117642769, 0.126566104, 0.133106804, 0.142280074,
0.044309420, 0.049731991, 0.053912739, 0.060512997,
0.028607224, 0.033826020, 0.038616476, 0.043546500,
0.015445120, 0.017557181, 0.020040720, 0.022747707,
0.007334749, 0.008406468, 0.009392098, 0.010919651,
0.003352200, 0.003814582, 0.004380562, 0.005252154,
0.001703698, 0.002001713, 0.002338651, 0.002772864),ncol=4,nrow=7,byrow=T)
resmat7=matrix(c(
0.106573121, 0.113058950, 0.117388191, 0.121286795,
0.052170054, 0.058363322, 0.064733684, 0.069749344,
0.030696897, 0.035506926, 0.039265698, 0.044437674,
0.016737307, 0.019605734, 0.021253610, 0.022922988,
0.007767232, 0.009231789, 0.010340874, 0.011471110,
0.003998261, 0.004590177, 0.005506926, 0.006217415,
0.001903372, 0.002174748, 0.002519055, 0.002858655),ncol=4,nrow=7,byrow=T)
resmat8=matrix(c(
 0.119571179, 0.126977461, 0.130120853, 0.133258294,
 0.059499563, 0.067185338, 0.071283297, 0.079430577,
 0.034310968, 0.039827130, 0.044451690, 0.048512464,
 0.018599530, 0.021093909, 0.023273085, 0.027471116,
 0.009135712, 0.010901687, 0.012288682, 0.013729545,
 0.004382249, 0.005191810, 0.005598429, 0.006484433,
 0.002196973, 0.002525918, 0.002818550, 0.003242426),ncol=4,nrow=7,byrow=T)
crit5=array(cbind(resmat1,resmat2,resmat3,resmat4,resmat5,resmat6,resmat7,
resmat8),c(7,4,8))
flag=T
crit.val=NULL
if(p > 8)flag=F
if(n<10 || n>=400)flag=F
aval<-c(.1,.05,.025,.01)
aokay<-duplicated(c(alpha,aval))
if(sum(aokay)==0)flag=F
if(flag){
nalpha=c(0:4)
asel=c(0,aval)
ialpha=nalpha[aokay]
critit=crit5[,ialpha,p]
nvec<-c(10,20,30,50,100,200,400)
nval<-duplicated(c(n,nvec))
nval<-nval[2:8]
if(sum(nval)>0)crit.val<-critit[nval]
if(sum(nval)==0){
loc<-rank(c(n,nvec))
xx<-c(1/nvec[loc[1]-1]^1.5,1/nvec[loc[1]]^1.5)
yy<-c(critit[loc[1]-1],critit[loc[1]])
icoef<-tsp1reg(xx,yy)$coef
crit.val<-icoef[1]+icoef[2]/n^1.5
}
}}
if(is.null(crit.val))com.pval=T
# no critical value found, so a p-value will be computed
# the code for checking the file medind.crit, which appears
# next, is not working yet.
gdot<-cbind(rep(1,n),x)
gdot<-ortho(gdot)
x<-gdot[,2:pp1]
x<-as.matrix(x)
coef<-NULL
if(qval==.5)coef<-median(y)
if(qval==.25)coef<-idealf(y)$ql
if(qval==.75)coef<-idealf(y)$qu
if(is.null(coef))coef<-qest(y,q=qval)
res<-y-coef
psi<-NA
psi<-ifelse(res>0,qval,qval-1)
rnmat<-matrix(0,nrow=n,ncol=pp1)
ran.mat<-apply(x,2,rank)
flagvec<-apply(ran.mat,1,max)
for(j in 1:n){
flag<-ifelse(flagvec<=flagvec[j],T,F)
flag<-as.numeric(flag)
rnmat[j,]<-apply(flag*psi*gdot,2,sum)
}
rnmat<-rnmat/sqrt(n)
temp<-matrix(0,pp1,pp1)
for(i in 1:n)temp<-temp+rnmat[i,]%*%t(rnmat[i,])
temp<-temp/n
test<-max(eigen(temp)$values)
if(com.pval){
if(SEED)set.seed(2)
p.val<-0
rem<-0
for(i in 1:nboot){
yboot<-rnorm(n)
if(p==1)xboot<-rnorm(n)
if(p>1)xboot<-rmul(n,p=p)
temp3<-medindsub(x,yboot,qval=qval)
if(test>=temp3)p.val<-p.val+1
rem[i]<-temp3
}
ic10<-round(.9*nboot)
ic05<-round(.95*nboot)
ic025<-round(.975*nboot)
ic01<-round(.99*nboot)
rem<-sort(rem)
p.val<-1-p.val/nboot
# now remember the critical values by storing them in "medind.crit"
#if(store.it)
#write(c(n,p,qval,rem[ic10],rem[ic05],rem[ic025],rem[ic01]),"medind.crit",
#append=T,ncolumns=7)
print("The .1, .05, .025 and .01 critical values are:")
print(c(rem[ic10],rem[ic05],rem[ic025],rem[ic01]))
crit.val<-rem[ic05]
}
names(crit.val)=""
Decision="Fail To Reject"
if(test>=crit.val)Decision="Reject"
list(test.stat=test,crit.value=crit.val,p.value=p.val,Decision=Decision)
}
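#
# Illustrative sketch (simulated data, not from the original file):
# with n=100 and p=1 an approximate critical value is read from the
# stored tables, so no simulation is needed.
#   set.seed(1)
#   x<-rnorm(100)
#   y<-rnorm(100)   # no association, so expect Fail To Reject
#   medind(x,y)
#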


medindsub<-function(x,y,qval=.5){
#
x<-as.matrix(x)
n<-length(y)
p<-ncol(x)
pp1<-p+1
tvec<-c(qval,0-qval,1-qval,qval-1)
pval<-c((1-qval)/2,(1-qval)/2,qval/2,qval/2)
gdot<-cbind(rep(1,n),x)
gdot<-ortho(gdot)
x<-gdot[,2:pp1]
x<-as.matrix(x)
if(qval==.5)coef<-median(y)
if(qval!=.5)coef<-qest(y,q=qval)
res<-y-coef
psi<-NA
psi<-ifelse(res>0,qval,qval-1)
rnmat<-matrix(0,nrow=n,ncol=pp1)
ran.mat<-apply(x,2,rank)
flagvec<-apply(ran.mat,1,max)
for(j in 1:n){
#flag<-ifelse(flagvec<=flagvec[j],T,F)
flag<-ifelse(flagvec>=flagvec[j],T,F)
rnmat[j,]<-apply(flag*psi*gdot,2,sum)
}
rnmat<-rnmat/sqrt(n)
temp<-matrix(0,pp1,pp1)
for(i in 1:n)temp<-temp+rnmat[i,]%*%t(rnmat[i,])
temp<-temp/n
test<-max(eigen(temp)$values)
test
}
linplot<-function(x,con=0,plotfun=akerd,nboot=800,plotit=TRUE,pyhat=FALSE,...){
#
#  plot the distribution of the linear contrast
#  c_1X_1+c_2X_2+...
#
#  con contains contrast coefficients. If not specified,
#  con<-c(1,1,...,1)
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
J<-length(x)
tempn<-0
mvec<-NA
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
}
Jm<-J-1
#
# Determine contrast matrix
# If not specified, assume distribution of the sum is to be plotted
#
if(sum(con^2)==0)con<-matrix(1,J,1)
bvec<-matrix(NA,nrow=J,ncol=nboot)
for(j in 1:J){
data<-matrix(sample(x[[j]],size=nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-data
}
bcon<-t(con)%*%bvec #ncon by nboot matrix
bcon<-as.vector(bcon)
dval<-plotfun(bcon,pyhat=pyhat,...)
dval
}
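#
# Illustrative sketch (simulated data, not from the original file):
# plotting the estimated distribution of the difference X_1-X_2.
#   set.seed(1)
#   x<-list(rnorm(40),rnorm(40,1))
#   linplot(x,con=c(1,-1))
#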
lin2plot<-function(x,con,op=4,nboot=800,plotit=TRUE,pyhat=F){
#
#  plot two distributions.
#  The first is the distribution of the linear contrast
#  c_1X_1+c_2X_2+... restricted to the positive coefficients (c_i>0),
#  and the second is the same contrast restricted to the negative
#  coefficients (c_i<0).
#
#  con contains contrast coefficients. If not specified,
#  function terminates.
#
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
J<-length(x)
if(J != length(con)){
stop("Number of contrast coefficients must equal the number of groups")
}
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
x[[j]]<-temp
}
#
# Determine contrast matrix for positive contrast coefficients
#
flag<-(con<0)
con1<-con
con1[flag]<-0
# Determine contrast matrix for negative contrast coefficients
flag<-(con>0)
con2<-con
con2[flag]<-0
bvec<-matrix(NA,nrow=J,ncol=nboot)
for(j in 1:J){
data<-matrix(sample(x[[j]],size=nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-data
}
bcon1<-t(con1)%*%bvec
bcon2<-t(con2)%*%bvec
bcon1<-as.vector(bcon1)
bcon2<-as.vector(bcon2)
fval<-g2plot(bcon1,bcon2,op=op,rval=15,fr=0.8,aval=0.5,xlab="X",ylab="")
fval
}
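#
# Illustrative sketch (simulated data, not from the original file):
# with contrast (1,1,-1) the first plotted distribution is X_1+X_2
# and the second is -X_3.
#   set.seed(1)
#   x<-list(rnorm(40),rnorm(40),rnorm(40))
#   lin2plot(x,con=c(1,1,-1))
#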
adrunl<-function(x,y,est=tmean,iter=10,pyhat=FALSE,plotit=TRUE,fr=.8,
xlab="x1",ylab="x2",zlab="",theta=50,phi=25,expand=.5,scale=FALSE,
zscale=TRUE,xout=FALSE,outfun=out,ticktype="simple",...){
#
# additive model based on lowess (via lplot)
# and the backfitting algorithm
#
m<-elimna(cbind(x,y))
x<-as.matrix(x)
p<-ncol(x)
if(p==1)val<-lplot(m[,1],m[,2],pyhat=TRUE,plotit=plotit,span=fr)$yhat.values
if(p>1){
library(MASS)
library(akima)
np<-p+1
x<-m[,1:p]
y<-m[,np]
fhat<-matrix(NA,ncol=p,nrow=length(y))
fhat.old<-matrix(NA,ncol=p,nrow=length(y))
res<-matrix(NA,ncol=np,nrow=length(y))
dif<-1
for(i in 1:p)
fhat.old[,i]<-lplot(x[,i],y,pyhat=TRUE,plotit=FALSE,span=fr)$yhat.values
eval<-NA
for(it in 1:iter){
for(ip in 1:p){
res[,ip]<-y
for(ip2 in 1:p){
if(ip2 != ip)res[,ip]<-res[,ip]-fhat.old[,ip2]
}
fhat[,ip]<-lplot(x[,ip],res[,ip],pyhat=TRUE,plotit=FALSE,span=fr)$yhat.values
}
eval[it]<-sum(abs(fhat/sqrt(sum(fhat^2))-fhat.old/sqrt(sum(fhat.old^2))))
if(it > 1){
itm<-it-1
dif<-abs(eval[it]-eval[itm])
}
fhat.old<-fhat
if(dif<.01)break
}
val<-apply(fhat,1,sum)
aval<-est(y-val,...)
val<-val+aval
if(plotit && p==2){
fitr<-val
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fitr<-interp(mkeep[,1],mkeep[,2],fitr)
persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand,
scale=scale,ticktype=ticktype)
}}
if(!pyhat)val<-"Done"
val
}
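#
# Illustrative sketch (simulated data, not from the original file):
# an additive fit with two predictors; pyhat=TRUE returns the fitted
# values.
#   set.seed(1)
#   x<-matrix(runif(200),ncol=2)
#   y<-x[,1]+sin(2*pi*x[,2])+rnorm(100,sd=.2)
#   fit<-adrunl(x,y,pyhat=TRUE,plotit=FALSE)
#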




Rpca<-function(x,p=ncol(x)-1,locfun=llocv2,loc.val=NULL,iter=100,SCORES=FALSE,
gvar.fun=cov.mba,SEED=TRUE,...){
#
# Robust PCA using random orthogonal matrices and
# robust generalized variance method
#
#  locfun, by default, use the marginal medians
#  alternatives are mcd, tauloc, spat,...
#
if(SEED)set.seed(2)
x<-elimna(x)
n<-nrow(x)
m<-ncol(x)
if(is.null(loc.val))info<-locfun(x,...)$center
if(!is.null(loc.val))info<-loc.val
for(i in 1:n)x[i,]<-x[i,]-info
vals<-NA
z<-matrix(nrow=n,ncol=p)
bval<-array(NA,c(p,m,iter))
for(it in 1:iter){
B<-matrix(runif(p*m),nrow=p,ncol=m)
B <- t(ortho(t(B))) # so rows are orthogonal
bval[,,it]<-B
for(i in 1:n)z[i,]<-B%*%as.matrix(x[i,])
#vals[it]<-gvar(z)
vals[it]<-gvarg(z,var.fun=gvar.fun)
}
iord<-order(vals)
Bop<-0-bval[,,iord[iter]]
zval<-NULL
if(SCORES){
for(i in 1:n)z[i,]<-Bop%*%as.matrix(x[i,])
zval<-z
}
list(B=Bop,gen.var=vals[iord[iter]],scores=zval)
}
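#
# Illustrative sketch (simulated data, not from the original file):
# extracting two robust components from four correlated variables.
#   set.seed(1)
#   z<-matrix(rnorm(200),ncol=4)
#   z[,2]<-z[,1]+rnorm(50,sd=.3)
#   Rpca(z,p=2,SCORES=TRUE)$B
#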

Rsq<-function(x,y){
res=lsfit(x,y)$residuals
yhat=y-res
rsq=var(yhat)/var(y)
rsq
}
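#
# Illustrative check (simulated data, not from the original file):
# for least squares with an intercept, var(yhat)/var(y) equals the
# usual R-squared, so Rsq should agree with lm.
#   set.seed(1)
#   x<-rnorm(30); y<-x+rnorm(30)
#   Rsq(x,y)
#   summary(lm(y~x))$r.squared   # same value
#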

ols<-function(x,y,xout=FALSE,outfun=outpro,plotit=FALSE,xlab='X',ylab='Y',zlab='Z',...){
#
# Performs OLS regression calling built-in R function.
#
# xout=T will eliminate any leverage points (outliers among x values).
# If there is one predictor,
# plotit=TRUE will plot the points and the regression line.
#
m<-elimna(cbind(x,y))
n=nrow(m)
n.keep=n
x<-as.matrix(x)
p<-ncol(x)
pp<-p+1
x<-m[,1:p]
y<-m[,pp]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,pp]
n.keep=length(y)
}
x<-as.matrix(x)
temp<-summary(lm(y~x))
coef<-temp[4]$coefficients

if(plotit){
if(p==1){
plot(x,y,xlab=xlab,ylab=ylab)
abline(coef[,1])
}
if(p==2){
regp2plot(x,y,regfun=ols,xlab=xlab,ylab=ylab,zlab=zlab)
}}
Ftest<-temp[10]$fstatistic
Ftest.p.value<-1-pf(Ftest[1],Ftest[2],Ftest[3])
Rval=Rsq(x,y)
list(n=n,n.keep=n.keep,summary=coef,coef=coef[,1],Ftest.p.value=Ftest.p.value,R.squared=Rval)
}

olstest<-function(x,y,nboot=500,SEED=TRUE,RAD=TRUE,xout=FALSE,outfun=out,...){
#
# Test the hypothesis that all OLS slopes are zero.
# Heteroscedasticity is allowed.
#
# RAD=T: use Rademacher function to generate wild bootstrap values.
# RAD=F, use standardized uniform distribution.
#
if(SEED)set.seed(2)
m<-elimna(cbind(x,y))
x<-as.matrix(x)
p<-ncol(x)
pp<-p+1
x<-m[,1:p]
y<-m[,pp]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,pp]
}
x<-as.matrix(x)
temp<-lsfit(x,y)
yhat<-mean(y)
res<-y-yhat
test<-sum(temp$coef[2:pp]^2)
print("Taking bootstrap sample, please wait.")
if(RAD)data<-matrix(ifelse(rbinom(length(y)*nboot,1,0.5)==1,-1,1),nrow=nboot)
if(!RAD){
data<-matrix(runif(length(y)*nboot),nrow=nboot)#
data<-(data-.5)*sqrt(12) # standardize the random numbers.
}
rvalb<-apply(data,1,olstests1,yhat,res,x)
p.val<-sum(rvalb>=test)/nboot
list(p.value=p.val)
}

qrchkv2<-function(x,y,qval=.5,...){
#
# Test of a linear fit based on quantile regression.
# The method stems from He and Zhu 2003, JASA, 98, 1013-1022.
#
# This function computes the test statistic only; determining a
# critical value or p-value is left to the caller (cf. qrchk).
#
x=as.matrix(x)
p<-ncol(x)
pp1<-p+1
yx<-elimna(cbind(y,x)) #Eliminate missing values.
y<-yx[,1]
x<-yx[,2:pp1]
store.it=F
x<-as.matrix(x)
p.val<-NULL
crit.val<-NULL
x<-as.matrix(x)
# shift the  marginal x values so that the test statistic is
# invariant under changes in location
n<-length(y)
x=standm(x)
gdot<-cbind(rep(1,n),x)
gdot<-ortho(gdot)
x<-gdot[,2:pp1]
x<-as.matrix(x)
temp<-rqfit(x,y,qval=qval,res=T)
coef<-temp$coef
psi<-NA
psi<-ifelse(temp$residuals>0,qval,qval-1)
rnmat<-matrix(0,nrow=n,ncol=pp1)
ran.mat<-apply(x,2,rank)
flagvec<-apply(ran.mat,1,max)
for(j in 1:n){
flag<-ifelse(flagvec<=flagvec[j],T,F)
flag<-as.numeric(flag)
rnmat[j,]<-apply(flag*psi*gdot,2,sum)
}
rnmat<-rnmat/sqrt(n)
temp<-matrix(0,pp1,pp1)
for(i in 1:n)temp<-temp+rnmat[i,]%*%t(rnmat[i,])
temp<-temp/n
test<-max(eigen(temp)$values)
test
}

sm2str<-function(xx,y,iv=c(1,2),nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro,
STAND=FALSE,...){
#
# Compare robust measures of association of two predictors
# based on a smooth
#
if(!is.matrix(xx))stop("x should be a matrix with 2 or more columns")
if(ncol(xx)<2)stop("x should be a matrix with 2 or more columns")
val1=NA
val2=NA
x=xx[,iv]
xy=elimna(cbind(x,y))
x=xy[,1:2]
y=xy[,3]
if(xout){
x<-as.matrix(x)
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(SEED)set.seed(2)
data1<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec1=apply(data1,1,sm2str.sub,x[,1],y) # vector of length nboot
bvec2=apply(data2,1,sm2str.sub,x[,2],y) # vector of length nboot
bvecd=bvec1-bvec2
pv=akerdcdf(bvecd,pts=0)
vcor=cor(x,method="kendall")
pv=2*min(c(pv,1-pv))
p.crit=.25*abs(vcor[1,2])+.05+(100-length(y))/10000
p.crit=max(c(.05,p.crit))
list(p.value=pv,p.crit=p.crit)
}

sm2str.sub<-function(isub,x,y){
xmat<-x[isub]
val1<-lplot(xmat,y[isub],plotit=FALSE)$Explanatory.power
val1
}

akerdcdf<-function(xx,hval=NA,aval=.5,op=1,fr=.8,pyhat=TRUE,pts=0,plotit=FALSE,
xlab="",ylab=""){
#
# Compute cumulative adaptive kernel density estimate
# for univariate data
# (See Silverman, 1986)
# By default (univariate case) determine P(X<=pts),
# pts=0 by default.
#
# op=1 Use expected frequency as initial estimate of the density
# op=2 Univariate case only
#      Use normal kernel to get initial estimate of the density
#
fval<-"Done"
if(is.matrix(xx)){
if(ncol(xx)>1)fval<-akerdmul(xx,pts=pts,hval=hval,aval=aval,fr=fr,pr=pyhat,plotit=plotit)
plotit<-F
}
if(is.matrix(xx) && ncol(xx)==1)xx<-xx[,1]
if(!is.matrix(xx)){
x<-sort(xx)
if(op==1){
m<-mad(x)
if(m==0){
temp<-idealf(x)
m<-(temp$qu-temp$ql)/(qnorm(.75)-qnorm(.25))
}
if(m==0)m<-sqrt(winvar(x)/.4129)
if(m==0)stop("All measures of dispersion are equal to 0")
fhat <- rdplot(x,pyhat=TRUE,plotit=FALSE,fr=fr)
if(m>0)fhat<-fhat/(2*fr*m)
}
if(op==2){
init<-density(xx)
fhat <- init$y
x<-init$x
}
n<-length(x)
if(is.na(hval)){
sig<-sqrt(var(x))
temp<-idealf(x)
iqr<-(temp$qu-temp$ql)/1.34
A<-min(c(sig,iqr))
if(A==0)A<-sqrt(winvar(x))/.64
hval<-1.06*A/length(x)^(.2)
# See Silverman, 1986, pp. 47-48
}
gm<-exp(mean(log(fhat[fhat>0])))
alam<-(fhat/gm)^(0-aval)
dhat<-NA
if(is.na(pts[1]))pts<-x
pts<-sort(pts)
for(j in 1:length(pts)){
temp<-(pts[j]-x)/(hval*alam)
sq5=0-sqrt(5)
epan=.75*(temp-.2*temp^3/3)/sqrt(5)-.75*(sq5-.2*sq5^3/3)/sqrt(5)
flag=(temp>=sqrt(5))
epan[flag]=1
flag=(temp<sq5)
epan[flag]=0
dhat[j]<-mean(epan)
}
if(plotit){
plot(pts,dhat,type="n",ylab=ylab,xlab=xlab)
lines(pts,dhat)
}
if(pyhat)fval<-dhat
}
fval
}
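#
# Illustrative sketch (simulated data, not from the original file):
# for standard normal data the estimate of P(X<=0) should be near .5.
#   set.seed(1)
#   akerdcdf(rnorm(200),pts=0)
#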


epmod<-function(x,y,smfun=lplot,xout=FALSE,outfun=outpro,STAND=FALSE,...){
#
# Estimates explanatory power, via a smoother, for all possible
# subsets of the p predictors. Currently limited to p<=5
# By default, use lowess. (smfun=lplot)
#
x<-as.matrix(x)
d<-ncol(x)
p1<-d+1
temp<-elimna(cbind(x,y))
x<-temp[,1:d]
y<-temp[,p1]
x<-as.matrix(x)
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:d]
y<-m[,p1]
}
x<-as.matrix(x)
model<-modgen(d)
mout<-matrix(NA,length(model),2,dimnames=list(NULL,c("Model",
"Explanatory power")))
for (imod in 1:length(model)){
mout[imod,1]<-imod
mout[imod,2]<-smfun(x[,model[[imod]]],y,plotit=FALSE,...)$Explanatory.power
}
list(models=model,Explanatory.power=mout)
}
resdepth<-function(x,res)
{
##########################################################################
# This function computes the regression depth of a regression line based
# on its residuals. The fit could be, for example, a nonparametric
# regression or smooth.
#
# The algorithm is based on a simple modification of
#
#           Rousseeuw, P.J. and Hubert, M. (1996),
#           Regression Depth, Technical report, University of Antwerp
#
##########################################################################
        if(!is.vector(x)) stop("x should be a vector")
        n <- length(x)
        if(n < 2)
                stop("you need at least two observations")
flag=is.na(res)
x=x[!flag]
res=res[!flag]
xord=order(x)
x=x[xord]
res=res[xord]
        posres <- res >= 0
        negres <- res <= 0
        lplus <- cumsum(posres)
        rplus <- lplus[n] - lplus
        lmin <- cumsum(negres)
        rmin <- lmin[n] - lmin
        depth <- pmin(lplus + rmin, rplus + lmin)
        min(depth)
}
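#
# Illustrative sketch (simulated data, not from the original file):
# the regression depth of a least squares line, computed from its
# residuals.
#   set.seed(1)
#   x<-rnorm(30)
#   res<-lsfit(x,x+rnorm(30))$residuals
#   resdepth(x,res)
#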
depthcom<-function(x1,y1,x2,y2,est=tmean,fr=1){
temp1=depthcomsub(x1,y1,x2,y2,est=est,fr=fr)
temp2=depthcomsub(x2,y2,x1,y1,est=est,fr=fr)
dep=max(c(abs(temp1$dep1-temp1$dep2),abs(temp2$dep1-temp2$dep2)))
dep
}
depthcomsub<-function(x1,y1,x2,y2,est=tmean,fr=1){
x1=(x1-median(x1))/mad(x1)
x2=(x2-median(x2))/mad(x2)
yh1=runhat(x1,y1,est=est,fr=fr)
yh2=runhat(x2,y2,pts=x1,est=est,fr=fr)
flag=is.na(yh2)
res1=y1-yh1
res2=y1[!flag]-yh2[!flag]
dep1=resdepth(x1,res1)
dep2=resdepth(x1[!flag],res2)
list(dep1=dep1,dep2=dep2)
}

ancsm<-function(x1,y1,x2,y2,crit.mat=NULL,nboot=200,SEED=TRUE,REP.CRIT=FALSE,LP=TRUE,
est=tmean,fr=NULL,plotit=TRUE,sm=FALSE,xout=FALSE,outfun=out,xlab="X",ylab="Y",...){
#
# Compare two nonparametric
# regression lines corresponding to two independent groups
#  using the depths of smooths.
# One covariate only is allowed.
#
# A running interval smoother is used.
#
#  sm=T will create smooths using bootstrap bagging.
#
if(ncol(as.matrix(x1))>1)stop("One covariate only is allowed")
if(xout){
flag1=outfun(x1,...)$keep
flag2=outfun(x2,...)$keep
x1=x1[flag1]
y1=y1[flag1]
x2=x2[flag2]
y2=y2[flag2]
}
xy=elimna(cbind(x1,y1))
x1=xy[,1]
xord=order(x1)
x1=x1[xord]
y1=xy[xord,2]
xy=elimna(cbind(x2,y2))
x2=xy[,1]
xord=order(x2)
x2=x2[xord]
y2=xy[xord,2]
n1=length(y1)
n2=length(y2)
if(is.null(fr)){
fr=1
if(min(n1,n2)>150)fr=.2
if(max(n1,n2)<35)fr=.5
}
if(SEED)set.seed(2)
if(is.null(crit.mat[1])){
crit.val=NA
yall=c(y1,y2)
xall=c(x1,x2)
nn=n1+n2
il=n1+1
for(i in 1:nboot){
data=sample(nn,nn,TRUE)
yy1=yall[data[1:n1]]
yy2=yall[data[il:nn]]
xx1=xall[data[1:n1]]
xx2=xall[data[il:nn]]
crit.mat[i]=depthcom(xx1,yy1,xx2,yy2,est=est,fr=fr)
}}
if(plotit)runmean2g(x1,y1,x2,y2,fr=fr,est=est,sm=sm,xlab=xlab,ylab=ylab,LP=LP,...)
dep=depthcom(x1,y1,x2,y2,est=est,fr=fr)
n=min(n1,n2)
pv=1-mean(crit.mat<dep)
if(!REP.CRIT)crit.mat=NULL
list(p.value=pv,crit.mat=crit.mat,test.depth=dep)
}


ts2str.sub<-function(isub,x,y){
val1<-tsreg(x[isub],y[isub])$Explanatory.Power
val1
}

ts2str<-function(xx,y,iv=c(1,2),nboot=400,SEED=TRUE){
#
# Compare strength of association of two predictors via the Theil-Sen
# estimator.
#
if(!is.matrix(xx))stop("x should be a matrix with 2 or more columns")
if(ncol(xx)<2)stop("x should be a matrix with 2 or more columns")
val1=NA
val2=NA
if(SEED)set.seed(2)
x=xx[,iv]
xy=elimna(cbind(x,y))
x=xy[,1:2]
y=xy[,3]
data1<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec1<-apply(data1,1,ts2str.sub,x[,1],y) # vector of length nboot
bvec2<-apply(data2,1,ts2str.sub,x[,2],y) # vector of length nboot
bvec=bvec1-bvec2
pv=akerdcdf(bvec,pts=0)
temp=corb(x[,1],x[,2],corfun=tau,SEED=F)
padj=.05
if(temp$p.value<=.05)padj=.352*abs(temp$cor.est)+.049
pv=2*min(c(pv,1-pv))
list(p.value=pv,p.crit=padj)
}

tsplitbt<-function(J,K,x,tr=.2,alpha=.05,JK=J*K,grp=c(1:JK),nboot=599,
SEED=TRUE,monitor=F){
#
# A bootstrap-t for performing a split-plot design
# with trimmed means.
# By default, 20% trimming is used with B=599 bootstrap samples.
#
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
if(SEED)set.seed(2)
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x)) {
y <- list()
ik=0
il=c(1:K)-K
for(j in 1:J){
il=il+K
zz=x[,il]
zz=elimna(zz)
for(k in 1:K){
ik=ik+1
y[[ik]]=zz[,k]
}}
                x <- y
}
JK<-J*K
data<-list()
xcen<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
xcen[[j]]<-data[[j]]-mean(data[[j]],tr) # Centered data
}
x<-data
#
# Next determine the n_j values
nvec<-NA
jp<-1-K
for(j in 1:J){
jp<-jp+K
nvec[j]<-length(x[[j]])
}
if(min(nvec)<10){
print("Warning: with small sample sizes, a bootstrap-t method can")
print("result in estimated variances equal to zero, resulting in")
print("this function terminating and giving no results and peculiar error messages.")
}
blist<-list()
print("Taking bootstrap samples. Please wait.")
testmat<-matrix(NA,ncol=3,nrow=nboot)
for(iboot in 1:nboot){
iv<-0
for(j in 1:J){
temp<-sample(nvec[j],replace = T)
for(k in 1:K){
iv<-iv+1
tempx<-xcen[[iv]]
blist[[iv]]<-tempx[temp]
}}
if(monitor)print(paste("Bootstrap iteration" ,iboot, "is complete"))
btest<-tsplit(J,K,blist,tr)
testmat[iboot,1]<-btest$Qa
testmat[iboot,2]<-btest$Qb
testmat[iboot,3]<-btest$Qab
}
lcrit<-round((1-alpha)*nboot)
temp3<-sort(testmat[,1])
crit.Qa<-temp3[lcrit]
temp3<-sort(testmat[,2])
crit.Qb<-temp3[lcrit]
temp3<-sort(testmat[,3])
crit.Qab<-temp3[lcrit]
temp4<-tsplit(J,K,x,tr=tr)
list(Qa=temp4$Qa,Qb=temp4$Qb,Qab=temp4$Qab,crit.Qa=crit.Qa,crit.Qb=crit.Qb,crit.Qab=crit.Qab)
}
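#
# Illustrative sketch (simulated data, not from the original file):
# a bootstrap-t split-plot analysis for a 2-by-2 design stored in a
# matrix.
#   set.seed(1)
#   dat<-matrix(rnorm(80),ncol=4)
#   tsplitbt(2,2,dat,nboot=100)
#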
ogkcor<-function(x,y=NA,n.iter=1,sigmamu=taulc,v=gkcov,beta=.9,...){
#
# Compute robust (weighted) correlation matrix in Maronna and Zamar
# (2002, Technometrics, eq. 7).
#
# n.iter number of iterations. 1 seems to be best
# sigmamu computes a robust measure of location and scale for
#  data stored in a single vector.
#  v robust correlation coefficient
#  estloc, a robust measure of location
#
if(!is.na(y[1]))x<-cbind(x,y)
if(!is.matrix(x))stop("x should be a matrix")
x<-elimna(x)
n<-nrow(x)
p<-ncol(x)
val<-matrix(NA,p,p)
temp<-ogk(x,sigmamu=sigmamu,v=v,n.iter=n.iter,beta=beta,...)$cov
J=(p^2-p)/2
p.values=matrix(NA,nrow=J,ncol=3)
info=matrix(NA,nrow=J,ncol=4)
dimnames(p.values)<-list(NULL,c("VAR","VAR","p.value"))
dimnames(info)<-list(NULL,c("VAR","VAR","COR","Test.Stat"))
ic=0
for(j in 1:p){
for(k in 1:p){
val[j,k]<-temp[j,k]/sqrt(temp[k,k]*temp[j,j])
test.stat<-abs(val*sqrt((n-2)/(1-val^2)))
if(j<k){
test=test.stat[j,k]
ic=ic+1
p.values[ic,1]=j
p.values[ic,2]=k
info[ic,1]=j
info[ic,2]=k
info[ic,3]=val[j,k]
info[ic,4]=test
p.value=c("Greater than .1")
crit<-4.8/n+2.72
if(test>=crit)p.value<-c("Less than .1")
crit<-15.49/n+2.68
if(test>=crit)p.value<-c("Less than .05")
crit<-14.22/n+3.26
if(test>=crit)p.value<-c("Less than .025")
crit<-24.83/n+3.74
if(test>=crit)p.value<-c("Less than .01")
p.values[ic,3]=p.value
}}}
list(cor=val,test.results=info,p.values=p.values)
}


resdepth.sub<-function(x,res)
{
##########################################################################
# This function computes the regression depth of a regression line based
# on its residuals. The fit could be, for example, a nonparametric
# regression or smooth.
#
# The algorithm is based on a simple modification of
#
#           Rousseeuw, P.J. and Hubert, M. (1996),
#           Regression Depth, Technical report, University of Antwerp
#
##########################################################################
        if(!is.vector(x)) stop("x should be a vector")
        n <- length(x)
        if(n < 2)
                stop("you need at least two observations")
flag=is.na(res)
x=x[!flag]
res=res[!flag]
xord=order(x)
x=x[xord]
res=res[xord]
        posres <- res >= 0
        negres <- res <= 0
        lplus <- cumsum(posres)
        rplus <- lplus[n] - lplus
        lmin <- cumsum(negres)
        rmin <- lmin[n] - lmin
        depth <- pmin(lplus + rmin, rplus + lmin)
        min(depth)
}
tbs <- function(x,eps=1e-3,maxiter=20,r=.45,alpha=.05,init.est=cov.mcd){
#        Rocke's constrained S-estimator
#
#      r=.45 is the breakdown point
#      alpha=.05 is the asymptotic rejection probability.
#
library(MASS)
#if(!is.matrix(x))stop("x should be a matrix with two or more columns")
x<-elimna(x)
x=as.matrix(x)
    n <- nrow(x)
    p <- ncol(x)
LIST=F
if(p==1){
LIST=T
p=2
x=cbind(x,rnorm(nrow(x)))
# Yes, this code is odd, but for the moment it is the easiest way of handling p=1
}
temp<-init.est(x)
#  very poor outside rate per obs under normality.
t1<-temp$center
s<-temp$cov
#if(p==1)stop("x should be a matrix with two or more columns")
c1M<-cgen.bt(n,p,r,alpha,asymp=FALSE)
c1<-c1M$c1
if(c1==0)c1<-.001 #Otherwise get division by zero
M<-c1M$M
    b0 <- erho.bt(p,c1,M)
    crit <- 100
    iter <- 1
    w1d <- rep(1,n)
    w2d <- w1d
    while ((crit > eps)&(iter <= maxiter))
    {
        t.old <- t1
        s.old <- s
        wt.old <- w1d
        v.old <- w2d
        d2 <- mahalanobis(x,center=t1,cov=s)
        d <- sqrt(d2)
        k <- ksolve.bt(d,p,c1,M,b0)
        d <- d/k
        w1d <- wt.bt(d,c1,M)
        w2d <- v.bt(d,c1,M)
        t1 <- (w1d %*% x)/sum(w1d)
        s <- s*0
        for (i in 1:n)
        {
            xc <- as.vector(x[i,]-t1)
            s <- s + as.numeric(w1d[i])*(xc %o% xc)
        }
        s <- p*s/sum(w2d)
        mnorm <- sqrt(as.vector(t.old) %*% as.vector(t.old))
        snorm <- eigen(s.old)$values[1]
        crit1 <- max(abs(t1 - t.old))
#        crit <- max(crit1,crit2)
        crit <- max(abs(w1d-wt.old))/max(w1d)
        iter <- iter+1
    }
if(LIST){
v1=t1[1]
v2=s[1,1]
return(list(center=v1,var=v2))
}
if(!LIST)return(list(center=t1,cov=s))
}

pcorhc4sub<-function(x,y,CN=F){
#
#   Compute a .95 confidence interval for Pearson's correlation coefficient
#   using the HC4 method.
#
# CN=T degrees of freedom are infinite, as done by Cribari-Neto (2004)
# CN=F degrees of freedom are n-p
#
xy<-elimna(cbind(x,y))
x<-xy[,1]
y<-xy[,2]
z1=(x-mean(x))/sqrt(var(x))
z2=(y-mean(y))/sqrt(var(y))
ans=olshc4sub(z1,z2,CN=CN)
ci=ans$ci[2,3:4]
ci
}

TWOpNOV<-function(x,y){
#
#   Compute a .95 confidence interval
#   for the difference between two dependent Pearson correlations,
#   non-overlapping case.
#
#    Both x and y are assumed to be matrices with two columns.
#   The function compares the correlation between x[,1] and y[,1]
#   to the correlation between x[,2] and y[,2].
#
#  For simulation results, see Wilcox (2008).
#  COMPARING PEARSON CORRELATIONS: DEALING WITH
#  HETEROSCEDASTICITY AND NON-NORMALITY, unpublished tech report.
#
#
if(!is.matrix(x))stop("x should be a matrix")
if(!is.matrix(y))stop("y should be a matrix")
if(ncol(x)!=2)stop("x should be a matrix with 2 columns")
if(ncol(y)!=2)stop("y should be a matrix with 2 columns")
xy=elimna(cbind(x,y))
x1=xy[,1]
x2=xy[,2]
y1=xy[,3]
y2=xy[,4]
r12=cor(x1,x2)
r13=cor(x1,y1)
r14=cor(x1,y2)
r23=cor(x2,y1)
r24=cor(x2,y2)
r34=cor(y1,y2)
term1=.5*r12*r34*(r13^2+r14^2+r23^2+r24^2)
term2=r12*r13*r14+r12*r23*r24+r13*r23*r34+r14*r24*r34
corhat=(term1+r13*r24+r14*r23-term2)/((1-r12^2)*(1-r34^2))
temp=pcorbv4(x1,x2,SEED=F)
ci12=temp$ci[1]
ci12[2]=temp$ci[2]
temp=pcorbv4(y1,y2,SEED=F)
ci34=temp$ci[1]
ci34[2]=temp$ci[2]
terml=2*corhat*(r12-ci12[1])*(ci34[2]-r34)
termu=2*corhat*(ci12[2]-r12)*(r34-ci34[1])
L=r12-r34-sqrt((r12-ci12[1])^2+(ci34[2]-r34)^2-terml)
U=r12-r34+sqrt((r12-ci12[2])^2+(ci34[1]-r34)^2-termu)
list(ci.lower=L,ci.upper=U)
}
TWOpov<-function(x,y,alpha=.05,CN=F){
#
# Comparing two dependent correlations: Overlapping case
#
# x is assumed to be a matrix with 2 columns
#
#  Compare correlation of x[,1] with y to x[,2] with y
#
if(!is.matrix(x))stop("x should be a matrix")
if(ncol(x)!=2)stop("x should be a matrix with two columns")
xy=elimna(cbind(x,y))
x1=xy[,1]
x2=xy[,2]
y=xy[,3]
r12=cor(x1,y)
r13=cor(x2,y)
r23=cor(x1,x2)
ci12=pcorhc4(x1,y,alpha=alpha,CN=CN)$ci
ci13=pcorhc4(x2,y,alpha=alpha,CN=CN)$ci
corhat=((r23-.5*r12*r13)*(1-r12^2-r13^2-r23^2)+r23^3)/((1-r12^2)*(1-r13^2))
term1=2*corhat*(r12-ci12[1])*(ci13[2]-r13)
term2=2*corhat*(r12-ci12[2])*(ci13[1]-r13)
L=r12-r13-sqrt((r12-ci12[1])^2+(ci13[2]-r13)^2-term1)
U=r12-r13+sqrt((r12-ci12[2])^2+(ci13[1]-r13)^2-term2)
c(L,U)
}
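#
# Illustrative sketch (simulated data, not from the original file):
# comparing the correlation of x[,1] with y to that of x[,2] with y.
#   set.seed(1)
#   x<-matrix(rnorm(100),ncol=2)
#   y<-x[,1]+rnorm(50)
#   TWOpov(x,y)   # CI for the difference between the two correlations
#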

sm2strv7<-function(xx,y,iv=c(1,2),nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro,
STAND=FALSE,...){
#
# Compare robust measures of association of two predictors
# based on a smooth
#
# x is a matrix with two columns
# robust explanatory of  x[,1] with y is compared to x[,2] with y.
# xout=T eliminates any leverage points found with outfun, which
# defaults to outpro, a projection method for detecting outliers.
#
#  iv: indicates the two columns of x that will be used. By default, col 1 and 2 are used.
#
if(!is.matrix(xx))stop("x should be a matrix with 2 or more columns")
if(ncol(xx)<2)stop("x should be a matrix with 2 or more columns")
val1=NA
val2=NA
x=xx[,iv]
xy=elimna(cbind(x,y))
x=xy[,1:2]
y=xy[,3]
if(xout){
x<-as.matrix(x)
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(SEED)set.seed(2)
data1<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec1=apply(data1,1,sm2str.sub,x[,1],y) # vector of length nboot
bvec2=apply(data2,1,sm2str.sub,x[,2],y) # vector of length nboot
bvecd=bvec1-bvec2
pv=akerdcdf(bvecd,pts=0)
vcor=cor(x,method="kendall")
pv=2*min(c(pv,1-pv))
p.crit=.25*abs(vcor[1,2])+.05+(100-length(y))/10000
p.crit=max(c(.05,p.crit))
list(p.value=pv,p.crit=p.crit)
}

pcorhc4<-function(x,y,alpha=.05,CN=F){
#
#   Compute a .95 confidence interval for Pearson's correlation coefficient
#   using the HC4 method.
#
# CN=F, degrees of freedom are n-p; seems better for general use.
# CN=T  degrees of freedom are infinite, as done by Cribari-Neto (2004)
#
xy<-elimna(cbind(x,y))
x<-xy[,1]
y<-xy[,2]
z1=(x-mean(x))/sqrt(var(x))
z2=(y-mean(y))/sqrt(var(y))
ans=olshc4(z1,z2,alpha=alpha,CN=CN)
list(r=ans$r,ci=ans$ci[2,3:4],p.value=ans$ci[2,5])
}
regpreS<-function(x,y,regfun=lsfit,error=absfun,nboot=100,
mval=round(5*log(length(y))),locfun=mean,pr=TRUE,
xout=FALSE,outfun=out,
plotit=TRUE,xlab="Model Number",ylab="Prediction Error",SEED=TRUE,...){
#
#   Stepwise selection of predictors based on
#   estimates of  prediction error using the regression method
#   regfun,
#   which defaults to least squares.  Prediction error
#   is estimated with .632 method.
#   (See Efron and Tibshirani, 1993, pp. 252--254)
#
#   The predictor values are assumed to be in the n by p matrix x.
#   The default number of bootstrap samples is nboot=100
#
#   Prediction error is the expected value of the function error.
#   The argument error defaults to absolute  error. To use
#   squared error, set error=sqfun.
#
#   regfun can be any R function that returns the coefficients in
#   the vector regfun$coef, the first element of which contains the
#   estimated intercept, the second element contains the estimate of
#   the first predictor, etc.
#
#   The default value for mval, the number of observations to resample
#   for each of the B bootstrap samples, is based on results by
#   Shao (JASA, 1996, 655-665). (When resampling n vectors of observations,
#   model selection may not lead to the correct model as n->infinity.)
#
if(SEED)set.seed(2)
q=ncol(x)
qm1=q-1
x<-as.matrix(x)
d<-ncol(x)
p1<-d+1
temp<-elimna(cbind(x,y))
x<-temp[,1:d]
y<-temp[,d+1]
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,SEED=FALSE,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
adit=NULL
pval=c(1:ncol(x))
#pval=c(1:q)
allp=pval
for(ip in 1:qm1){
model=list()
for(j  in 1:length(pval))model[[j]]=c(adit,pval[j])
temp=regpre(x,y,model=model,pr=FALSE,plotit=FALSE,adz=FALSE,regfun=regfun,
SEED=SEED)$estimates
pbest=order(temp[,5])
adit=model[[pbest[1]]]
pval=allp[-adit]
}
output=model[[pbest[1]]]
output=c(output,allp[-output])
output
}

akp.effect<-function(x,y,EQVAR=TRUE,tr=.2){
#
# Computes the robust effect size suggested by
# Algina, Keselman and Penfield (Psychological Methods, 2005, 317-328).
library(MASS)
x<-elimna(x)
y<-elimna(y)
n1<-length(x)
n2<-length(y)
s1sq=winvar(x,tr=tr)
s2sq=winvar(y,tr=tr)
spsq<-(n1-1)*s1sq+(n2-1)*s2sq
sp<-sqrt(spsq/(n1+n2-2))
cterm=1
if(tr>0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr
cterm=sqrt(cterm)
if(EQVAR)dval<-cterm*(tmean(x,tr)-tmean(y,tr))/sp
if(!EQVAR){
dval<-cterm*(tmean(x,tr)-tmean(y,tr))/sqrt(s1sq)
dval[2]=cterm*(tmean(x,tr)-tmean(y,tr))/sqrt(s2sq)
}
dval
}
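#
# Illustrative sketch (simulated data, not from the original file):
# two normal samples shifted by one standard deviation should yield
# an effect size near 1.
#   set.seed(1)
#   akp.effect(rnorm(100),rnorm(100,1))
#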
wwwtrim<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L){
#  Perform a within-by-within-by-within (three-way) ANOVA on trimmed means.
#
#  That is, there are three factors with a total of JKL dependent groups.
#
#  The variable data is assumed to contain the raw
#  data stored in list mode. data[[1]] contains the data
#  for the first level of all three factors: level 1,1,1.
#  data][2]] is assumed to contain the data for level 1 of the
#  first two factors and level 2 of the third factor: level 1,1,2
#  data[[L]] is the data for level 1,1,L
#  data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L.
#  data[[KL+1]] is level 2,1,1, etc.
#
#  The default amount of trimming is tr=.2
#
#  It is assumed that data has length JKL, the total number of
#  groups being tested.
#
if(is.data.frame(data))data=as.matrix(data)
if(is.list(data))data=listm(elimna(matl(data)))
if(is.matrix(data))data=listm(elimna(data))
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups in data is")
print(length(data))
print("Warning: These two values are not equal")
}
tmeans<-0
h<-0
v<-0
for (i in 1:p){
tmeans[i]<-mean(data[[grp[i]]],tr)
h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]]))
#    h is the effective sample size
}
v=covmtrim(data,tr=tr)
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
il<-matrix(c(rep(1,L)),1,L)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
lm1<-L-1
cl<-diag(1,lm1,L)
for (i in 1:lm1)cl[i,i+1]<-0-1
#  Do test for factor A
cmat<-kron(cj,kron(ik,il))  # Contrast matrix for factor A
Qa=bwwtrim.sub(cmat, tmeans, v, h,p)
Qa.siglevel <- 1 - pf(Qa, J - 1, 999)
# Do test for factor B
cmat<-kron(ij,kron(ck,il))  # Contrast matrix for factor B
Qb=bwwtrim.sub(cmat, tmeans, v, h,p)
 Qb.siglevel <- 1 - pf(Qb, K - 1, 999)
# Do test for factor C
cmat<-kron(ij,kron(ik,cl))  # Contrast matrix for factor C
Qc<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qc.siglevel <- 1 - pf(Qc, L - 1, 999)
# Do test for factor A by B interaction
cmat<-kron(cj,kron(ck,il))  # Contrast matrix for factor A by B
Qab<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qab.siglevel <- 1 - pf(Qab, (J - 1) * (K - 1), 999)
# Do test for factor A by C interaction
cmat<-kron(cj,kron(ik,cl))  # Contrast matrix for factor A by C
Qac<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qac.siglevel <- 1 - pf(Qac, (J - 1) * (L - 1), 999)
# Do test for factor B by C interaction
cmat<-kron(ij,kron(ck,cl))  # Contrast matrix for factor B by C
Qbc<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qbc.siglevel <- 1 - pf(Qbc, (K - 1) * (L - 1), 999)
# Do test for factor A by B by C interaction
cmat<-kron(cj,kron(ck,cl))  # Contrast matrix for factor A by B by C
Qabc<-bwwtrim.sub(cmat, tmeans, v, h,p)
Qabc.siglevel <-1-pf(Qabc,(J-1)*(K-1)*(L-1), 999)
list(Qa=Qa,Qa.p.value=Qa.siglevel,Qb=Qb,Qb.p.value=Qb.siglevel,
Qc=Qc,Qc.p.value=Qc.siglevel,Qab=Qab,Qab.p.value=Qab.siglevel,
Qac=Qac,Qac.p.value=Qac.siglevel,Qbc=Qbc,Qbc.p.value=Qbc.siglevel,
Qabc=Qabc,Qabc.p.value=Qabc.siglevel)
}
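#
# Illustrative sketch (simulated data, not from the original file):
# a 2-by-2-by-2 within-by-within-by-within design, data in a matrix
# with JKL=8 columns.
#   set.seed(1)
#   dat<-matrix(rnorm(160),ncol=8)
#   wwwtrim(2,2,2,dat)
#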


ltsR<-function(x,y,RES=FALSE,varfun=pbvar,corfun=pbcor){
#
# Least trimmed squares regression via ltsreg (MASS package), plus
# a robust measure of the strength of the association based on varfun.
#
library(MASS)
xy=elimna(cbind(x,y))
p1=ncol(xy)
p=p1-1
x=xy[,1:p]
y=xy[,p1]
temp=ltsreg(x,y)$coef
x=as.matrix(x)
p=ncol(x)+1
res<-y-x%*%temp[2:p]-temp[1]
yhat<-y-res
if(!RES)res=NULL
e.pow<-varfun(yhat)/varfun(y)
if(is.na(e.pow))e.pow<-1
if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2
list(coef=temp,residuals=res,Explanatory.Power=e.pow,
Strength.Assoc=sqrt(e.pow))
}

standmar<-function(x,locfun=lloc,est=mean,scat=var,...){
# standardize a matrix x
#
x=as.matrix(x)
m1=locfun(x,est=est,na.rm=TRUE)  # use the location function passed via locfun
v1=apply(x,2,scat,na.rm=TRUE)
p=ncol(x)
for(j in 1:p)x[,j]=(x[,j]-m1[j])/sqrt(v1[j])
x
}
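
# A minimal usage sketch for standmar (data simulated for illustration):
if(FALSE){
set.seed(1)
z<-standmar(matrix(rnorm(60),ncol=3))
apply(z,2,mean)   # approximately 0 in each column
apply(z,2,var)    # approximately 1 in each column
}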

qsmcobs<-function(x,y,qval=.5,xlab="X",ylab="Y",FIT=TRUE,pc=".",plotit=TRUE,
xout=FALSE,outfun=out,q=NULL,...){
#
# Plots smooths of quantile regression lines using R package cobs
#
# qval is the quantile
#  qsmcobs(x,y,qval=c(.2,.5,.8)) will plot three smooths corresponding to
#  the .2, .5 and .8 quantile regression lines.
#
# FIT=T, uses the values returned by predict
# FIT=F, determines predicted Y for each X and plots the results
library(cobs)
if(!is.null(q))qval=q
x=as.matrix(x)
if(xout){
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
}
yhat=NULL
res=NULL
if(plotit)plot(x,y,xlab=xlab,ylab=ylab,pch=pc)
if(FIT){
for(j in 1:length(qval)){
if(plotit)lines(predict(cobs(x,y,tau=qval[j],print.mesg=FALSE,print.warn=FALSE)))
}}
if(!FIT){
for(j in 1:length(qval)){
temp=cobs(x,y,tau=qval[j],print.mesg=FALSE,print.warn=FALSE)
xord=order(x)
if(plotit)lines(x[xord],temp$fitted[xord])
}
if(length(qval)==1){
yhat=temp$fitted
#res=y-yhat
#  yhat is only for the unique x values. If x has, say, three tied
#  values equal to 6, then yhat contains only one predicted value for
#  x=6, not three yhat values all equal to the predicted value at x=6.
}
}
list(yhat=yhat)
}


Qdepthcom<-function(x1,y1,x2,y2,qval){
#
# Compare the qval quantile regression fits (via cobs) of two groups:
# returns the maximum discrepancy in residual depth when each group's
# fit is applied to the other group's data.
#
temp1=Qdepthcomsub(x1,y1,x2,y2,qval)
temp2=Qdepthcomsub(x2,y2,x1,y1,qval)
dep=max(c(abs(temp1$dep1-temp1$dep2),abs(temp2$dep1-temp2$dep2)))
dep
}
Qdepthcomsub<-function(x1,y1,x2,y2,qval){
x1=(x1-median(x1))/mad(x1)
x2=(x2-median(x2))/mad(x2)
yh1=qsmcobs(x1,y1,FIT=FALSE,qval=qval,plotit=FALSE)$yhat
temp2=cobs(x2,y2,print.mesg=FALSE,print.warn=FALSE,tau=qval)
yh2=predict(temp2,z=x1)
yh2=yh2[,2]
flag=is.na(yh2)
res1=y1-yh1
res2=y1[!flag]-yh2[!flag]
dep1=resdepth(x1,res1)
dep2=resdepth(x1[!flag],res2)
list(dep1=dep1,dep2=dep2)
}


mulgreg<-function(x,y,cov.fun=rmba){
#
# Do the multivariate regression method of Rousseeuw, Van Aelst,
# Van Driessen and Agullo (2004), Technometrics, 46, 293-305.
#
# (y can be multivariate)
#
library(MASS)
if(!is.matrix(y))stop("y is not a matrix")
X<-cbind(x,y)
X<-elimna(X)
qy<-ncol(y)
qx<-ncol(x)
qxp1<-qx+1
tqyqx<-qy+qx
y<-X[,qxp1:tqyqx]
# compute initial estimate of slopes and intercept:
locscat<-cov.fun(X)
sig<-locscat$cov
mu<-locscat$center
sigxx<-sig[1:qx,1:qx]
sigxy<-sig[1:qx,qxp1:tqyqx]
sigyy<-sig[qxp1:tqyqx,qxp1:tqyqx]
Bhat<-solve(sigxx)%*%sigxy
sige<-sigyy-t(Bhat)%*%sigxx%*%Bhat
sige.inv<-solve(sige)
Ahat<-t(mu[qxp1:tqyqx]-t(Bhat)%*%mu[1:qx])
resL<-matrix(nrow=nrow(X),ncol=qy)
for(i in 1:nrow(X))resL[i,]<-y[i,]-t(Bhat)%*%X[i,1:qx]
for(j in 1:qy)resL[,j]<-resL[,j]-Ahat[j]
list(coef=rbind(Ahat,Bhat),residuals=resL)
}

tsp1reg<-function(x,y,plotit=FALSE,HD=FALSE){
#
# Compute the Theil-Sen regression estimator.
# Only a single predictor is allowed in this version
#
temp<-matrix(c(x,y),ncol=2)
temp<-elimna(temp)     # Remove any pairs with missing values
x<-temp[,1]
y<-temp[,2]
ord<-order(x)
xs<-x[ord]
ys<-y[ord]
vec1<-outer(ys,ys,"-")
vec2<-outer(xs,xs,"-")
v1<-vec1[vec2>0]
v2<-vec2[vec2>0]
slope<-median(v1/v2,na.rm=TRUE)
if(!HD)coef<-median(y,na.rm=TRUE)-slope*median(x,na.rm=TRUE)
if(HD)coef<-hd(y,na.rm=TRUE)-slope*hd(x,na.rm=TRUE)
names(coef)<-"Intercept"
coef<-c(coef,slope)
if(plotit){
plot(x,y,xlab="X",ylab="Y")
abline(coef)
}
res<-y-slope*x-coef[1]
list(coef=coef,residuals=res)
}
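
# A minimal usage sketch for tsp1reg (data simulated for illustration):
if(FALSE){
set.seed(1)
x<-rnorm(50)
y<-2+3*x+rnorm(50)
tsp1reg(x,y)$coef   # intercept and slope; roughly 2 and 3 here
}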

gplot<-function(x,xlab="Group",ylab="",xnum=F){
if(is.matrix(x))x<-listm(x)
if(!xnum)par(xaxt="n")
mval<-NA
vals<-x[[1]]
gval<-rep(1,length(x[[1]]))
for(j in 2:length(x)){
vals<-c(vals,x[[j]])
gval<-c(gval,rep(j,length(x[[j]])))
}
plot(gval,vals,xlab=xlab,ylab=ylab)
}

trimpb<-function(x,tr=.2,alpha=.05,nboot=2000,WIN=FALSE,win=.1,
plotit=FALSE,pop=1,null.value=0,pr=TRUE,xlab="X",fr=NA){
#
#   Compute a 1-alpha confidence interval for
#   a trimmed mean.
#
#   The default number of bootstrap samples is nboot=2000
#
#   win is the amount of Winsorizing before bootstrapping
#   when WIN=T.
#
#   Missing values are automatically removed.
#
#  null.value is the null value: the hypothesis that the trimmed mean
#  equals null.value is tested.
#
#  plotit=TRUE gives a plot of the bootstrap values
#  pop=1 results in the expected frequency curve.
#  pop=2 kernel density estimate
#  pop=3 boxplot
#  pop=4 stem-and-leaf
#  pop=5 histogram
#  pop=6 adaptive kernel density estimate.
#
#  fr controls the amount of smoothing when plotting the bootstrap values
#  via the function rdplot. fr=NA means the function will use fr=.8
#  (When plotting bivariate data, rdplot uses fr=.6 by default.)
#
if(pr){
print("The p-value returned by the this function is based on the")
print("null value specified by the argument null.value, which defaults to 0")
}
x<-x[!is.na(x)]
if(WIN){
if(win > tr)stop("The amount of Winsorizing must be less than or equal to the amount of trimming")
x<-winval(x,win)
}
crit<-alpha/2
icl<-round(crit*nboot)+1
icu<-nboot-icl
bvec<-NA
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,mean,tr) # Bootstrapped trimmed means
bvec<-sort(bvec)
#p.value<-sum(bvec<null.value)/nboot
p.value<-mean(bvec<null.value)+.5*mean(bvec==null.value)
p.value<-2*min(p.value,1-p.value)
ci<-NA
ci[1]<-bvec[icl]
ci[2]<-bvec[icu]
if(plotit){
if(pop==1)rdplot(as.vector(bvec),fr=fr,xlab=xlab)
if(pop==2)kdplot(as.vector(bvec))
if(pop==3)boxplot(as.vector(bvec))
if(pop==4)stem(as.vector(bvec))
if(pop==5)hist(as.vector(bvec))
if(pop==6)akerd(as.vector(bvec),xlab=xlab)
}
list(ci=ci,p.value=p.value)
}
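
# A minimal usage sketch for trimpb (data simulated for illustration):
if(FALSE){
set.seed(1)
x<-rexp(40)
trimpb(x,null.value=.5)   # CI and p-value for H0: 20% trimmed mean = .5
}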

cobs2g<-function(x1,y1,x2,y2,xlab="X",ylab="Y",qval=.5,xout=FALSE,outfun=out,...){
#
# Plot two regression lines, estimated via COBS
# (quantile regression using B-splines)
#
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
library(cobs)
xy=elimna(cbind(x1,y1))
x1=xy[,1]
xord=order(x1)
x1=x1[xord]
y1=xy[xord,2]
xy=elimna(cbind(x2,y2))
x2=xy[,1]
xord=order(x2)
x2=x2[xord]
y2=xy[xord,2]
plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab)
temp1=cobs(x1,y1,print.mesg=FALSE,print.warn=FALSE,tau=qval)
temp2=cobs(x2,y2,print.mesg=FALSE,print.warn=FALSE,tau=qval)
points(x1,y1)
points(x2,y2,pch="+")
lines(x1,temp1$fitted)
lines(x2,temp2$fitted,lty=2)
}


wwtrim<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2){
#
#  Perform a J by K anova using trimmed means with
#  repeated measures on both factors.
#
#  tr=.2 is default trimming
#
#  The R variable data is assumed to contain the raw
#  data stored in list mode. data[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  data[[K]] is the data for level 1,K
#  data[[K+1]] is the data for level 2,1, data[[2K]] is level 2,K, etc.
#
#  It is assumed that data has length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
if(is.data.frame(x))x=as.matrix(x)
if(is.list(x))x<-elimna(matl(x))
if(is.matrix(x))x<-elimna(x)
data<-x
if(is.matrix(data))data<-listm(data)
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups stored in x is")
print(length(data))
print("Warning: These two values are not equal")
}
if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.")
tmeans<-0
h<-length(data[[grp[1]]])
v<-matrix(0,p,p)
for (i in 1:p)tmeans[i]<-mean(data[[grp[i]]],tr=tr,na.rm=TRUE)
v<-covmtrim(data,tr=tr)
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
#  Do test for factor A
cmat<-kron(cj,ik)  # Contrast matrix for factor A
#Qa<-johansp(cmat,tmeans,v,h,J,K)
Qa<-trimww.sub(cmat,tmeans,v,h,J,K)
#Qa.siglevel<-1-pf(Qa$teststat,J-1,999)
Qa.siglevel<-1-pf(Qa,J-1,999)
# Do test for factor B
cmat<-kron(ij,ck)  # Contrast matrix for factor B
#Qb<-johansp(cmat,tmeans,v,h,J,K)
Qb<-trimww.sub(cmat,tmeans,v,h,J,K)
Qb.siglevel<-1-pf(Qb,K-1,999)
# Do test for factor A by B interaction
cmat<-kron(cj,ck)  # Contrast matrix for factor A by B
#Qab<-johansp(cmat,tmeans,v,h,J,K)
Qab<-trimww.sub(cmat,tmeans,v,h,J,K)
Qab.siglevel<-1-pf(Qab,(J-1)*(K-1),999)
list(Qa=Qa,Qa.siglevel=Qa.siglevel,
Qb=Qb,Qb.siglevel=Qb.siglevel,
Qab=Qab,Qab.siglevel=Qab.siglevel)
}
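
# A minimal usage sketch for wwtrim (data simulated for illustration):
# a 2 by 3 within design, so six dependent measures per case.
if(FALSE){
set.seed(1)
dat<-matrix(rnorm(120),ncol=6)   # 20 cases measured under all 6 conditions
wwtrim(2,3,dat)
}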


dnormvar<-function(x){
# x^2 times the standard normal density; integrating this over a range
# yields the rescaling constant used with the Winsorized variance.
x^2*dnorm(x)
}
ebarplot<-function(x,y=NULL,nse=1, liw = uiw, aui=NULL, ali=aui,
err="y", tr=0,ylim=NULL, sfrac = 0.01, gap=0, add=FALSE,
col=par("col"), lwd=par("lwd"), slty=par("lty"), xlab="Group",
                    ylab=NULL, ...) {
# plots error bars using the data in
# x, which is assumed to be a matrix with J columns (J groups) or
# x has list mode.
# nse indicates how many standard errors to use when plotting.
#
# By default, means are used. To use a trimmed mean, set
# tr to some value between 0 and .5
# So tr=.2 would use a 20% trimmed mean
#
# Missing values are automatically removed.
#
if(tr==.5)stop("For medians, use ebarplot.med")
if(!is.null(y)){
if(is.matrix(x))stop("When y is given, x should not be a matrix")
if(is.list(x))stop("When y is given, x should not be in list mode")
rem=x
x=list()
x[[1]]=rem
x[[2]]=y
}
if(is.matrix(x))x<-listm(x)
mval<-NA
if(!is.list(x) && is.null(y))stop("This function assumes there are two or more groups")
for(j in 1:length(x))mval[j]<-mean(x[[j]],na.rm=TRUE,tr=tr)
se<-NA
#for(j in 1:length(x))se[j]<-sqrt(var(x[[j]],na.rm=TRUE)/length(x[[j]])
for(j in 1:length(x))se[j]<-trimse(x[[j]],na.rm=TRUE,tr=tr)
uiw<-nse*se
# plotCI is assumed to be available (e.g., via the plotrix package)
plotCI(mval,y=NULL, uiw=uiw, liw = uiw, aui=NULL, ali=aui,
                    err="y", ylim=NULL, sfrac = 0.01, gap=0, add=FALSE,
                    col=par("col"), lwd=par("lwd"), slty=par("lty"), xlab=xlab,
                    ylab=ylab)
}

Scov<-function(m){
#
# Compute Davies' covariance S-estimator
#
library("rrcov")
res=CovSest(m)
center=res@center
z=res@cov
list(center=center,cov=z)
}

outproad<-function(m,center=NA,plotit=TRUE,op=TRUE,MM=FALSE,cop=3,
xlab="VAR 1",ylab="VAR 2",rate=.05,iter=100,ip=6,pr=TRUE,SEED=TRUE,STAND=FALSE){
#
# Adjusts the critical value, gval used by outpro,
# so that the outside rate per observation, under normality
# is approximately equal to the value given by the argument
# rate, which defaults to .05.
# That is, expected proportion of points declared outliers under normality
# is intended to be rate=.05
#
# When dealing with p-variate data, p>9, this adjustment can be crucial
#
m=elimna(m)
n=nrow(m)
if(SEED)set.seed(2)
z=array(rmul(n*iter*ncol(m)),c(iter,n,ncol(m)))
newq=0
gtry=NA
for(itry in 1:ip){
newq=newq+9/10^itry
gtry[itry]=newq
}
gtry=c(.95,.975,gtry[-1])
if(pr)print("Computing adjustment")
for(itry in 1:ip){
val=NA
for(i in 1:iter){
temp=outpro(z[i,,],gval = sqrt(qchisq(gtry[itry],ncol(m))),
center=center,plotit=FALSE,op=op,MM=MM,cop=cop,STAND=STAND)$out.id
val[i]=length(temp)
}
erate=mean(val)/n
if(erate<rate){
newgval=sqrt(qchisq(gtry[itry],ncol(m)))
break
}}
res=outpro(m,gval=newgval,center=center,plotit=plotit,op=op,MM=MM,
    cop = cop, xlab = xlab, ylab = ylab,STAND=STAND)
list(results=res,used.gval=newgval)
}


mdepreg.sub<-function(X,theta){
#
# Objective function for depth-based regression: returns the negative
# regression depth of the residuals for the parameter vector theta.
#
np<-ncol(X)
p<-np-1
x<-X[,1:p]
y<-X[,np]
temp<-t(t(x)*theta[2:np])
yhat<-apply(temp,1,sum)+theta[1]
res<-y-yhat
val<-0-mregdepth(x,res)
val
}

l2drmci<-function(x,y=NULL,est=median,alpha=.05,nboot=500,SEED=TRUE,pr=TRUE,
na.rm=TRUE,...){
#
#   Compute a bootstrap confidence interval for a
#   measure of location associated with
#   the distribution of x-y,
#   est indicates which measure of location will be used
#   x and y are possibly dependent
#
#   na.rm=F assumes missing values occur at random and uses
#   all of the data that are not missing.
#   na.rm=T eliminates any pair for which one or both values are missing.
#
if(is.null(y[1])){
if(!is.matrix(x))stop("With y missing, x should be a matrix")
}
if(!is.null(y[1]))x<-cbind(x,y)
if(ncol(x)!=2)stop("Should have bivariate data")
if(na.rm)x=elimna(x)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(nrow(x),size=nrow(x)*nboot,replace=TRUE),nrow=nboot)
bvec<-NA
for(i in 1:nboot)bvec[i]<-loc2dif(x[data[i,],1],x[data[i,],2],est=est,
na.rm=na.rm,...)
bvec<-sort(bvec)
low<-round((alpha/2)*nboot)+1
up<-nboot-low
temp<-sum(bvec<0)/nboot+sum(bvec==0)/(2*nboot)
sig.level<-2*(min(temp,1-temp))
list(ci=c(bvec[low],bvec[up]),p.value=sig.level)
}
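
# A minimal usage sketch for l2drmci (data simulated for illustration):
if(FALSE){
set.seed(1)
x<-rnorm(30)
y<-x+rnorm(30)+.5
l2drmci(x,y)   # CI for the median of the distribution of x-y
}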


rmmismcp<-function(x,y=NA,alpha=.05,con=0,est=tmean,plotit=TRUE,grp=NA,nboot=500,
SEED=TRUE,xlab="Group 1",ylab="Group 2",pr=FALSE,...){
#
#   Use a percentile bootstrap method to  compare dependent groups.
#   Missing values are allowed; vectors of observations that contain
#   missing values are not simply removed as done by rmmcppb.
#   Only marginal measures of location are compared,
#   The function computes a .95 confidence interval for all linear contrasts
#   specified by con, a J by C matrix, where  C is the number of
#   contrasts to be tested, and the columns of con are the
#   contrast coefficients.
#   If con is not specified, all pairwise comparisons are done.
#
#   By default, a 20% trimmed mean is used and a sequentially rejective method
#   is used to control the probability of at least one Type I error.
#
#   nboot is the bootstrap sample size.
#
#   x can be an n by J matrix or it can have list mode
#   for two groups, data for second group can be put in y
#   otherwise, assume x is a matrix (n by J) or has list mode.
#
#
if(!is.na(y[1]))x<-cbind(x,y)
if(is.list(x))x=matl(x)
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
if(is.matrix(con)){
if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.")
}}
if(is.list(x)){
# put the data in an n by J matrix
mat<-matl(x)
}
if(is.matrix(x) && is.matrix(con)){
if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the number of groups.")
mat<-x
}
J<-ncol(x)
Jm<-J-1
flag.con=F
if(sum(con^2)==0){
flag.con=T
d<-(J^2-J)/2
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
d<-ncol(con)
n<-nrow(x)
crit.vec<-alpha/c(1:d)
connum<-ncol(con)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
xbars<-apply(x,2,est,na.rm=TRUE)
psidat<-NA
bveccen<-matrix(NA,ncol=J,nrow=nboot)
for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars)
psihat<-matrix(0,connum,nboot)
psihatcen<-matrix(0,connum,nboot)
bvec<-matrix(NA,ncol=J,nrow=nboot)
if(pr)print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot){
bvec[ib,]<-apply(x[data[ib,],],2,est,na.rm=TRUE,...)
}
#
# Now have an nboot by J matrix of bootstrap measures of location.
#
test<-1
for (ic in 1:connum){
for(ib in 1:nboot){
psihat[ic,ib]=sum(con[,ic]*bvec[ib,])
}
matcon=c(0,psihat[ic,])
#dis=pdis(matcon,cop=3)
dis=mean((psihat[ic,]<0))+.5*mean((psihat[ic,]==0))
test[ic]<-2*min(c(dis,1-dis)) # the p-value
}
ncon<-ncol(con)
dvec<-alpha/c(1:ncon)
if(plotit && ncol(bvec)==2){
z<-c(0,0)
one<-c(1,1)
plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n")
points(bvec)
totv<-apply(x,2,est,na.rm=TRUE,...)
cmat<-var(bvec)
dis<-mahalanobis(bvec,totv,cmat)
temp.dis<-order(dis)
ic<-round((1-alpha)*nboot)
xx<-bvec[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
abline(0,1)
}
temp2<-order(0-test)
ncon<-ncol(con)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output<-matrix(0,connum,6)
dimnames(output)<-list(NULL,c("con.num","psihat","p.value",
"crit.sig","ci.lower","ci.upper"))
tmeans<-apply(x,2,est,na.rm=TRUE,...)
psi<-1
output[temp2,4]<-zvec
for (ic in 1:ncol(con)){
output[ic,2]<-sum(con[,ic]*tmeans)
output[ic,1]<-ic
output[ic,3]<-test[ic]
temp<-sort(psihat[ic,])
icl<-round(output[ic,4]*nboot/2)+1
icu<-nboot-(icl-1)
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
if(!flag.con){
}
if(flag.con){
CC=(J^2-J)/2
test<-matrix(NA,CC,7)
dimnames(test)<-list(NULL,c("Group","Group","psi.hat","p.value","p.crit",
"ci.low","ci.upper"))
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
jcom<-jcom+1
test[jcom,1]=j
test[jcom,2]=k
test[jcom,3:5]=output[jcom,2:4]
test[jcom,6:7]=output[jcom,5:6]
con=NULL
}}}}
if(!flag.con)test=output
#num.sig<-sum(output[,4]<=output[,5])
if(flag.con)num.sig<-sum(test[,4]<=test[,5])
if(!flag.con)num.sig<-sum(test[,3]<=test[,4])
list(output=test,con=con,num.sig=num.sig)
}
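
# A minimal usage sketch for rmmismcp (data simulated for illustration):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(60),ncol=3)
x[sample(60,5)]<-NA   # missing values are allowed
rmmismcp(x)           # all pairwise comparisons of marginal 20% trimmed means
}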



mulrank<-function(J,K,x,grp=c(1:p),p=J*K){
#
# Perform the Munzel and Brunner
# multivariate one-way rank-based ANOVA
# (Munzel and Brunner, Biometrical J., 2000, 42, 837--854).
#
# Have a J by K design with J independent levels and K dependent
# measures.
#
# x can be a matrix with columns corresponding to groups,
# or it can have list mode.
#
newx=list()
GV=matrix(c(1:p),ncol=K,byrow=TRUE)
if(is.list(x)){
temp=NA
jk=0
for(j in 1:J){
temp=elimna(matl(x[GV[j,]]))
for(k in 1:K){
jk=jk+1
newx[[jk]]=temp[,k]
}}
x=NA
x=newx
}
if(is.matrix(x)){
x=elimna(x)
x<-listm(x)
}
xx<-list()
nvec<-NA
for(j in 1:p){
xx[[j]]<-x[[grp[j]]]
nvec[j]<-length(xx[[j]])
}
Nrow=nvec[GV[,1]]
v<-matrix(0,p,p)
Ja<-matrix(1,J,J)
Ia<-diag(1,J)
Pa<-Ia-Ja/J
Jb<-matrix(1,K,K)
Ib<-diag(1,K)
Pb<-Ib-Jb/K
cona<-kron(Pa,Ib)
xr<-list()
N<-0
jj=0
for(k in 1:K){
temp<-x[[k]]
jk<-k
for (j in 2:J){
jj=jj+1
jk<-jk+K
temp<-c(temp,x[[jk]])
}
N<-length(temp)
pr<-rank(temp)
xr[[k]]<-pr[1:nvec[k]] #Put ranks of pooled data for first
#                       variable in xr
top<-nvec[k]
jk<-k
bot<-1
for (j in 2:J){
jk<-jk+K
bot<-bot+nvec[jk]
top<-top+nvec[jk]
xr[[jk]]<-pr[bot:top] # Put midranks in xr
}}
phat<-NA
botk<-0
for(j in 1:J){
for(k in 1:K){
botk<-botk+1
phat[botk]<-(mean(xr[[botk]])-.5)/N
}}
klow<-1-K
kup<-0
for(j in 1:J){
klow<-klow+K
kup<-kup+K
sel<-c(klow:kup)
v[sel,sel]<-covmtrim(xr[klow:kup],tr=0)/N
}
qhat<-matrix(phat,J,K,byrow=TRUE)
test<-N*t(phat)%*%cona%*%phat/sum(diag(cona%*%v))
nu1<-sum(diag(cona%*%v))^2/sum(diag(cona%*%v%*%cona%*%v))
sig.level<-1-pf(test,nu1,1000000)
list(test.stat=test[1,1],nu1=nu1,p.value=sig.level,N=N,q.hat=qhat)
}
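
# A minimal usage sketch for mulrank (data simulated for illustration):
# J=3 independent groups, K=2 dependent measures per group.
if(FALSE){
set.seed(1)
x<-list()
for(j in 1:6)x[[j]]<-rnorm(15)
mulrank(3,2,x)
}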

lincon<-function(x,con=0,tr=.2,alpha=.05,pr=TRUE,crit=NA,SEED=TRUE,KB=FALSE){
#
#  A heteroscedastic test of d linear contrasts using trimmed means.
#
#  The data are assumed to be stored in x in list mode.
#  length(x) is assumed to correspond to the total number of groups.
#  It is assumed all groups are independent.
#
#  con is a J by d matrix containing the contrast coefficients that are used.
#  If con is not specified, all pairwise comparisons are made.
#
#  Missing values are automatically removed.
#
#  To apply the Kaiser-Bowden method, use the function kbcon
#
if(tr==.5)stop("Use the R function medpb to compare medians")
if(is.data.frame(x))x=as.matrix(x)
if(KB)stop("Use the function kbcon")
flag<-T
if(alpha!= .05 && alpha!=.01)flag<-F
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
con<-as.matrix(con)
J<-length(x)
sam=NA
h<-vector("numeric",J)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
sam[j]=length(x[[j]])
h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]]))
   # h is the number of observations in the jth group after trimming.
w[j]<-((length(x[[j]])-1)*winvar(x[[j]],tr))/(h[j]*(h[j]-1))
xbar[j]<-mean(x[[j]],tr)
}
if(sum(con^2)==0){
CC<-(J^2-J)/2
if(CC>28)print("For faster execution time but less power, use kbcon")
psihat<-matrix(0,CC,6)
dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper",
"p.value"))
test<-matrix(NA,CC,6)
dimnames(test)<-list(NULL,c("Group","Group","test","crit","se","df"))
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
jcom<-jcom+1
test[jcom,3]<-abs(xbar[j]-xbar[k])/sqrt(w[j]+w[k])
sejk<-sqrt(w[j]+w[k])
test[jcom,5]<-sejk
psihat[jcom,1]<-j
psihat[jcom,2]<-k
test[jcom,1]<-j
test[jcom,2]<-k
psihat[jcom,3]<-(xbar[j]-xbar[k])
df<-(w[j]+w[k])^2/(w[j]^2/(h[j]-1)+w[k]^2/(h[k]-1))
test[jcom,6]<-df
psihat[jcom,6]<-2*(1-pt(test[jcom,3],df))
if(!KB){
if(CC>28)flag=F
if(flag){
if(alpha==.05)crit<-smmcrit(df,CC)
if(alpha==.01)crit<-smmcrit01(df,CC)
}
if(!flag || CC>28)crit<-smmvalv2(dfvec=rep(df,CC),alpha=alpha,SEED=SEED)
}
if(KB)crit<-sqrt((J-1)*(1+(J-2)/df)*qf(1-alpha,J-1,df))
test[jcom,4]<-crit
psihat[jcom,4]<-(xbar[j]-xbar[k])-crit*sejk
psihat[jcom,5]<-(xbar[j]-xbar[k])+crit*sejk
}}}}
if(sum(con^2)>0){
if(nrow(con)!=length(x)){
stop("The number of groups does not match the number of contrast coefficients.")
}
psihat<-matrix(0,ncol(con),5)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper",
"p.value"))
test<-matrix(0,ncol(con),5)
dimnames(test)<-list(NULL,c("con.num","test","crit","se","df"))
df<-0
for (d in 1:ncol(con)){
psihat[d,1]<-d
psihat[d,2]<-sum(con[,d]*xbar)
sejk<-sqrt(sum(con[,d]^2*w))
test[d,1]<-d
test[d,2]<-sum(con[,d]*xbar)/sejk
df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1))
if(flag){
if(alpha==.05)crit<-smmcrit(df,ncol(con))
if(alpha==.01)crit<-smmcrit01(df,ncol(con))
}
if(!flag)crit<-smmvalv2(dfvec=rep(df,ncol(con)),alpha=alpha,SEED=SEED)
test[d,3]<-crit
test[d,4]<-sejk
test[d,5]<-df
psihat[d,3]<-psihat[d,2]-crit*sejk
psihat[d,4]<-psihat[d,2]+crit*sejk
psihat[d,5]<-2*(1-pt(abs(test[d,2]),df))
}
}
if(pr){
print("Note: confidence intervals are adjusted to control FWE")
print("But p-values are not adjusted to control FWE")
}
list(n=sam,test=test,psihat=psihat)
}
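
# A minimal usage sketch for lincon (data simulated for illustration):
if(FALSE){
set.seed(1)
x<-list(rnorm(20),rnorm(25),rnorm(30)+.5)
lincon(x)   # all pairwise comparisons of the 20% trimmed means
}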
poireg<-function(x,y,xout=FALSE,outfun=outpro,plotit=FALSE,xlab="X",ylab="Y",
varfun=var,YHAT=FALSE,STAND=FALSE,...){
#
# Perform Poisson regression.
# The predictors are assumed to be stored in the n by p matrix x.
# The y values are typically count data (integers).
#
# xout=T will remove outliers from among the x values and then fit
# the regression line.
#  By default, with one predictor a MAD-median rule is used;
#  with more than one predictor, the projection method is used.
#
# outfun=out will use MVE method
#
xy=elimna(cbind(x,y))
x<-as.matrix(x)
x=xy[,1:ncol(x)]
y=xy[,ncol(xy)]
x<-as.matrix(x)
if(xout){
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
temp=glm(formula=y~x,family=poisson)
init=summary(temp)
yhat=temp$coef[1]
for(j in 1:ncol(x)){
j1=j+1
yhat=yhat+temp$coef[j1]*x[,j]
}
yhat=exp(yhat)
if(plotit){
x=as.matrix(x)
if(ncol(x)>1)stop("Cannot plot with more than one predictor")
plot(x,y,xlab=xlab,ylab=ylab)
#points(x,yhat,pch=".")
xord=order(x)
lines(x[xord],yhat[xord])
}
ex=varfun(yhat)/varfun(y)
str=sqrt(ex)
hatv=NULL
if(YHAT)hatv=yhat
list(results=init,Explanatory.Power=ex,Strength.Assoc=str,yhat=hatv)
}
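
# A minimal usage sketch for poireg (data simulated for illustration):
if(FALSE){
set.seed(1)
x<-runif(100)
y<-rpois(100,exp(1+x))
poireg(x,y,plotit=TRUE)
}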


smcorcom<-function(x1,y1,x2,y2,nboot=200,pts=NA,plotit=TRUE,
SEED=TRUE,varfun=pbvar,xout=TRUE,outfun=out,...){
#
# Compare the strength of association of pairs of variables associated with
# two independent groups.
# The strength of the association is based on Cleveland's LOWESS
# smoother coupled with a robust analog of explanatory power.
#
# The method generalizes the goal of comparing the usual
# coefficients of determination associated with two independent groups.
#
#  Assume data are in x1 y1 x2 and y2
#
# Reject at the .05 level if the reported p-value is less than or
# equal to p.crit, which is returned by the function.
#
m<-elimna(cbind(x1,y1))   # remove missing values for group 1, as is done for group 2 below
x1<-m[,1]
y1<-m[,2]
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
}
m<-elimna(cbind(x2,y2))
x2<-m[,1]
y2<-m[,2]
if(xout){
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
if(SEED)set.seed(2)
estmat1=NA
estmat2=NA
data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot)
#
for(ib in 1:nboot){
estmat1[ib]=lplot(x1[data1[ib,]],y1[data1[ib,]],plotit=FALSE,
varfun=varfun)$Explanatory.power
estmat2[ib]=lplot(x2[data2[ib,]],y2[data2[ib,]],
varfun=varfun,plotit=FALSE)$Explanatory.power
}
dif<-(estmat1<estmat2)
dif0<-(estmat1==estmat2)
p.value=mean(dif)+.5*mean(dif0)
p.value=2*min(c(p.value,1-p.value))
n1=length(y1)
n2=length(y2)
p1=.05
p2=.05
temp1=tsreg(c(100,200),c(.08,.05))$coef
temp2=tsreg(c(50,100),c(.21,.08))$coef
temp3=tsreg(c(30,50),c(.3,.21))$coef
if(n1<200)p1=temp1[1]+temp1[2]*n1
if(n1<100)p1=temp2[1]+temp2[2]*n1
if(n1<50)p1=temp3[1]+temp3[2]*n1
if(n1<30)p1=.3
if(n2<200)p2=temp1[1]+temp1[2]*n2
if(n2<100)p2=temp2[1]+temp2[2]*n2
if(n2<50)p2=temp3[1]+temp3[2]*n2
if(n2<30)p2=.3
pcrit=(n2*p1+n1*p2)/(n1+n2)
names(pcrit)=NULL
if(plotit)lplot2g(x1,y1,x2,y2)
list(p.value=p.value,pcrit.05=pcrit)
}


tsreg<-function(x,y,xout=FALSE,outfun=out,iter=10,varfun=pbvar,
corfun=pbcor,plotit=FALSE,WARN=TRUE,HD=FALSE,xlab='X',ylab='Y',...){
#
#  Compute Theil-Sen regression estimator
#
#  Use Gauss-Seidel algorithm
#  when there is more than one predictor
#
#
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
temp<-NA
x<-as.matrix(x)
n=nrow(x)
n.keep=n
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=FALSE,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
n.keep=nrow(x)
}
if(ncol(x)==1){
temp1<-tsp1reg(x,y)
coef<-temp1$coef
res<-temp1$res
}
if(ncol(x)>1){
for(p in 1:ncol(x)){
temp[p]<-tsp1reg(x[,p],y)$coef[2]
}
res<-y-x%*%temp
if(!HD)alpha<-median(res)
if(HD)alpha<-hd(res)
r<-matrix(NA,ncol=ncol(x),nrow=nrow(x))
tempold<-temp
for(it in 1:iter){
for(p in 1:ncol(x)){
r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p]
temp[p]<-tsp1reg(x[,p],r[,p],plotit=FALSE)$coef[2]
}
if(!HD)alpha<-median(y-x%*%temp)
if(HD)alpha<-hd(y-x%*%temp)
tempold<-temp
}
coef<-c(alpha,temp)
res<-y-x%*%temp-alpha
}
yhat<-y-res
stre=NULL
temp=varfun(y)
if(temp==0){
if(WARN)print("Warning: When computing strength of association, measure of variation=0")
}
e.pow=NULL
if(temp>0){
e.pow<-varfun(yhat)/varfun(y)
if(!is.na(e.pow)){
if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2
e.pow=as.numeric(e.pow)
stre=sqrt(e.pow)
}}
if(plotit){
if(ncol(x)==1){
plot(x,y,xlab=xlab,ylab=ylab)
abline(coef)
}}
list(n=n,n.keep=n.keep,
coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow)
}
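
# A minimal usage sketch for tsreg (data simulated for illustration):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(100),ncol=2)
y<-1+x[,1]-x[,2]+rnorm(50)
tsreg(x,y)$coef   # Gauss-Seidel Theil-Sen fit with two predictors
}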

lplotv2<-function(x,y,span=.75,pyhat=FALSE,eout=FALSE,xout=FALSE,outfun=out,plotit=TRUE,
expand=.5,low.span=2/3,varfun=pbvar,cor.op=FALSE,cor.fun=pbcor,ADJ=FALSE,nboot=20,
scale=FALSE,xlab="X",ylab="Y",zlab="",theta=50,phi=25,family="gaussian",
duplicate="error",pr=TRUE,SEED=TRUE,ticktype="simple"){
#
# Plot regression surface using LOESS
#
# low.span is the span when lowess is used and there is one predictor
# span is the span when loess is used with two or more predictors
# pyhat=T will return Y hat values
# eout=T will eliminate outliers
# xout=T  will eliminate points where X is an outliers
# family="gaussian"; see the description of the built-in function loess
#
# duplicate="error"
# In some situations where duplicate values occur, when plotting with
# two predictors, it is necessary to set duplicate="strip"
#
st.adj=NULL
e.adj=NULL
if(ADJ){
if(SEED)set.seed(2)
}
si=1
library(stats)
x<-as.matrix(x)
if(!is.matrix(x))stop("x is not a matrix")
d<-ncol(x)
if(d>=2){
library(akima)
if(ncol(x)==2 && !scale){
if(pr){
print("scale=F is specified.")
print("If there is dependence, might use scale=T")
}}
m<-elimna(cbind(x,y))
x<-m[,1:d]
y<-m[,d+1]
if(eout && xout)stop("Can't have both eout and xout = T")
if(eout){
flag<-outfun(m,plotit=FALSE)$keep
m<-m[flag,]
}
if(xout){
flag<-outfun(x,plotit=FALSE)$keep
m<-m[flag,]
}
x<-m[,1:d]
y<-m[,d+1]
if(d==2)fitr<-fitted(loess(y~x[,1]*x[,2],span=span,family=family))
if(d==3)fitr<-fitted(loess(y~x[,1]*x[,2]*x[,3],span=span,family=family))
if(d==4)fitr<-fitted(loess(y~x[,1]*x[,2]*x[,3]*x[,4],span=span,family=family))
if(d>4)stop("Can have at most four predictors")
last<-fitr
if(d==2 && plotit){
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fitr<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate)
persp(fitr,theta=theta,phi=phi,xlab=xlab,ylab=ylab,zlab=zlab,expand=expand,
scale=scale,ticktype=ticktype)
}}
if(d==1){
m<-elimna(cbind(x,y))
x<-m[,1:d]
y<-m[,d+1]
if(eout && xout)stop("Can't have both eout and xout = T")
if(eout){
flag<-outfun(m)$keep
m<-m[flag,]
}
if(xout){
flag<-outfun(x)$keep
m<-m[flag,]
}
x<-m[,1:d]
y<-m[,d+1]
if(plotit){
plot(x,y,xlab=xlab,ylab=ylab)
lines(lowess(x,y,f=low.span))
}
yyy<-lowess(x,y)$y
xxx<-lowess(x,y)$x
if(d==1){
ordx=order(xxx)
yord=yyy[ordx]
flag=NA
for (i in 2:length(yyy))flag[i-1]=sign(yord[i]-yord[i-1])
if(sum(flag)<0)si=-1
}
last<-yyy
chkit<-sum(duplicated(x))
if(chkit>0){
last<-rep(1,length(y))
for(j in 1:length(yyy)){
for(i in 1:length(y)){
if(x[i]==xxx[j])last[i]<-yyy[j]
}}
}
}
E.power<-1
if(!cor.op)E.power<-varfun(last[!is.na(last)])/varfun(y)
if(cor.op || E.power>=1){
if(d==1){
xord<-order(x)
E.power<-cor.fun(last,y[xord])$cor^2
}
if(d>1)E.power<-cor.fun(last,y)$cor^2
}
if(ADJ){
x=as.matrix(x)
val=NA
n=length(y)
data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(i in 1:nboot){
temp=lplot.sub(x[data1[i,],],y[data2[i,]],plotit=FALSE,pr=FALSE)
val[i]=temp$Explanatory.power
}
vindt=median(val)
v2indt=median(sqrt(val))
st.adj=(sqrt(E.power)-max(c(0,v2indt)))/(1-max(c(0,v2indt)))
e.adj=(E.power-max(c(0,vindt)))/(1-max(c(0,vindt)))
st.adj=max(c(0,st.adj))
e.adj=max(c(0,e.adj))
}
if(!pyhat)last <- NULL
list(Strength.Assoc=si*sqrt(E.power),Explanatory.power=E.power,
Strength.Adj=st.adj,Explanatory.Adj=e.adj,yhat.values=last)
}
yuendna<-function(x,y=NULL,tr=.2,alpha=.05){
#
#  Compare the trimmed means of two dependent random variables
#  using the data in x and y.
#  The default amount of trimming is 20%
#
# If y is not supplied, this function assumes x is a matrix with 2 columns.
#
#  Pairs of observations for which one value is missing are NOT deleted.
#  Marginal trimmed means are compared
#  using all available data.
#
if(is.null(y)){
if(!is.matrix(x))stop("y is null and x is not a matrix")
y=x[,2]
x=x[,1]
}
if(length(x)!=length(y))stop("The number of observations must be equal")
m<-cbind(x,y)
# first eliminate any rows with both values missing.
flag=(apply(is.na(m),1,sum)==2)
m=m[!flag,]
x<-m[,1]
y<-m[,2]
flagx=is.na(y) # Indicates observed x values for which y is missing
flagy=is.na(x) # Indicates the y values for which x is missing
m<-elimna(m)   # m has data where both values are available--no missing values
n=nrow(m)
n1=sum(flagx)  # number of x values for which y is missing
n2=sum(flagy)
h=n-2*floor(tr*n)
h1=n1-2*floor(tr*n1)
h2=n2-2*floor(tr*n2)
xbarn=mean(x,tr=tr,na.rm=TRUE)
xbarn1=0
if(h1>0)xbarn1=mean(x[flagx],tr=tr)
ybarn=mean(y[!flagy],tr=tr,na.rm=TRUE)
ybarn1=0
if(h2>0)ybarn1=mean(y[flagy],tr=tr)
lam1=h/(h+h1)
lam2=h/(h+h2)
est=lam1*xbarn-lam2*ybarn+(1-lam1)*xbarn1-(1-lam2)*ybarn1
sex=trimse(elimna(x),tr=tr)
sey=trimse(elimna(y),tr=tr)
q1<-(n-1)*winvar(m[,1],tr)
q2<-(n-1)*winvar(m[,2],tr)
q3<-(n-1)*wincor(m[,1],m[,2],tr)$cov
sen=sqrt((lam1^2*q1+lam2^2*q2-2*lam1*lam2*q3)/(h*(h-1)))
SE=sqrt(sen^2+(1-lam1)^2*sex^2+(1-lam2)^2*sey^2)
test=est/SE
list(estimate=est,test=test,se=SE)
}
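
# A minimal usage sketch for yuendna (data simulated for illustration):
if(FALSE){
set.seed(1)
x<-rnorm(30)
y<-x+rnorm(30)
x[c(2,9)]<-NA   # pairs with one value missing are retained
yuendna(x,y)
}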

rm2miss<-function(x,y=NULL,tr=0,nboot=1000,alpha=.05,SEED=TRUE){
#
#   Compare the marginal trimmed means of two dependent groups
#   using a bootstrap t method that allows missing values
#
# If y is not supplied, this function assumes x is a matrix with 2 columns.
#
#  NOTE: This function can fail if there are too many missing values
# get the error: incorrect number of dimensions
#
#
if(SEED)set.seed(2)
if(is.null(y)){
if(!is.matrix(x))stop("y is null and x is not a matrix")
}
if(!is.null(y))x=cbind(x,y)
if(ncol(x)!=2)
print("Warning: x should have exactly two columns; columns 1 and 2 are used")
n=nrow(x)
test=yuendna(x,tr=tr)
cen=x
cen[,1]=cen[,1]-mean(x[,1],na.rm=TRUE,tr=tr)
cen[,2]=cen[,2]-mean(x[,2],na.rm=TRUE,tr=tr)
data=matrix(sample(n,n*nboot,replace=TRUE),ncol=nboot)
tval=apply(data,2,FUN=rm2miss.sub,x=cen,tr=tr)
tval=sort(abs(tval))
icrit<-floor((1-alpha)*nboot+.5)
ci=test$est-tval[icrit]*test$se
ci[2]=test$est+tval[icrit]*test$se
pv=mean(abs(test$test)<=abs(tval))
list(ci=ci,p.value=pv)
}
rm2miss.sub<-function(data,x,tr){
n=nrow(x)
m=x[data,]
ans=yuendna(m,tr=tr)$test
ans
}
ydbt<-function(x,y,tr=.2,alpha=.05,nboot=599,side=TRUE,plotit=FALSE,op=1,SEED=TRUE){
#
#   Using the bootstrap-t method,
#   compute a .95 confidence interval for the difference between
#   the marginal trimmed means of paired data.
#   By default, 20% trimming is used with B=599 bootstrap samples.
#
#   side=F returns equal-tailed ci
#   side=T returns symmetric ci.
#
side<-as.logical(side)
if(length(x)!=length(y))stop("Must have equal sample sizes.")
m<-cbind(x,y)
m<-elimna(m)
x<-m[,1]
y<-m[,2]
if(sum(c(!is.na(x),!is.na(y)))!=(length(x)+length(y)))stop("Missing values are not allowed.")
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
xcen<-x-mean(x,tr)
ycen<-y-mean(y,tr)
bvec<-apply(data,1,tsub,xcen,ycen,tr)
# bvec is a 1 by nboot matrix containing the bootstrap test statistics.
dotest=yuend(x,y,tr=tr)
estse<-dotest$se
p.value=NULL
dif<-mean(x,tr)-mean(y,tr)
if(!side){
ilow<-round((alpha/2)*nboot)
ihi<-nboot-ilow
bsort<-sort(bvec)
ci<-0
ci[1]<-dif-bsort[ihi]*estse
ci[2]<-dif-bsort[ilow+1]*estse
}
if(side){
bsort<-sort(abs(bvec))
ic<-round((1-alpha)*nboot)
ci<-0
ci[1]<-dif-bsort[ic]*estse
ci[2]<-dif+bsort[ic]*estse
p.value<-(sum(abs(dotest$teststat)<=abs(bvec)))/nboot
}
if(plotit){
if(op==1)akerd(bsort)
if(op==2)rdplot(bsort)
if(op==3)boxplot(bsort)
}
list(ci=ci,dif=dif,p.value=p.value)
}
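
# A minimal usage sketch for ydbt (data simulated for illustration):
if(FALSE){
set.seed(1)
x<-rnorm(25)
y<-x+rnorm(25)+.3
ydbt(x,y)   # bootstrap-t CI for the difference between 20% trimmed means
}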


rmrvar<-function(x,y=NA,alpha=.05,con=0,est=pbvar,plotit=FALSE,grp=NA,
hoch=TRUE,nboot=NA,xlab="Group 1",ylab="Group 2",pr=TRUE,SEED=TRUE,...){
#
#   Use a percentile bootstrap method to compare dependent groups.
#   based on some robust measure of variation indicated by the argument
#   est
#   By default, est=pbvar, the percentage bend midvariance.
#
#   The function computes a .95 confidence interval for all linear contrasts
#   specified by con, a J by C matrix, where  C is the number of
#   contrasts to be tested, and the columns of con are the
#   contrast coefficients.
#   If con is not specified, all pairwise comparisons are done.
#
#   nboot is the bootstrap sample size. If not specified, a value will
#   be chosen depending on the number of contrasts there are.
#
#   x can be an n by J matrix or it can have list mode
#   for two groups, data for second group can be put in y
#   otherwise, assume x is a matrix (n by J) or has list mode.
#
#   Hochberg's  sequentially rejective method is used to control alpha.
#
if(!is.na(y[1]))x=cbind(x,y)
if(is.list(x)){
# put the data in an n by J matrix
mat<-matl(x)
}
if(is.matrix(x) && is.matrix(con)){
if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the
number of groups.")
mat<-x
}
if(is.matrix(x))mat<-x
if(!is.na(sum(grp)))mat<-mat[,grp]
mat<-elimna(mat) # Remove rows with missing values.
x<-mat
J<-ncol(mat)
Jm<-J-1
if(sum(con^2)==0){
d<-(J^2-J)/2
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
d<-ncol(con)
if(is.na(nboot)){
if(d<=4)nboot<-1000
if(d>4)nboot<-5000
}
n<-nrow(mat)
crit.vec<-alpha/c(1:d)
connum<-ncol(con)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
xbars<-apply(mat,2,est)
psidat<-NA
for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars)
psihat<-matrix(0,connum,nboot)
bvec<-matrix(NA,ncol=J,nrow=nboot)
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot){
bvec[ib,]<-apply(x[data[ib,],],2,est,...)
}
#
# Now have an nboot by J matrix of bootstrap values.
#
test<-1
bias<-NA
for (ic in 1:connum){
psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic])
test[ic]<-sum((psihat[ic,]>0))/nboot
test[ic]<-min(test[ic],1-test[ic])
}
test<-2*test
ncon<-ncol(con)
if(alpha==.05){
dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
#if(hoch)dvec<-alpha/(2* c(1:ncon))
#dvec<-2*dvec
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
dvecba<-dvec
dvec[1]<-alpha/2
}
if(hoch)dvec<-alpha/(c(1:ncon))
if(plotit && ncol(bvec)==2){
z<-c(0,0)
one<-c(1,1)
plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n")
points(bvec)
totv<-apply(x,2,est,...)
cmat<-var(bvec)
dis<-mahalanobis(bvec,totv,cmat)
temp.dis<-order(dis)
ic<-round((1-alpha)*nboot)
xx<-bvec[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
abline(0,1)
}
temp2<-order(0-test)
ncon<-ncol(con)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output<-matrix(0,connum,6)
dimnames(output)<-list(NULL,c("con.num","est.var","p.value","crit.p.value",
"ci.lower","ci.upper"))
tmeans<-apply(mat,2,est,...)
psi<-1
for (ic in 1:ncol(con)){
output[ic,2]<-sum(con[,ic]*tmeans)
output[ic,1]<-ic
output[ic,3]<-test[ic]
output[temp2,4]<-zvec
temp<-sort(psihat[ic,])
icl<-round(output[ic,4]*nboot/2)+1
icu<-nboot-(icl-1)
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}

bprm<-function(x,y=NULL,grp=NA){
#
# Perform Brunner-Puri within groups rank-based ANOVA
#
# x can be a matrix with columns corresponding to groups
# or it can have list mode.
#
# For computational details, see Brunner, E., Domhof, S. and Langer, F. (2002,
#  section 7.2.2, Nonparametric Analysis of Longitudinal Data in
#  Factorial Experiments)
#
if(is.list(x))x<-matl(x)
if(!is.null(y[1]))x=cbind(x,y)
x<-elimna(x)
if(is.na(grp[1]))grp <- c(1:ncol(x))
if(!is.matrix(x))stop("Data are not stored in a matrix or in list mode.")
K<-length(grp) # The number of groups.
Jb<-matrix(1,K,K)
Ib<-diag(1,K)
Pb<-Ib-Jb/K
y<-matrix(rank(x),ncol=ncol(x)) #ranks of pooled data
ybar<-apply(y,2,mean) # average of ranks
N<-ncol(x)*nrow(x)
vhat<-var(y)/N^2
test<-nrow(x)*sum((ybar-(N+1)/2)^2)/N^2
trval<-sum(diag(Pb%*%vhat))
test<-test/trval # See Brunner, Domhof and Langer, p. 98, eq. 7.12
nu1<-trval^2/sum(diag(Pb%*%vhat%*%Pb%*%vhat))
sig.level<-1-pf(test,nu1,1000000)
list(test.stat=test,nu1=nu1,p.value=sig.level)
}



effectg.sub<-function(x,y,locfun=tmean,varfun=winvarN,...){
#
#  Compute a robust-heteroscedastic measure of effect size
#  based on the measure of location indicated by the argument
#  locfun, and the measure of scatter indicated by
#  varfun.
#
#  This subfunction is for the equal sample size case and is called by
#   effectg when sample sizes are not equal.
#
#  varfun defaults to winvarN, the Winsorized variance rescaled so that
#  it estimates the population variance under normality.
#
library(MASS)
x<-x[!is.na(x)]  # Remove any missing values in x
y<-y[!is.na(y)]  # Remove any missing values in y
m1=locfun(x,...)
m2=locfun(y,...)
top=var(c(m1,m2))
pts=c(x,y)
#
bot=varfun(pts,...)
#
e.pow=top/bot
list(Var.Explained=e.pow,Effect.Size=sqrt(e.pow))
}


effectg<-function(x,y,locfun=tmean,varfun=winvarN,nboot=100,SEED=TRUE,...){
#
# Compute a robust heteroscedastic measure of effect size
#  (explanatory power) based on the measures of location and scale
# indicated by the arguments locfun and varfun, respectively
#
library(MASS)
if(SEED)set.seed(2)
x<-x[!is.na(x)]  # Remove any missing values in x
y<-y[!is.na(y)]  # Remove any missing values in y
n1=length(x)
n2=length(y)
if(n1==n2){
temp=effectg.sub(x,y,locfun=locfun,varfun=varfun,...)
e.pow=temp$Var.Explained
}
if(n1!=n2){
N=min(c(n1,n2))
vals=0
for(i in 1:nboot)vals[i]=effectg.sub(sample(x,N),sample(y,N),
locfun=locfun,varfun=varfun,...)$Var.Explained
e.pow=mean(vals)
}
list(Explanatory.power=e.pow,Effect.Size=sqrt(e.pow))
}
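
# A minimal usage sketch for effectg (data simulated for illustration):
if(FALSE){
set.seed(1)
effectg(rnorm(30),rnorm(40)+.5)   # unequal n, so the bootstrap branch is used
}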


winvarN<-function(x,tr=.2){
#
# rescale the winsorized variance so that it equals one for the standard
# normal distribution
#
x=elimna(x)
library(MASS)
cterm=NULL
if(tr==0)cterm=1
if(tr==0.1)cterm=0.6786546
if(tr==0.2)cterm=0.4120867
if(is.null(cterm))cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr
bot=winvar(x,tr=tr)/cterm
bot
}
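
# A quick check of winvarN (data simulated for illustration):
if(FALSE){
set.seed(1)
winvarN(rnorm(10000))   # should be close to 1 under normality
}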
covloc<-function(x){
#
# Return mean and covariance matrix
#
loc=apply(x,2,mean)
mcov=cov(x)
list(center=loc,cov=mcov)
}
g2plotdifxy<-function(x,y,xlab="Difference",ylab=""){
#
# Plot an estimate of the distribution of X-Y
#
x<-x[!is.na(x)]
y<-y[!is.na(y)]
m<-as.vector(outer(x,y,FUN="-"))
akerd(m,xlab=xlab,ylab=ylab)
}
sumplot2g<-function(x,y=NULL,xlab="X",ylab="",eblabx="Groups",eblaby="",nse=1){
#
# create four plots useful when comparing two groups
# 1. error bars
# 2. boxplots
# 3. kernel density estimates
# 4 shift function
#
if(!is.null(y)){
xy=list()
xy[[1]]=x
xy[[2]]=y
}
if(is.null(y)){
if(is.matrix(x))xy=listm(x)  # convert matrix to list mode; matl goes the other way
}
par(mfrow=c(2,2))
par(oma=c(4,0,0,0))
ebarplot(xy,xlab=eblabx,ylab=eblaby)
boxplot(xy)
g2plot(xy[[1]],xy[[2]])
sband(xy[[1]],xy[[2]])
par(mfrow=c(1,1))
}

yuenv2<-function(x,y,tr=.2,alpha=.05,plotit=FALSE,plotfun=splot,op=TRUE,VL=TRUE,cor.op=FALSE,loc.fun=median,
xlab="Groups",ylab="",PB=FALSE,nboot=100,SEED=TRUE){
#
#  Perform Yuen's test for trimmed means on the data in x and y.
#  The default amount of trimming is 20%
#  Missing values (values stored as NA) are automatically removed.
#
#  A confidence interval for the trimmed mean of x minus the
#  trimmed mean of y is computed and returned in yuen$ci.
#  The significance level is returned in yuen$siglevel
#
#  For an omnibus test with more than two independent groups,
#  use t1way.
#
#   Unlike the function yuen, a robust heteroscedastic measure
#   of effect size is returned.
#
if(tr==.5)stop("Use medpb to compare medians.")
if(tr>.5)stop("Can't have tr>.5")
library(MASS)
if(SEED)set.seed(2)
x<-x[!is.na(x)]  # Remove any missing values in x
y<-y[!is.na(y)]  # Remove any missing values in y
n1=length(x)
n2=length(y)
h1<-length(x)-2*floor(tr*length(x))
h2<-length(y)-2*floor(tr*length(y))
q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1))
q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1))
df<-(q1+q2)^2/((q1^2/(h1-1))+(q2^2/(h2-1)))
crit<-qt(1-alpha/2,df)
m1=mean(x,tr)
m2=mean(y,tr)
mbar=(m1+m2)/2
dif=m1-m2
low<-dif-crit*sqrt(q1+q2)
up<-dif+crit*sqrt(q1+q2)
test<-abs(dif/sqrt(q1+q2))
yuen<-2*(1-pt(test,df))
xx=c(rep(1,length(x)),rep(2,length(y)))
pts=c(x,y)  # needed for plotting below, including when sample sizes differ
if(h1==h2){
top=var(c(m1,m2))
#
if(!PB){
if(tr==0)cterm=1
if(tr>0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr
bot=winvar(pts,tr=tr)/cterm
e.pow=top/bot
if(e.pow>1){
x0=c(rep(1,length(x)),rep(2,length(y)))
y0=c(x,y)
e.pow=wincor(x0,y0,tr=tr)$cor^2
}}
#
if(PB){
bot=pbvar(pts)
e.pow=top/bot
}
#
}
if(n1!=n2){
N=min(c(n1,n2))
vals=0
for(i in 1:nboot)vals[i]=yuen.effect(sample(x,N),sample(y,N))$Var.Explained
e.pow=loc.fun(vals)
}
if(plotit){
plot(xx,pts,xlab=xlab,ylab=ylab)
if(op)
points(c(1,2),c(m1,m2))
if(VL)lines(c(1,2),c(m1,m2))
}
list(ci=c(low,up),n1=n1,n2=n2,
p.value=yuen,dif=dif,se=sqrt(q1+q2),teststat=test,
crit=crit,df=df,Var.Explained=e.pow,Effect.Size=sqrt(e.pow))
}
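
# A minimal usage sketch for yuenv2 (data simulated for illustration):
if(FALSE){
set.seed(1)
yuenv2(rnorm(30),rnorm(30)+.5)   # Yuen's test plus an effect size estimate
}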

yuen.effect.ci<-function(x,y,SEED=TRUE,nboot=400,tr=.2,alpha=.05){
#
# Compute a 1-alpha  confidence interval
# for a robust, heteroscedastic  measure of effect size
#  The absolute value of the measure of effect size is used.
#
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
x=elimna(x)
y=elimna(y)
bvec=0
datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot)
for(i in 1:nboot){
bvec[i]=yuenv2(datax[i,],datay[i,],tr=tr,SEED=F)$Effect.Size
}
bvec<-sort(abs(bvec))
crit<-alpha/2
icl<-round(crit*nboot)+1
icu<-nboot-icl
ci<-NA
ci[1]<-bvec[icl]
pchk=yuen(x,y,tr=tr)$p.value
if(pchk>alpha)ci[1]=0
ci[2]<-bvec[icu]
if(ci[1]<0)ci[1]=0
es=abs(yuenv2(x,y,tr=tr)$Effect.Size)
list(CI=ci,Effect.Size=es)
}

interplot<-function(J,K,x,locfun=mean,locvec=NULL,
g1lev=NULL,g2lev=NULL,type = c("l",
    "p", "b"), xlab = "Fac 1", ylab = "means",trace.label="Fac 2"){
if(is.null(locvec))locvec=lloc(x,est=locfun)
if(is.null(g1lev[1])){
g1=c(rep(1,K))
for(j in 2:J)g1=c(g1,rep(j,K))
}
if(!is.null(g1lev)){
g1=c(rep(g1lev[1],K))
for(j in 2:J)g1=c(g1,rep(g1lev[j],K))
}
g1=as.factor(g1)
if(is.null(g2lev[1]))g2=as.factor(rep(c(1:K),J))
if(!is.null(g2lev[1]))g2=as.factor(rep(g2lev,J))
g2=as.factor(g2)
interaction.plot(g1,g2,locvec, type=match.arg(type), xlab = xlab, ylab = ylab,
trace.label=trace.label)
}



pbad2way<-function(J,K,x,est=onestep,conall=TRUE,alpha=.05,nboot=2000,grp=NA,
op=FALSE,pro.dis=FALSE,MM=FALSE,...){
#
# This function is like the function pbadepth,
# only it is assumed that main effects and interactions for a
# two-way design are to be tested.
#
        #   The data are assumed to be stored in x in list mode or in a matrix.
        #  If grp is unspecified, it is assumed x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second factor: level 1,2
        #  x[[K+1]] is the data for level 2,1, etc.
        #  If the data are in wrong order, grp can be used to rearrange the
        #  groups. For example, for a two by two design, grp<-c(2,4,3,1)
        #  indicates that the second group corresponds to level 1,1;
        #  group 4 corresponds to level 1,2; group 3 is level 2,1;
        #  and group 1 is level 2,2.
        #
        #   Missing values are automatically removed.
        #
        JK <- J * K
        if(is.matrix(x))
                x <- listm(x)
        if(!is.na(grp[1])) {
                yy <- x
                for(j in 1:length(grp))
                        x[[j]] <- yy[[grp[j]]]
        }
        if(!is.list(x))
                stop("Data must be stored in list mode or a matrix.")
        for(j in 1:JK) {
                xx <- x[[j]]
                x[[j]] <- xx[!is.na(xx)]
        }
        #
        # Create the three contrast matrices
        #
        if(!conall){
        ij <- matrix(c(rep(1, J)), 1, J)
        ik <- matrix(c(rep(1, K)), 1, K)
        jm1 <- J - 1
        cj <- diag(1, jm1, J)
        for(i in 1:jm1)
                cj[i, i + 1] <- 0 - 1
        km1 <- K - 1
        ck <- diag(1, km1, K)
        for(i in 1:km1)
                ck[i, i + 1] <- 0 - 1
        conA <- t(kron(cj, ik))
        conB <- t(kron(ij, ck))
        conAB <- t(kron(abs(cj), ck))
}
if(conall){
temp<-con2way(J,K)
conA<-temp$conA
conB<-temp$conB
conAB<-temp$conAB
}
        ncon <- max(nrow(conA), nrow(conB), nrow(conAB))
        if(JK != length(x))
                warning("The number of groups does not match the number of contrast coefficients.")
if(!is.na(grp[1])){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
mvec<-NA
for(j in 1:JK){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
x[[j]]<-temp
mvec[j]<-est(temp,...)
}
bvec<-matrix(NA,nrow=JK,ncol=nboot)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:JK){
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,est,...) # J by nboot matrix, jth row contains
#                          bootstrapped  estimates for jth group
}
bconA<-t(conA)%*%bvec #C by nboot matrix
tvecA<-t(conA)%*%mvec
tvecA<-tvecA[,1]
tempcenA<-apply(bconA,1,mean)
veczA<-rep(0,ncol(conA))
bconA<-t(bconA)
smatA<-var(bconA-tempcenA+tvecA)
bconA<-rbind(bconA,veczA)
if(!pro.dis){
if(!op)dv<-mahalanobis(bconA,tvecA,smatA)
if(op){
dv<-out(bconA)$dis
}}
if(pro.dis)dv=pdis(bconA,MM=MM)
bplus<-nboot+1
sig.levelA<-1-sum(dv[bplus]>=dv[1:nboot])/nboot
bconB<-t(conB)%*%bvec #C by nboot matrix
tvecB<-t(conB)%*%mvec
tvecB<-tvecB[,1]
tempcenB<-apply(bconB,1,mean)
veczB<-rep(0,ncol(conB))
bconB<-t(bconB)
smatB<-var(bconB-tempcenB+tvecB)
bconB<-rbind(bconB,veczB)
if(!pro.dis){
if(!op)dv<-mahalanobis(bconB,tvecB,smatB)
if(op){
dv<-out(bconB)$dis
}}
if(pro.dis)dv=pdis(bconB,MM=MM)
sig.levelB<-1-sum(dv[bplus]>=dv[1:nboot])/nboot
bconAB<-t(conAB)%*%bvec #C by nboot matrix
tvecAB<-t(conAB)%*%mvec
tvecAB<-tvecAB[,1]
tempcenAB<-apply(bconAB,1,mean)
veczAB<-rep(0,ncol(conAB))
bconAB<-t(bconAB)
smatAB<-var(bconAB-tempcenAB+tvecAB)
bconAB<-rbind(bconAB,veczAB)
if(!pro.dis){
if(!op)dv<-mahalanobis(bconAB,tvecAB,smatAB)
if(op){
dv<-out(bconAB)$dis
}}
if(pro.dis)dv=pdis(bconAB,MM=MM)
sig.levelAB<-1-sum(dv[bplus]>=dv[1:nboot])/nboot
list(sig.levelA=sig.levelA,sig.levelB=sig.levelB,sig.levelAB=sig.levelAB,conA=conA,conB=conB,conAB=conAB)

}




t2way.no.p<-function(J,K,x,tr=.2,grp=c(1:p),alpha=.05,p=J*K){
#  Perform a J by K (two-way) anova on trimmed means where
#  all jk groups are independent.
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode.
#  If grp is unspecified, it is assumed x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second factor: level 1,2
#  x[[K+1]] is the data for level 2,1, etc.
#  If the data are in wrong order, grp can be used to rearrange the
#  groups. For example, for a two by two design, grp<-c(2,4,3,1)
#  indicates that the second group corresponds to level 1,1;
#  group 4 corresponds to level 1,2; group 3 is level 2,1;
#  and group 1 is level 2,2.
#
#  The default amount of trimming is tr=.2
#
#  It is assumed that the input variable x has length JK, the total number of
#  groups being tested. If not, a warning message is printed.
#
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data are not stored in a matrix or in list mode")
if(p!=length(x)){
print("Warning: The number of groups in your data is not equal to JK")
}
for(j in 1:p)x[[j]]<-elimna(x[[j]])
xbar<-0
h<-0
d<-0
R<-0
W<-0
d<-0
r<-0
w<-0
nuhat<-0
omegahat<-0
DROW<-0
DCOL<-0
xtil<-matrix(0,J,K)
aval<-matrix(0,J,K)
for (j in 1:p){
xbar[j]<-mean(x[[grp[j]]],tr)
h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]]))
d[j]<-(length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr)/(h[j]*(h[j]-1))
}
d<-matrix(d,J,K,byrow=T)
xbar<-matrix(xbar,J,K,byrow=T)
h<-matrix(h,J,K,byrow=T)
for(j in 1:J){
R[j]<-sum(xbar[j,])
nuhat[j]<-(sum(d[j,]))^2/sum(d[j,]^2/(h[j,]-1))
r[j]<-1/sum(d[j,])
DROW[j]<-sum(1/d[j,])
}
for(k in 1:K){
W[k]<-sum(xbar[,k])
omegahat[k]<-(sum(d[,k]))^2/sum(d[,k]^2/(h[,k]-1))
w[k]<-1/sum(d[,k])
DCOL[k]<-sum(1/d[,k])
}
D<-1/d
for(j in 1:J){
for(k in 1:K){
xtil[j,k]<-sum(D[,k]*xbar[,k]/DCOL[k])+sum(D[j,]*xbar[j,]/DROW[j])-
sum(D*xbar/sum(D))
aval[j,k]<-(1-D[j,k]*(1/sum(D[j,])+1/sum(D[,k])-1/sum(D)))^2/(h[j,k]-3)
}
}
Rhat<-sum(r*R)/sum(r)
What<-sum(w*W)/sum(w)
Ba<-sum((1-r/sum(r))^2/nuhat)
Bb<-sum((1-w/sum(w))^2/omegahat)
Va<-sum(r*(R-Rhat)^2)/((J-1)*(1+2*(J-2)*Ba/(J^2-1)))
Vb<-sum(w*(W-What)^2)/((K-1)*(1+2*(K-2)*Bb/(K^2-1)))
nu2<-(J^2-1)/(3*Ba)
sig.A<-1-pf(Va,J-1,nu2)
nu2<-(K^2-1)/(3*Bb)
sig.B<-1-pf(Vb,K-1,nu2)
# Next, do test for interactions
Vab<-sum(D*(xbar-xtil)^2)
dfinter<-(J-1)*(K-1)
crit<-qchisq(1-alpha,dfinter)
hc<-(crit/(2*dfinter))*(1+(3*crit)/(dfinter+2))*sum(aval)
adcrit<-crit+hc
list(Qa=Va,sig.A=sig.A,Qb=Vb,sig.B=sig.B,Qab=Vab,critinter=adcrit)
}


t2waybt<-function(J,K,x,tr=.2,grp=c(1:p),p=J*K,nboot=599,SEED=TRUE){
#
#   Two-way ANOVA based on trimmed means and a bootstrap-t method
#
#   The data are assumed to be stored as described in the function t2way
#
#   The default number of bootstrap samples is nboot=599
#
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
# compute test statistics:
tests=t2way.no.p(J=J,K=K,x,tr=tr,grp=grp)
TA=NULL
TB=NULL
TAB=NULL
data=list()
xcen=list()
for(j in 1:length(x))xcen[[j]]<-x[[j]]-mean(x[[j]],tr)
print("Taking bootstrap samples. Please wait.")
for(b in 1:nboot){
for(j in 1:length(x))data[[j]]<-sample(xcen[[j]],size=length(x[[j]]),replace=TRUE)
bt=t2way.no.p(J,K,data,tr=tr,grp=grp)
TA[b]=bt$Qa
TB[b]=bt$Qb
TAB[b]=bt$Qab
}
pA<-sum(tests$Qa<=TA)/nboot
pB<-sum(tests$Qb<=TB)/nboot
pAB<-sum(tests$Qab<=TAB)/nboot
list(A.p.value=pA,B.p.value=pB,AB.p.value=pAB)
}
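
# A minimal usage sketch for t2waybt (data simulated for illustration):
# a 2 by 3 between design, six independent groups.
if(FALSE){
set.seed(1)
x<-list()
for(j in 1:6)x[[j]]<-rnorm(15)
t2waybt(2,3,x)
}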


t3way<-function(J,K,L,x,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,MAT=FALSE,
lev.col=c(1:3),var.col=4,pr=TRUE,IV1=NULL,IV2=NULL,IV3=NULL){
#  Perform a J by K by L (three-way) anova on trimmed means where
#  all JKL groups are independent.
#
#  The R variable data is assumed to contain the raw
#  data stored in list mode. data[[1]] contains the data
#  for the first level of all three factors: level 1,1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first two factors and level 2 of the third factor: level 1,1,2
#  data[[L]] is the data for level 1,1,L
#  data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L.
#  data[[KL+1]] is level 2,1,1, etc.
#
#  The default amount of trimming is tr=.2
#
#  It is assumed that data has length JKL, the total number of
#  groups being tested.
#
#  MAT=T, assumes data are stored in matrix with 3 columns indicating
#  levels of the three factors.
#  That is, this function calls selby2 for you.
#
if(is.data.frame(x))x=as.matrix(x)
if(!is.null(IV1[1])){
if(is.null(IV2[1]))stop("IV2 is NULL")
if(is.null(IV3[1]))stop("IV3 is NULL")
if(pr)print("Assuming x is a vector containing all of the data; the dependent variable")
xi=elimna(cbind(x,IV1,IV2,IV3))
x=fac2list(xi[,1],xi[,2:4])
J=length(unique(IV1))
K=length(unique(IV2))
L=length(unique(IV3))
p=J*K*L
}
data=x
if(MAT){
if(!is.matrix(data))stop("With MAT=T, data must be a matrix")
if(length(lev.col)!=3)stop("Argument lev.col should have 3 values")
temp=selby2(data,lev.col,var.col)
lev1=length(unique(temp$grpn[,1]))
lev2=length(unique(temp$grpn[,2]))
lev3=length(unique(temp$grpn[,3]))
gv=apply(temp$grpn,2,rank)
gvad=100*gv[,1]+10*gv[,2]+gv[,3]
grp=rank(gvad)
if(pr){
print(paste("Factor 1 has", lev1, "levels"))
print(paste("Factor 2 has", lev2, "levels"))
print(paste("Factor 3 has", lev3, "levels"))
}
if(J!=lev1)warning("J is being reset to the number of levels found")
if(K!=lev2)warning("K is being reset to the number of levels found")
if(L!=lev3)warning("K is being reset to the number of levels found")
J=lev1
K=lev2
L=lev3
data=temp$x
}
if(is.matrix(data))data=listm(data)
if(!is.list(data))stop("Data are not stored in list mode")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups in data is")
print(length(data))
print("Warning: These two values are not equal")
}
tmeans<-0
h<-0
v<-0
for (i in 1:p){
tmeans[i]<-mean(data[[grp[i]]],tr)
h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]]))
#    h is the effective sample size
v[i]<-(length(data[[grp[i]]])-1)*winvar(data[[grp[i]]],tr)/(h[i]*(h[i]-1))
#    v contains the squared standard errors
}
v<-diag(v,p,p)   # Put squared standard errors in a diag matrix.
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
il<-matrix(c(rep(1,L)),1,L)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
lm1<-L-1
cl<-diag(1,lm1,L)
for (i in 1:lm1)cl[i,i+1]<-0-1
alval<-c(1:999)/1000
#  Do test for factor A
cmat<-kron(cj,kron(ik,il))  # Contrast matrix for factor A
Qa<-johan(cmat,tmeans,v,h,alpha)
A.p.value=t3pval(cmat,tmeans,v,h)
# Do test for factor B
cmat<-kron(ij,kron(ck,il))  # Contrast matrix for factor B
Qb<-johan(cmat,tmeans,v,h,alpha)
B.p.value=t3pval(cmat,tmeans,v,h)
# Do test for factor C
cmat<-kron(ij,kron(ik,cl))  # Contrast matrix for factor C
#Qc<-johan(cmat,tmeans,v,h,alpha)
for(i in 1:999){
irem<-i
Qc<-johan(cmat,tmeans,v,h,alval[i])
if(Qc$teststat>Qc$crit)break
}
C.p.value=irem/1000
# Do test for factor A by B interaction
cmat<-kron(cj,kron(ck,il))  # Contrast matrix for factor A by B
for(i in 1:999){
irem<-i
Qab<-johan(cmat,tmeans,v,h,alval[i])
if(Qab$teststat>Qab$crit)break
}
AB.p.value=irem/1000
# Do test for factor A by C interaction
cmat<-kron(cj,kron(ik,cl))  # Contrast matrix for factor A by C
for(i in 1:999){
irem<-i
Qac<-johan(cmat,tmeans,v,h,alval[i])
if(Qac$teststat>Qac$crit)break
}
AC.p.value=irem/1000
#Qac<-johan(cmat,tmeans,v,h,alpha)
# Do test for factor B by C interaction
cmat<-kron(ij,kron(ck,cl))  # Contrast matrix for factor B by C
#Qbc<-johan(cmat,tmeans,v,h,alpha)
for(i in 1:999){
irem<-i
Qbc<-johan(cmat,tmeans,v,h,alval[i])
if(Qbc$teststat>Qbc$crit)break
}
BC.p.value=irem/1000
# Do test for factor A by B by C interaction
cmat<-kron(cj,kron(ck,cl))  # Contrast matrix for factor A by B by C
#Qabc<-johan(cmat,tmeans,v,h,alpha)
for(i in 1:999){
irem<-i
Qabc<-johan(cmat,tmeans,v,h,alval[i])
if(Qabc$teststat>Qabc$crit)break
}
ABC.p.value=irem/1000
list(Qa=Qa$teststat,Qa.crit=Qa$crit,A.p.value=A.p.value,
Qb=Qb$teststat,Qb.crit=Qb$crit,
B.p.value=B.p.value,
Qc=Qc$teststat,Qc.crit=Qc$crit,C.p.value=C.p.value,
Qab=Qab$teststat,Qab.crit=Qab$crit,
AB.p.value=AB.p.value,
Qac=Qac$teststat,Qac.crit=Qac$crit,AC.p.value=AC.p.value,
Qbc=Qbc$teststat,Qbc.crit=Qbc$crit,
BC.p.value=BC.p.value,
Qabc=Qabc$teststat,Qabc.crit=Qabc$crit,ABC.p.value=ABC.p.value)
}
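
#  A minimal usage sketch for t3way (illustration only; hypothetical data,
#  assumes this file has been sourced):
#    set.seed(1)
#    dat=lapply(1:8,function(i)rnorm(20))  # a 2 by 2 by 2 design; dat[[1]] is level 1,1,1, etc.
#    t3way(2,2,2,dat)                      # three-way heteroscedastic ANOVA on 20% trimmed means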

regciMC<-function(x,y,regfun=tsreg,nboot=599,alpha=.05,plotit=FALSE,pr=TRUE,
xlab="Predictor 1",ylab="Predictor 2",xout=FALSE,outfun=outpro,SEED=TRUE,...){
#
#   Compute a .95 confidence interval for each of the parameters of
#   a linear regression equation. The default regression method is
#   Theil-Sen estimator.
#
#   When using the least squares estimator, and when n<250, use
#   lsfitci instead.
#
#   Same as the function regci, only a multi-core processor is used.
#
#   The predictor values are assumed to be in the n by p matrix x.
#   The default number of bootstrap samples is nboot=599
#
#   regfun can be any R function that returns the coefficients in
#   the vector regfun$coef, the first element of which contains the
#   estimated intercept, the second element contains the estimated slope
#   for the first predictor, etc.
#
#   plotit=TRUE: If there are two predictors, plot 1-alpha confidence region based
#  on the bootstrap samples.
#
library(parallel)
x<-as.matrix(x)
p1<-ncol(x)+1
p<-ncol(x)
xy<-cbind(x,y)
xy<-elimna(xy)
x<-xy[,1:p]
y<-xy[,p1]
nrem=length(y)
estit=regfun(x,y,xout=xout,...)$coef
if(xout){
if(pr)print("Default for argument outfun is now outpro")
m<-cbind(x,y)
flag<-outfun(x,plotit=F,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
flagF=identical(regfun,tsreg)
if(flagF){
if(pr){
if(sum(duplicated(y))>0)print("Duplicate values detected; tshdreg might have more power than tsreg")
}}
x=as.matrix(x)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
data=listm(t(data))
bvec<-mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE,xout=FALSE,...)
bvec=matl(bvec)
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
p1<-ncol(x)+1
regci<-matrix(0,p1,5)
vlabs="Intercept"
for(j in 2:p1)vlabs[j]=paste("Slope",j-1)
dimnames(regci)<-list(vlabs,c("ci.low","ci.up","Estimate","S.E.","p-value"))
ilow<-round((alpha/2) * nboot)
ihi<-nboot - ilow
ilow<-ilow+1
se<-NA
sig.level<-NA
for(i in 1:p1){
temp=(sum(bvec[i,]<0)+.5*sum(bvec[i,]==0))/nboot
sig.level[i]<-2*(min(temp,1-temp))
bsort<-sort(bvec[i,])
regci[i,1]<-bsort[ilow]
regci[i,2]<-bsort[ihi]
se[i]<-sqrt(var(bvec[i,]))
}
if(p1==3){
if(plotit){
plot(bvec[2,],bvec[3,],xlab=xlab,ylab=ylab)
}}
regci[,3]=estit
regci[,4]=se
regci[,5]=sig.level
list(regci=regci,n=nrem,n.keep=length(y))
}
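
#  A minimal usage sketch for regciMC (illustration only; hypothetical data,
#  requires the parallel package):
#    set.seed(1)
#    x=matrix(rnorm(100),ncol=2); y=x[,1]+rnorm(50)
#    regciMC(x,y)  # bootstrap CIs for the intercept and both slopes (Theil-Sen by default)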

regbootMC<-function(data,x,y,regfun,...){
regfun(x[data,],y[data],...)$coef
}

rmdat2mat<-function(m,id.col=NULL,dv.col=NULL){
#
# This function helps manipulate data when dealing with repeated measures
#
# The data are stored in a matrix or data.frame, with one column
# indicating the subject id. So for repeated measures at times
# 1, 2 and 3, say, subject one's id will appear 3 times,
# subject two's id will appear 3 times, etc.
#
# The data are converted to a matrix where the time 1, time 2 and time 3
# data are in columns 1, 2, and 3.
#
x<-vector("list")
grpn<-sort(unique(m[,id.col]))
it<-0
for (ig in 1:length(grpn)){
for (ic in 1:length(dv.col)){
it<-it+1
flag<-(m[,id.col]==grpn[ig])
x[[it]]<-m[flag,dv.col[ic]]
}}
x=t(matl(x))
x
}
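
#  A minimal usage sketch for rmdat2mat (illustration only; hypothetical
#  long-format data with 3 measurements per subject):
#    m=cbind(id=rep(1:10,each=3),y=rnorm(30))
#    rmdat2mat(m,id.col=1,dv.col=2)  # returns a 10 by 3 matrix, one row per subject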

bd1way<-function(x,est=onestep,nboot=599,alpha=.05,SEED=TRUE,misran=FALSE,na.rm=NULL,...){
#
#   Test the hypothesis of equal measures of location for J
#   dependent groups using a
#   percentile bootstrap method.
#   By default, a one-step M-estimator is used.
#   For example, bd1way(x,mean) would compare means
#
#   Data are assumed to be stored  in list mode or an n by J matrix.
#   misran=F means missing values are not assumed to occur at random; casewise deletion is used.
#   misran=T means all values are used, assuming missing values occur at random.
#   Alternatively, set na.rm=F to use all of the data; na.rm=F means misran=T will be used.
#   In effect, when na.rm is specified (e.g., na.rm=T), the argument misran is ignored.
#
if(!is.list(x) && !is.matrix(x))stop("Data must be store in list mode or in an n by J matrix.")
if(is.list(x)){
m<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))m[,j]<-x[[j]]
}
if(is.matrix(x))m<-x
if(!is.null(na.rm))misran=!na.rm
if(!misran)m=elimna(m)
xcen<-m
locval=apply(m,2,est,na.rm=TRUE,...)
for (j in 1:ncol(m))xcen[,j]<-m[,j]-est(m[,j],na.rm=misran,...)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(nrow(m),size=nrow(m)*nboot,replace=TRUE),nrow=nboot)
bvec<-vector("numeric")
bvec<-apply(data,1,bd1way1,xcen,est,misran=misran,...)
# A vector of  nboot test statistics.
icrit<-floor((1-alpha)*nboot+.5)
testv<-vector("numeric")
for (j in 1:ncol(m))testv[j]<-est(m[,j],na.rm=misran,...)
test<-(length(testv)-1)*var(testv)
pv=mean((test<bvec))
list(test=test,estimates=locval,p.value=pv)
}
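
#  A minimal usage sketch for bd1way (illustration only; hypothetical data,
#  J=3 dependent groups in an n by 3 matrix):
#    set.seed(1)
#    x=matrix(rnorm(60),ncol=3)
#    bd1way(x)  # test equality of one-step M-estimators of location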



pdisMC<-function(m,MM=FALSE,cop=3,dop=1,center=NA){
#
# Compute projection distances for points in m
#
#
#
#  MM=F  Projected distances are scaled
#  using the interquartile range.
#  MM=T  Scale projected distances using MAD.
#
#  There are five options for computing the center of the
#  cloud of points when computing projections:
#  cop=1 uses Donoho-Gasko median
#  cop=2 uses MCD center
#  cop=3 uses median of the marginal distributions.
#  cop=4 uses MVE center
#  cop=5 uses skipped mean
#
library(parallel)
m<-elimna(m) # Remove missing values
m<-as.matrix(m)
if(ncol(m)==1){
if(is.na(center[1]))center<-median(m)
dis<-abs(m[,1]-center)
if(!MM){
temp<-idealf(dis)
pdis<-dis/(temp$qu-temp$ql)
}
if(MM)pdis<-dis/mad(dis)
}
if(ncol(m)>1){
if(is.na(center[1])){
if(cop==1)center<-dmean(m,tr=.5,dop=dop)
if(cop==2)center<-cov.mcd(m,print=F)$center
if(cop==3)center<-apply(m,2,median)
if(cop==4)center<-cov.mve(m,print=F)$center
if(cop==5)center<-smean(m)
}
cenmat=matrix(rep(center,nrow(m)),ncol=ncol(m),byrow=T)
Amat=m-cenmat
B=listm(t(Amat))  # so rows are now in B[[1]]...B[[n]]
dis=mclapply(B,outproMC.sub,Amat,mc.preschedule=TRUE)
if(!MM){
dmat<-mclapply(dis,IQRstand,mc.preschedule=TRUE)
}
if(MM)dmat<-mclapply(dis,MADstand,mc.preschedule=TRUE)
pdis<-apply(matl(dmat),1,max,na.rm=TRUE)
}
pdis
}
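
#  A minimal usage sketch for pdisMC (illustration only; hypothetical data,
#  requires the parallel package):
#    set.seed(1)
#    m=matrix(rnorm(100),ncol=2)
#    pdisMC(m)  # projection distances, centered at the marginal medians (cop=3)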
IQRstand<-function(x){
vals=idealf(x)
res=x/(vals$qu-vals$ql)
res
}
MADstand<-function(x){
val=x/mad(x)
val
}
regtestMC<-function(x,y,regfun=tsreg,nboot=600,alpha=.05,plotit=TRUE,
grp=c(1:ncol(x)),nullvec=c(rep(0,length(grp))),xout=FALSE,outfun=outpro,SEED=TRUE,pr=TRUE,...){
#
#  Test the hypothesis that q of the p predictors are equal to
#  some specified constants. By default, the hypothesis is that all
#  p predictors have a coefficient equal to zero.
#  The method is based on a confidence ellipsoid.
#  The critical value is determined with the percentile bootstrap method
#  in conjunction with Mahalanobis distance.
#
library(parallel)
x<-as.matrix(x)
p1<-ncol(x)+1
p<-ncol(x)
xy<-cbind(x,y)
xy<-elimna(xy)
x<-xy[,1:p]
y<-xy[,p1]
if(xout){
if(pr)print("Default for outfun is now outpro")
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
x<-as.matrix(x)
if(length(grp)!=length(nullvec))stop("The arguments grp and nullvec must have the same length.")
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
data=listm(t(data))
# bvec<-apply(data,1,regboot,x,y,regfun) # A p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
bvec=mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE) # list mode bvec[[1]]
#     contains estimate from first bootstrap sample, etc.
bvec=matl(bvec)
grp<-grp+1
est<-regfun(x,y)$coef
estsub<-est[grp]
bsub<-t(bvec[grp,])
if(length(grp)==1){
m1<-sum((bvec[grp,]-estsub)^2)/(length(y)-1)
dis<-(bsub-estsub)^2/m1
}
if(length(grp)>1){
mvec<-apply(bsub,2,FUN=mean)
m1<-var(t(t(bsub)-mvec+estsub))
dis<-mahalanobis(bsub,estsub,m1)
}
dis2<-order(dis)
dis<-sort(dis)
critn<-floor((1-alpha)*nboot)
crit<-dis[critn]
test<-mahalanobis(t(estsub),nullvec,m1)
sig.level<-1-sum(test>dis)/nboot
if(length(grp)==2 && plotit){
plot(bsub,xlab="Parameter 1",ylab="Parameter 2")
points(nullvec[1],nullvec[2],pch=0)
xx<-bsub[dis2[1:critn],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}
list(test=test,crit=crit,sig.level=sig.level,nullvec=nullvec,est=estsub)
}
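
#  A minimal usage sketch for regtestMC: test H0 that all slopes are zero
#  (illustration only; hypothetical data, requires the parallel package):
#    set.seed(1)
#    x=matrix(rnorm(100),ncol=2); y=x[,1]+rnorm(50)
#    regtestMC(x,y,plotit=FALSE)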

pbadepth<-function(x,est=onestep,con=0,alpha=.05,nboot=2000,grp=NA,op=1,allp=TRUE,
MM=FALSE,MC=FALSE,cop=3,SEED=TRUE,na.rm=FALSE,...){
#
#   Test the hypothesis that C linear contrasts all have a value of zero.
#   By default, an M-estimator is used
#
#   Independent groups are assumed.
#
#   The data are assumed to be stored in x in list mode or in a matrix.
#   If stored in list mode,
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J, say.
#   If stored in a matrix, columns correspond to groups.
#
#   By default, all pairwise differences are used, but contrasts
#   can be specified with the argument con.
#   The columns of con indicate the contrast coefficients.
#   Con should have J rows, J=number of groups.
#   For example, con[,1]=c(1,1,-1,-1,0,0) and con[,2]=c(,1,-1,0,0,1,-1)
#   will test two contrasts: (1) the sum of the first
#   two measures of location is
#   equal to the sum of the second two, and (2) the difference between
#   the first two is equal to the difference between the
#   measures of location for groups 5 and 6.
#
#   The default number of bootstrap samples is nboot=2000
#
#   op controls how depth is measured
#   op=1, Mahalanobis
#   op=2, Mahalanobis based on MCD covariance matrix
#   op=3, Projection distance
#   op=4, Projection distance using FORTRAN version
#
#   for arguments MM and cop, see pdis.
#
con<-as.matrix(con)
if(is.matrix(x)){
xx<-list()
for(i in 1:ncol(x)){
xx[[i]]<-x[,i]
}
x<-xx
}
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(grp[1])){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
mvec<-NA
for(j in 1:J){
temp<-x[[j]]
if(na.rm)temp<-temp[!is.na(temp)] # Remove missing values.
x[[j]]<-temp
mvec[j]<-est(temp,...)
}
Jm<-J-1
d<-ifelse(con==0,(J^2-J)/2,ncol(con))
if(sum(con^2)==0){
if(allp){
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
if(!allp){
con<-matrix(0,J,Jm)
for (j in 1:Jm){
jp<-j+1
con[j,j]<-1
con[jp,j]<-0-1
}}}
bvec<-matrix(NA,nrow=J,ncol=nboot)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
#print(paste("Working on group ",j))
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,est,na.rm=na.rm,...) # J by nboot matrix, jth row contains
#                          bootstrapped  estimates for jth group
}
chkna=sum(is.na(bvec))
if(chkna>0){
print("Bootstrap estimates of location could not be computed")
print("This can occur when using an M-estimator")
print("Might try est=tmean")
}
bcon<-t(con)%*%bvec #C by nboot matrix
tvec<-t(con)%*%mvec
tvec<-tvec[,1]
tempcen<-apply(bcon,1,mean)
vecz<-rep(0,ncol(con))
bcon<-t(bcon)
smat<-var(bcon-tempcen+tvec)
temp<-bcon-tempcen+tvec
bcon<-rbind(bcon,vecz)
if(op==1)dv<-mahalanobis(bcon,tvec,smat)
if(op==2){
smat<-cov.mcd(temp)$cov
dv<-mahalanobis(bcon,tvec,smat)
}
if(op==3){
print("Computing p-value. Might take a while with op=3")
if(!MC)dv<-pdis(bcon,MM=MM,cop=cop)
if(MC)dv<-pdisMC(bcon,MM=MM,cop=cop)
}
if(op==4)dv<-pdis.for(bcon,MM=MM,cop=cop,pr=FALSE)
bplus<-nboot+1
sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot
if(op==4)print(sig.level)
list(p.value=sig.level,psihat=tvec,con=con)
}
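
#  A minimal usage sketch for pbadepth (illustration only; hypothetical data,
#  three independent groups in list mode; est=tmean as suggested above):
#    set.seed(1)
#    x=list(rnorm(20),rnorm(20),rnorm(20,.5))
#    pbadepth(x,est=tmean)  # all pairwise contrasts; depth via Mahalanobis (op=1)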

outproMC.sub<-function(B,Amat){
dis<-NA
bot<-sum(B^2)
Bmat=matrix(rep(B,nrow(Amat)),ncol=ncol(Amat),byrow=TRUE)
temp<-apply(Bmat*Amat,1,sum)
temp=matrix(rep(temp,ncol(Amat)),ncol=ncol(Amat))
temp=temp*Bmat/bot
temp=temp^2
dis=apply(temp,1,sum)
dis<-sqrt(dis)
flag=(dis==Inf)
dis[flag]=NA
dis
}
outproMC.sub2<-function(dis,MM,gval){
temp<-idealf(dis)
if(!MM)cu<-median(dis)+gval*(temp$qu-temp$ql)
if(MM)cu<-median(dis)+gval*mad(dis)
outid<-NA
temp2<-(dis> cu)
flag<-rep(0,length(dis))
flag[temp2]<-1
flag
}
bdm2way<-function(J,K,x,grp=c(1:p),p=J*K){
#
# Perform the Brunner, Dette, Munk rank-based ANOVA
# (JASA, 1997, 92, 1494--1502)
# for a J by K independent groups design.
#
# x can be a matrix with columns corresponding to groups
# or it can have list mode.
#
if(is.matrix(x))x<-listm(x)
xx<-list()
for(j in 1:p)xx[[j]]<-x[[grp[j]]]
Ja<-matrix(1,J,J)
Ia<-diag(1,J)
Pa<-Ia-Ja/J
Jb<-matrix(1,K,K)
Ib<-diag(1,K)
Pb<-Ib-Jb/K
cona<-kron(Pa,Jb/K)
conb<-kron(Ja/J,Pb)
conab<-kron(Pa,Pb)
outA<-bdms1(xx,cona)
releff=matrix(outA$q.hat,nrow=J,ncol=K,byrow=TRUE)
outB<-bdms1(xx,conb)
outAB<-bdms1(xx,conab)
list(p.valueA=outA$p.value,p.valueB=outB$p.value,p.valueAB=outAB$p.value,
Relative.Effects=releff)
}
mregdepth<-function(X,RES){
X=as.matrix(X)
XRES=elimna(cbind(X,RES))
p=ncol(X)
p1=p+1
vals=NA
for(j in 1:p)vals[j]=resdepth(XRES[,j],XRES[,p1])
mdepthappr=min(vals)
mdepthappr
}


lband<-function(x,y=NULL,alpha=.05,plotit=TRUE,sm=TRUE,op=1,ylab="delta",
xlab="x (first group)"){
#
#  Compute a confidence band for the shift function.
#  Assuming two dependent groups are being compared
#
#  See Lombard (2005, Technometrics, 47, 364-369)
#
#  if y=NULL, assume x is a matrix with two columns or it has list mode
#
#  If plotit=TRUE, a plot of the shift function is created, assuming that
#  the graphics window has already been activated.
#
#  sm=T: the plot of the shift function is smoothed using
#  lowess if op=1, and an expected frequency curve otherwise.
#
#  This function removes all missing observations.
#
#  When plotting, the median of x is marked with a + and the two
#  quartiles are marked with o.
#
if(!is.null(y[1]))x<-cbind(x,y)
if(is.list(x))x=matl(x)
if(ncol(x)!=2)stop("Should have two groups only")
m<-elimna(x)
y<-m[,2]
x<-m[,1]
n<-length(x)
crit<-nelderv2(m,1,lband.fun2,alpha=alpha)
plotit<-as.logical(plotit)
xsort<-sort(x)
ysort<-sort(y)
l<-0
u<-0
ysort[0]<-NA
ysort[n+1]<-NA
lsub<-c(1:n)-floor(sqrt(2*n)*crit)
usub<-c(1:n)+floor(sqrt(2*n)*crit)
for(ivec in 1:n){
isub<-max(0,lsub[ivec])
l[ivec]<-NA
if(isub>0)l[ivec]<-ysort[isub]-xsort[ivec]
isub<-min(n+1,usub[ivec])
u[ivec]<-NA
if(isub <= n)u[ivec]<-ysort[isub]-xsort[ivec]
}
num<-length(l[l>0 & !is.na(l)])+length(u[u<0 & !is.na(u)])
qhat<-c(1:n)/n
m<-cbind(qhat,l,u)
dimnames(m)<-list(NULL,c("qhat","lower","upper"))
if(plotit){
xsort<-sort(x)
ysort<-sort(y)
del<-0
for (i in 1:n)del[i]<-ysort[i]-xsort[i]
xaxis<-c(xsort,xsort)
yaxis<-c(m[,1],m[,2])
allx<-c(xsort,xsort,xsort)
ally<-c(del,m[,2],m[,3])
temp2<-m[,2]
temp2<-temp2[!is.na(temp2)]
plot(allx,ally,type="n",ylab=ylab,xlab=xlab)
ik<-rep(F,length(xsort))
if(sm){
if(op==1){
ik<-duplicated(xsort)
del<-lowess(xsort,del)$y
}
if(op!=1)del<-runmean(xsort,del,pyhat=TRUE)
}
lines(xsort[!ik],del[!ik])
lines(xsort,m[,2],lty=2)
lines(xsort,m[,3],lty=2)
temp<-summary(x)
text(temp[3],min(temp2),"+")
text(temp[2],min(temp2),"o")
text(temp[5],min(temp2),"o")
}
list(m=m,crit=crit,numsig=num)
}
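
#  A minimal usage sketch for lband (illustration only; hypothetical data,
#  two dependent groups):
#    set.seed(1)
#    x=rnorm(30); y=x+rnorm(30,.3)
#    lband(cbind(x,y),plotit=FALSE)  # confidence band for the shift function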

cov.ogk<-function(x,y=NA,n.iter=1,sigmamu=taulc,v=gkcov,beta=.9,...){
#
# Compute robust (weighted) covariance matrix in Maronna and Zamar
# (2002, Technometrics, eq. 7).
#
# n.iter number of iterations. 1 seems to be best
# sigmamu computes a robust measure of location and scale for
#  data stored in a single vector.
#  v robust correlation coefficient
#  estloc, a robust measure of location
#
if(!is.na(y[1]))x<-cbind(x,y)
if(!is.matrix(x))stop("x should be a matrix")
x<-elimna(x)
temp<-ogk(x,sigmamu=sigmamu,v=v,n.iter=n.iter,beta=beta,...)$cov
temp
}
pbmcp<-function(x,alpha=.05,nboot=NA,grp=NA,est=onestep,con=0,bhop=FALSE,
SEED=TRUE,...){
#
#   Multiple comparisons for  J independent groups.
#
#   The data are assumed to be stored in x
#   which either has list mode or is a matrix.  In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, the columns of the matrix correspond
#   to groups.
#
#   est is the measure of location and defaults to an M-estimator
#   ... can be used to set optional arguments associated with est
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   Missing values are allowed.
#
con<-as.matrix(con)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
tempn<-0
mvec<-NA
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
mvec[j]<-est(temp,...)
}
nmax=max(tempn)
Jm<-J-1
#
# Determine contrast matrix
#
if(sum(con^2)==0){
ncon<-(J^2-J)/2
con<-matrix(0,J,ncon)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
ncon<-ncol(con)
if(nrow(con)!=J){
stop("Something is wrong with con; the number of rows does not match the numbe\
r of groups.")
}
#  Determine nboot if a value was not specified
if(is.na(nboot)){
nboot<-5000
if(J <= 8)nboot<-4000
if(J <= 3)nboot<-2000
}
# Determine critical values
if(!bhop){
if(alpha==.05){
dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(nmax>=100)dvec[1]=.01
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
dvec[1]<-alpha/2
}
dvec<-2*dvec
}
if(nmax>80){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
}
}
if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon
bvec<-matrix(NA,nrow=J,ncol=nboot)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
paste("Working on group ",j)
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group
}
chkna=sum(is.na(bvec))
if(chkna>0){
print("Bootstrap estimates of location could not be computed")
print("This can occur when using an M-estimator")
print("Might try est=tmean")
}
test<-NA
bcon<-t(con)%*%bvec #ncon by nboot matrix
tvec<-t(con)%*%mvec
for (d in 1:ncon){
test[d]<-(sum(bcon[d,]>0)+.5*sum(bcon[d,]==0))/nboot
if(test[d]> .5)test[d]<-1-test[d]
}
test<-2*test
output<-matrix(0,ncon,6)
dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit",
"ci.lower","ci.upper"))
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
for (ic in 1:ncol(con)){
output[ic,2]<-tvec[ic,]
output[ic,1]<-ic
output[ic,3]<-test[ic]
temp<-sort(bcon[ic,])
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}
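
#  A minimal usage sketch for pbmcp (illustration only; hypothetical data,
#  J=3 independent groups in list mode):
#    set.seed(1)
#    x=list(rnorm(15),rnorm(15),rnorm(15,1))
#    pbmcp(x,est=tmean)  # all pairwise comparisons with FWE controlled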

bmpmul<-function(x,alpha=.05){
#
#  Perform Brunner-Munzel method for all pairs of J independent groups.
#
#  The familywise type I error probability is controlled by using
#  a critical value from the Studentized maximum modulus distribution.
#
#  The data are assumed to be stored in $x$ in list mode
#  or in a matrix having J columns.
#
#  Missing values are automatically removed.
#
#  The default value for alpha is .05. Any other value results in using
#  alpha=.01.
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
J<-length(x)
CC<-(J^2-J)/2
test<-matrix(NA,CC,7)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
}
dimnames(test)<-list(NULL,c("Group","Group","P.hat","ci.lower","ci.upper","df","p.value"))
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
temp<-bmp(x[[j]],x[[k]],alpha)
crit<-0-smmcrit(temp$df,CC)
if(alpha!=.05)crit<-0-smmcrit01(temp$df,CC)
temp<-bmp(x[[j]],x[[k]],crit=crit)
jcom<-jcom+1
test[jcom,1]<-j
test[jcom,2]<-k
test[jcom,3]<-temp$phat
test[jcom,4]<-temp$ci.p[1]
test[jcom,5]<-temp$ci.p[2]
test[jcom,6]<-temp$df
test[jcom,7]<-temp$p.value
}}}
list(test=test)
}
outproadMC<-function(m,center=NA,plotit=TRUE,op=TRUE,MM=FALSE,cop=3,
xlab="VAR 1",ylab="VAR 2",rate=.05,iter=100,ip=6,pr=TRUE,SEED=TRUE){
#
# Adjusts the critical value, gval used by outpro,
# so that the outside rate per observation, under normality,
# is approximately equal to the value given by the argument
# rate, which defaults to .05.
# That is, expected proportion of points declared outliers under normality
# is intended to be rate=.05
#
# When dealing with p-variate data, p>9, this adjustment can be crucial
#
library(parallel)
m=elimna(m)
n=nrow(m)
if(SEED)set.seed(2) # set the seed before generating the calibration data so results can be duplicated
z=array(rmul(n*iter*ncol(m)),c(iter,n,ncol(m)))
newq=0
gtry=NA
for(itry in 1:ip){
newq=newq+9/10^itry
gtry[itry]=newq
}
gtry=c(.95,.975,gtry[-1])
if(pr)print("Computing adjustment")
if(SEED)set.seed(2)
for(itry in 1:ip){
val=NA  # number of points declared outliers in each replication
for(i in 1:iter){
temp=outproMC(z[i,,],gval = sqrt(qchisq(gtry[itry],ncol(m))),
center=center,plotit=FALSE,op=op,MM=MM,cop=cop)$out.id
val[i]=length(temp)
}
erate=mean(val)/n
if(erate<rate){
newgval=sqrt(qchisq(gtry[itry],ncol(m)))
break
}}
res=outproMC(m,gval=newgval,center=center,plotit=plotit,op=op,MM=MM,
    cop = cop, xlab = xlab, ylab = ylab)
list(results=res,used.gval=newgval)
}



kbcon<-function(x,con=0,tr=.2,alpha=.05,pr=T){
#
#  A heteroscedastic test of d linear contrasts using trimmed means.
#  based on the Kaiser-Bowden method
#
#  The data are assumed to be stored in $x$ in list mode or in a matrix
#  Length(x) (or ncol(x))is assumed to correspond to the total number of groups
#  It is assumed all groups are independent.
#
#  con is a J by d matrix containing the contrast coefficients.
#  If con is not specified, all pairwise comparisons are made.
#
#  Missing values are automatically removed.
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
con<-as.matrix(con)
J<-length(x)
h<-vector("numeric",J)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]]))
   # h is the number of observations in the jth group after trimming.
w[j]<-((length(x[[j]])-1)*winvar(x[[j]],tr))/(h[j]*(h[j]-1))
xbar[j]<-mean(x[[j]],tr)
}
if(sum(con^2)==0){
CC<-(J^2-J)/2
v1=J-1
psihat<-matrix(0,CC,6)
dimnames(psihat)<-list(NULL,c("Group","Group","psihat","ci.lower","ci.upper",
"p.value"))
test<-matrix(NA,CC,6)
dimnames(test)<-list(NULL,c("Group","Group","test","crit","se","df"))
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
jcom<-jcom+1
df<-(w[j]+w[k])^2/(w[j]^2/(h[j]-1)+w[k]^2/(h[k]-1))
term=sqrt((J-1)*(1+(J-2)/df))
test[jcom,3]<-((xbar[j]-xbar[k])/(term*sqrt(w[j]+w[k])))^2
sejk<-sqrt(w[j]+w[k])
test[jcom,5]<-sejk
psihat[jcom,1]<-j
psihat[jcom,2]<-k
test[jcom,1]<-j
test[jcom,2]<-k
psihat[jcom,3]<-(xbar[j]-xbar[k])
test[jcom,6]<-df
crit=qf(1-alpha,v1,df)
aterm=sqrt(crit)*term
test[jcom,4]<-qf(1-alpha,v1,df)
psihat[jcom,4]<-(xbar[j]-xbar[k])-aterm*sejk
psihat[jcom,5]<-(xbar[j]-xbar[k])+aterm*sejk
psihat[jcom,6]<-1-pf(test[jcom,3],v1,df)
}}}}
if(sum(con^2)>0){
if(nrow(con)!=length(x)){
stop("The number of groups does not match the number of contrast coefficients.")
}
v1=nrow(con)-1
psihat<-matrix(0,ncol(con),5)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper",
"p.value"))
test<-matrix(0,ncol(con),5)
dimnames(test)<-list(NULL,c("con.num","test","crit","se","df"))
df<-0
L=nrow(con)
for (d in 1:ncol(con)){
psihat[d,1]<-d
psihat[d,2]<-sum(con[,d]*xbar)
sejk<-sqrt(sum(con[,d]^2*w))
test[d,1]<-d
df<-(sum(con[,d]^2*w))^2/sum(con[,d]^4*w^2/(h-1))
A=(L-1)*(1+(L-2)/df)
test[d,2]<-(sum(con[,d]*xbar)/(sqrt(A)*sejk))^2
crit=qf(1-alpha,v1,df)
test[d,3]<-crit
test[d,4]<-sejk
test[d,5]<-df
psihat[d,3]<-psihat[d,2]-sqrt(crit*A)*sejk
psihat[d,4]<-psihat[d,2]+sqrt(crit*A)*sejk
psihat[d,5]<-1-pf(test[d,2],v1,df)
}}
#
if(pr){
print("Note: confidence intervals are adjusted to control FWE")
print("But p-values are not adjusted to control FWE")
}
list(test=test,psihat=psihat)
}
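
#  A minimal usage sketch for kbcon: all pairwise Kaiser-Bowden comparisons
#  (illustration only; hypothetical data):
#    set.seed(1)
#    x=list(rnorm(20),rnorm(20),rnorm(20,.8))
#    kbcon(x)  # heteroscedastic contrasts on 20% trimmed means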
smmvalv2<-function(dfvec,iter=10000,alpha=.05,SEED=TRUE){
#
if(SEED)set.seed(1)
J<-length(dfvec)
z=matrix(nrow=iter,ncol=J)
for(j in 1: J)z[,j]=rt(iter,dfvec[j])
vals=apply(z,1,max)
vals<-sort(vals)
ival<-round((1-alpha)*iter)
qval<-vals[ival]
qval
}
bwtrim<-function(J,K,data,tr=.2,grp=c(1:p),p=J*K,MAT=FALSE,grpc=1,coln=c(2:3)){
#  Perform a J by K anova on trimmed means with
#  repeated measures on the second factor. That is, a split-plot design
#  is assumed, with the first factor consisting of independent groups.
#
#  The R variable data is assumed to contain the raw
#  data stored in list mode. data[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  data[[K]] is the data for level 1,K
#  data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc.
#
#  The default amount of trimming is tr=.2
#
#  It is assumed that data has length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
#  If the between-group levels are indicated by group numbers stored in a
# column of data, you can set MAT=T, which will store the data in the format
# expected by this function
#
#  Example, grpc=1 means group id numbers are in col 1.
#  coln=c(3:6) means the within variables are stored in col 3-6.
#
#  Or you can use the function selbybw to sort the data.
#
if(is.data.frame(data))data=as.matrix(data)
if(MAT)
data=selbybw(data,grpc=grpc,coln=coln)$x
x<-data
       if(is.matrix(x) || is.data.frame(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                data <- y
        }
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups in data is")
print(length(data))
print("Warning: These two values are not equal")
}
if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.")
tmeans<-0
h<-0
v<-matrix(0,p,p)
klow<-1-K
kup<-0
for (i in 1:p)tmeans[i]<-mean(data[[grp[i]]],tr,na.rm=TRUE)
for (j in 1:J){
h[j]<-length(data[[grp[j]]])-2*floor(tr*length(data[[grp[j]]]))
#    h is the effective sample size for the jth level of factor A
#   Use covmtrim to determine blocks of squared standard errors and
#   covariances.
klow<-klow+K
kup<-kup+K
sel<-c(klow:kup)
v[sel,sel]<-covmtrim(data[grp[klow:kup]],tr)
}
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
#  Do test for factor A
cmat<-kron(cj,ik)  # Contrast matrix for factor A
Qa<-johansp(cmat,tmeans,v,h,J,K)
# Do test for factor B
cmat<-kron(ij,ck)  # Contrast matrix for factor B
Qb<-johansp(cmat,tmeans,v,h,J,K)
# Do test for factor A by B interaction
cmat<-kron(cj,ck)  # Contrast matrix for factor A by B
Qab<-johansp(cmat,tmeans,v,h,J,K)
list(Qa=Qa$teststat,Qa.siglevel=Qa$siglevel,
Qb=Qb$teststat,Qb.siglevel=Qb$siglevel,
Qab=Qab$teststat,Qab.siglevel=Qab$siglevel)
}
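
#  A minimal usage sketch for bwtrim: a 2 by 3 split-plot design, J=2 independent
#  groups measured at K=3 times (illustration only; hypothetical data, list of length JK=6):
#    set.seed(1)
#    dat=lapply(1:6,function(i)rnorm(15))  # dat[[1]] is level 1,1; dat[[6]] is level 2,3
#    bwtrim(2,3,dat)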


rmmest<-function(x,y=NA,alpha=.05,con=0,est=onestep,plotit=TRUE,dif=FALSE,grp=NA,
hoch=FALSE,nboot=NA,BA=TRUE,xlab="Group 1",ylab="Group 2",pr=TRUE,...){
#
#   Use a percentile bootstrap method to  compare dependent groups.
#   By default,
#   compute a .95 confidence interval for all linear contrasts
#   specified by con, a J by C matrix, where  C is the number of
#   contrasts to be tested, and the columns of con are the
#   contrast coefficients.
#   If con is not specified, all pairwise comparisons are done.
#
#   By default, a one-step M-estimator is used
#   and a sequentially rejective method
#   is used to control the probability of at least one Type I error.
#
#   dif=T indicates that difference scores are to be used
#   dif=F indicates that measure of location associated with
#   marginal distributions are used instead.
#
#   nboot is the bootstrap sample size. If not specified, a value will
#   be chosen depending on the number of contrasts there are.
#
#   x can be an n by J matrix or it can have list mode
#   for two groups, data for second group can be put in y
#   otherwise, assume x is a matrix (n by J) or has list mode.
#
#   A sequentially rejective method is used to control alpha.
#
#   Argument BA: When using dif=F, BA=T uses a correction term
#  that is recommended when using MOM.
#
if(dif){
if(pr)print("dif=T, so analysis is done on difference scores")
temp<-rmmcppbd(x,y=y,alpha=alpha,con=con,est,plotit=plotit,grp=grp,
nboot=nboot,hoch=hoch,...)
output<-temp$output
con<-temp$con
}
if(!dif){
if(pr)print("dif=F, so analysis is done on marginal distributions")
if(!is.na(y[1]))x<-cbind(x,y)
if(!is.list(x) && !is.matrix(x))stop("Data must be stored in a matrix or
in list mode.")
if(is.list(x)){
if(is.matrix(con)){
if(length(x)!=nrow(con))stop("The number of rows in con is not equal to the
number of groups.")
}}
if(is.list(x)){
# put the data in an n by J matrix
mat<-matl(x)
}
if(is.matrix(x) && is.matrix(con)){
if(ncol(x)!=nrow(con))stop("The number of rows in con is not equal to the
number of groups.")
mat<-x
}
if(is.matrix(x))mat<-x
if(!is.na(sum(grp)))mat<-mat[,grp]
mat<-elimna(mat) # Remove rows with missing values.
x<-mat
J<-ncol(mat)
xcen<-x
for(j in 1:J)xcen[,j]<-x[,j]-est(x[,j])
Jm<-J-1
if(sum(con^2)==0){
d<-(J^2-J)/2
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
d<-ncol(con)
if(is.na(nboot)){
if(d<=4)nboot<-1000
if(d>4)nboot<-5000
}
n<-nrow(mat)
crit.vec<-alpha/c(1:d)
connum<-ncol(con)
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
xbars<-apply(mat,2,est)
psidat<-NA
for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars)
psihat<-matrix(0,connum,nboot)
psihatcen<-matrix(0,connum,nboot)
bvec<-matrix(NA,ncol=J,nrow=nboot)
bveccen<-matrix(NA,ncol=J,nrow=nboot)
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot){
bvec[ib,]<-apply(x[data[ib,],],2,est,...)
bveccen[ib,]<-apply(xcen[data[ib,],],2,est,...)
}
#
# Now have an nboot by J matrix of bootstrap values.
#
test<-1
bias<-NA
for (ic in 1:connum){
psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic])
psihatcen[ic,]<-apply(bveccen,1,bptdpsi,con[,ic])
bias[ic]<-sum((psihatcen[ic,]>0))/nboot-.5
if(BA){
test[ic]<-sum((psihat[ic,]>0))/nboot-.1*bias[ic]
if(test[ic]<0)test[ic]<-0
}
if(!BA)test[ic]<-sum((psihat[ic,]>0))/nboot
test[ic]<-min(test[ic],1-test[ic])
}
test<-2*test
ncon<-ncol(con)
if(alpha==.05){
dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
dvecba<-dvec
dvec[1]<-alpha/2
}
if(n>=80)hoch=T
if(hoch)dvec<-alpha/(c(1:ncon))
if(plotit && ncol(bvec)==2){
z<-c(0,0)
one<-c(1,1)
plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n")
points(bvec)
totv<-apply(x,2,est,...)
cmat<-var(bvec)
dis<-mahalanobis(bvec,totv,cmat)
temp.dis<-order(dis)
ic<-round((1-alpha)*nboot)
xx<-bvec[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
abline(0,1)
}
temp2<-order(0-test)
ncon<-ncol(con)
zvec<-dvec[1:ncon]
if(BA)zvec<-dvecba[1:ncon]
sigvec<-(test[temp2]>=zvec)
output<-matrix(0,connum,6)
dimnames(output)<-list(NULL,c("con.num","psihat","sig.level","crit.sig",
"ci.lower","ci.upper"))
tmeans<-apply(mat,2,est,...)
psi<-1
for (ic in 1:ncol(con)){
output[ic,2]<-sum(con[,ic]*tmeans)
output[ic,1]<-ic
output[ic,3]<-test[ic]
output[temp2,4]<-zvec
temp<-sort(psihat[ic,])
icl<-round(output[ic,4]*nboot/2)+1
icu<-nboot-(icl-1)
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
}
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}

lindep<-function(x,con,cmat,alpha=.05,tr=.2){
#
#  Compute a test statistic based on the
#  linear contrast coefficients in con and the covariance matrix
#  cmat.
#
#  The data are assumed to be stored in x in list mode
#  or a matrix with columns corresponding to groups.
#
#  con is a J by d matrix containing the contrast coefficients that are used.
#  d=number of linear contrasts
#
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
con<-as.matrix(con)
J<-length(x)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
for(j in 1:J){
xbar[j]<-mean(x[[j]],tr=tr)
}
ncon<-ncol(con)
psihat<-matrix(0,ncol(con),4)
dimnames(psihat)<-list(NULL,c("con.num","psihat","se","test"))
w<-cmat
for (d in 1:ncol(con)){
psihat[d,1]<-d
psihat[d,2]<-sum(con[,d]*xbar)
cvec<-as.matrix(con[,d])
sejk<-sqrt(t(cvec)%*%w%*%cvec)
psihat[d,3]<-sejk
psihat[d,4]<-psihat[d,2]/sejk
}
list(test.stat=psihat)
}
bwmcp<-function(J, K, x, tr = 0.2, JK = J * K, con = 0,
 alpha = 0.05, grp =c(1:JK), nboot = 599, SEED = TRUE, ...)
{
        #
        # A bootstrap-t for multiple comparisons among
        # all main effects and interactions.
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JK, the total number of
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
x=y
}
        conM = con2way(J, K)
 p <- J * K
        v <- matrix(0, p, p)
        data <- list()
xx=list()
        for(j in 1:length(x)) {
                data[[j]] <- x[[grp[j]]]
xx[[j]]=x[[grp[j]]] # save input data
                # Now have the groups in proper order.
                data[[j]] = data[[j]] - mean(data[[j]], tr = tr)
        }
ilow=1-K # so that the first block of K dependent groups is sel=1:K
iup=0
for(j in 1:J){
ilow <- ilow + K
 iup = iup + K
sel <- c(ilow:iup)
xx[sel]=listm(elimna(matl(xx[sel])))
 v[sel, sel] <- covmtrim(xx[sel], tr)
                }
A=lindep(xx,conM$conA,cmat=v,tr=tr)$test.stat
B=lindep(xx,conM$conB,cmat=v,tr=tr)$test.stat
AB=lindep(xx,conM$conAB,cmat=v,tr=tr)$test.stat
        x <- data
        jp <- 1 - K
        kv <- 0
        if(SEED)
                set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        # Next determine the n_j values
        nvec <- NA
        testA = NA
        testB = NA
        testAB = NA
        bsam = list()
        bdat = list()
aboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conA))
bboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conB))
abboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAB))
#        for(j in 1:J)
#                nvec[j] = length(x[[j]])
        for(ib in 1:nboot) {
                ilow <- 1 - K
                iup = 0
 for(j in 1:J) {
 ilow <- ilow + K
 iup = iup + K
nv=length(xx[[ilow]])
bdat[[j]] = sample(nv, size = nv, replace =T)
for(k in ilow:iup){
 bsam[[k]] = xx[[k]][bdat[[j]]]
}
 }
ilow=1-K # again, first block is sel=1:K
iup=0
for(j in 1:J){
ilow <- ilow + K
 iup = iup + K
sel <- c(ilow:iup)
 v[sel, sel] <- covmtrim(bsam[sel], tr)
                }
temp=abs(lindep(bsam,conM$conA, cmat=v,tr=tr)$test.stat[,4])
aboot[ib,]=temp
#testA[ib] = max(abs(lindep(bsam,conM$conA, cmat=v,tr=tr)$test.stat[,4]))
testA[ib] = max(temp)
temp=abs(lindep(bsam,conM$conB,cmat=v,tr=tr)$test.stat[,4])
bboot[ib,]=temp
testB[ib]= max(temp)
temp=abs(lindep(bsam,conM$conAB,cmat=v,tr=tr)$test.stat[,4])
testAB[ib] = max(temp)
abboot[ib,]=temp
        }
pbA=NA
pbB=NA
pbAB=NA
for(j in 1:ncol(aboot))pbA[j]=mean((abs(A[j,4])<aboot[,j]))
for(j in 1:ncol(bboot))pbB[j]=mean((abs(B[j,4])<bboot[,j]))
for(j in 1:ncol(abboot))pbAB[j]=mean((abs(AB[j,4])<abboot[,j]))
        critA = sort(testA)
        critB = sort(testB)
        critAB = sort(testAB)
        ic <- floor((1 - alpha) * nboot)
        critA = critA[ic]
        critB = critB[ic]
        critAB = critAB[ic]
cr=matrix(critA,ncol=1,nrow=nrow(A))
dimnames(cr)<-list(NULL,c("crit.value"))
A=cbind(A,cr)
pv=matrix(pbA,ncol=1,nrow=nrow(A))
dimnames(pv)<-list(NULL,c("p.value"))
A=cbind(A,pv)
cr=matrix(critB,ncol=1,nrow=nrow(B))
dimnames(cr)<-list(NULL,c("crit.value"))
B=cbind(B,cr)
pv=matrix(pbB,ncol=1,nrow=nrow(B))
dimnames(pv)<-list(NULL,c("p.value"))
B=cbind(B,pv)
cr=matrix(critAB,ncol=1,nrow=nrow(AB))
dimnames(cr)<-list(NULL,c("crit.value"))
AB=cbind(AB,cr)
pv=matrix(pbAB,ncol=1,nrow=nrow(AB))
dimnames(pv)<-list(NULL,c("p.value"))
AB=cbind(AB,pv)
list(Fac.A=A,Fac.B=B,Fac.AB=AB,contrast.coef=conM)
}
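
#  A minimal usage sketch for bwmcp: a 2 by 2 split-plot design (illustration only;
#  hypothetical data, nboot kept small just to make the illustration fast):
#    set.seed(1)
#    dat=lapply(1:4,function(i)rnorm(15))
#    bwmcp(2,2,dat,nboot=100)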
bwwmcp<-function(J, K, L, x, tr = 0.2, JKL = J * K*L, con = 0,
 alpha = 0.05, grp =c(1:JKL), nboot = 599, SEED = TRUE, ...)
{
        #
        # A bootstrap-t for multiple comparisons among
        # all main effects and interactions for
        # a between-by-within-within design.
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of all three factors: level 1,1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first two factors and level 2 of the third: level 1,1,2.
        #  x[[L]] is the data for level 1,1,L,
        #  x[[L+1]] is the data for level 1,2,1, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JKL, the total number of
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x)) y[[j]] <- x[, j]
                x <- y
}

        conM = con3way(J,K,L)
 p <- J*K*L
if(p>length(x))stop("JKL is less than the Number of groups")
JK=J*K
KL=K*L
        v <- matrix(0, p, p)
        data <- list()
xx=list()
        for(j in 1:length(x)) {
                data[[j]] <- x[[grp[j]]]
xx[[j]]=x[[grp[j]]] # save input data
                # Now have the groups in proper order.
                data[[j]] = data[[j]] - mean(data[[j]], tr = tr)
        }
ilow=1-KL # blocks of KL dependent groups per level of Factor A; first block is sel=1:KL
iup=0
for(j in 1:J){
ilow <- ilow + KL
 iup = iup + KL
sel <- c(ilow:iup)
xx[sel]=listm(elimna(matl(xx[sel])))
 v[sel, sel] <- covmtrim(xx[sel], tr)
                }
A=lindep(xx,conM$conA,cmat=v,tr=tr)$test.stat
B=lindep(xx,conM$conB,cmat=v,tr=tr)$test.stat
C=lindep(xx,conM$conC,cmat=v,tr=tr)$test.stat
AB=lindep(xx,conM$conAB,cmat=v,tr=tr)$test.stat
AC=lindep(xx,conM$conAC,cmat=v,tr=tr)$test.stat
BC=lindep(xx,conM$conBC,cmat=v,tr=tr)$test.stat
ABC=lindep(xx,conM$conABC,cmat=v,tr=tr)$test.stat
        x <- data
        if(SEED)
                set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        testA = NA
        testB = NA
testC=NA
        testAB = NA
        testAC = NA
        testBC = NA
        testABC = NA
        bsam = list()
        bdat = list()
aboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conA))
bboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conB))
cboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conC))
abboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAB))
acboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAC))
bcboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conBC))
abcboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conABC))
        for(ib in 1:nboot) {
                ilow <- 1 - KL
                iup = 0
 for(j in 1:J) {
 ilow <- ilow + KL
 iup = iup + KL
nv=length(x[[ilow]])
 bdat[[j]] = sample(nv, size = nv, replace =T)
for(k in ilow:iup){
 bsam[[k]] = x[[k]][bdat[[j]]]
}
 }
ilow=1-KL # again, first block is sel=1:KL
iup=0
for(j in 1:J){
ilow <- ilow + KL
 iup = iup + KL
sel <- c(ilow:iup)
 v[sel, sel] <- covmtrim(bsam[sel], tr)
                }
temp=abs(lindep(bsam,conM$conA, cmat=v,tr=tr)$test.stat[,4])
aboot[ib,]=temp
testA[ib] = max(temp)
temp=abs(lindep(bsam,conM$conB,cmat=v,tr=tr)$test.stat[,4])
bboot[ib,]=temp
testB[ib]= max(temp)

temp=abs(lindep(bsam,conM$conC,cmat=v,tr=tr)$test.stat[,4])
cboot[ib,]=temp
testC[ib]= max(temp)

temp=abs(lindep(bsam,conM$conAC,cmat=v,tr=tr)$test.stat[,4])
acboot[ib,]=temp
testAC[ib]= max(temp)

temp=abs(lindep(bsam,conM$conBC,cmat=v,tr=tr)$test.stat[,4])
bcboot[ib,]=temp
testBC[ib]= max(temp)

temp=abs(lindep(bsam,conM$conAB,cmat=v,tr=tr)$test.stat[,4])
testAB[ib] = max(temp)
abboot[ib,]=temp

temp=abs(lindep(bsam,conM$conABC,cmat=v,tr=tr)$test.stat[,4])
abcboot[ib,]=temp
testABC[ib]= max(temp)

        }
pbA=NA
pbB=NA
pbC=NA
pbAB=NA
pbAC=NA
pbBC=NA
pbABC=NA
for(j in 1:ncol(aboot))pbA[j]=mean((abs(A[j,4])<aboot[,j]))
for(j in 1:ncol(bboot))pbB[j]=mean((abs(B[j,4])<bboot[,j]))
for(j in 1:ncol(cboot))pbC[j]=mean((abs(C[j,4])<cboot[,j]))
for(j in 1:ncol(abboot))pbAB[j]=mean((abs(AB[j,4])<abboot[,j]))
for(j in 1:ncol(acboot))pbAC[j]=mean((abs(AC[j,4])<acboot[,j]))
for(j in 1:ncol(bcboot))pbBC[j]=mean((abs(BC[j,4])<bcboot[,j]))
for(j in 1:ncol(abcboot))pbABC[j]=mean((abs(ABC[j,4])<abcboot[,j]))
        critA = sort(testA)
        critB = sort(testB)
        critC = sort(testC)
        critAB = sort(testAB)
        critAC = sort(testAC)
        critBC = sort(testBC)
        critABC = sort(testABC)
        ic <- floor((1 - alpha) * nboot)
        critA = critA[ic]
        critB = critB[ic]
        critC = critC[ic]
        critAB = critAB[ic]
        critAC = critAC[ic]
        critBC = critBC[ic]
        critABC = critABC[ic]
critA=matrix(critA,ncol=1,nrow=nrow(A))
dimnames(critA)=list(NULL,c("crit.val"))
p.value=pbA
A=cbind(A,critA,p.value)


critB=matrix(critB,ncol=1,nrow=nrow(B))
dimnames(critB)=list(NULL,c("crit.val"))
p.value=pbB
B=cbind(B,critB,p.value)

critC=matrix(critC,ncol=1,nrow=nrow(C))
dimnames(critC)=list(NULL,c("crit.val"))
p.value=pbC
C=cbind(C,critC,p.value)

critAB=matrix(critAB,ncol=1,nrow=nrow(AB))
dimnames(critAB)=list(NULL,c("crit.val"))
p.value=pbAB
AB=cbind(AB,critAB,p.value)

critAC=matrix(critAC,ncol=1,nrow=nrow(AC))
dimnames(critAC)=list(NULL,c("crit.val"))
p.value=pbAC
AC=cbind(AC,critAC,p.value)


critBC=matrix(critBC,ncol=1,nrow=nrow(BC))
dimnames(critBC)=list(NULL,c("crit.val"))
p.value=pbBC
BC=cbind(BC,critBC,p.value)

critABC=matrix(critABC,ncol=1,nrow=nrow(ABC))
dimnames(critABC)=list(NULL,c("crit.val"))
p.value=pbABC
ABC=cbind(ABC,critABC,p.value)

list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC)
}
bbwmcp<-function(J, K, L, x, tr = 0.2, JKL = J * K*L, con = 0,
 alpha = 0.05, grp =c(1:JKL), nboot = 599, SEED = TRUE, ...)
{
        #
        # A bootstrap-t for multiple comparisons among
        # all main effects and interactions
#         for a between-by-between-within design.
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of all three factors: level 1,1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first two factors and level 2 of the third: level 1,1,2.
        #  x[[L]] is the data for level 1,1,L,
        #  x[[L+1]] is the data for level 1,2,1, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JKL, the total number of
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x)) y[[j]] <- x[, j]
                x <- y
}

        conM = con3way(J,K,L)
 p <- J*K*L
if(p>length(x))stop("JKL is less than the Number of groups")
JK=J*K
        v <- matrix(0, p, p)
        data <- list()
xx=list()
        for(j in 1:length(x)) {
                data[[j]] <- x[[grp[j]]]
xx[[j]]=x[[grp[j]]] # save input data
                # Now have the groups in proper order.
                data[[j]] = data[[j]] - mean(data[[j]], tr = tr)
        }
ilow=1-L # blocks of L dependent groups per between-group cell; first block is sel=1:L
iup=0
for(j in 1:JK){
ilow <- ilow + L
 iup = iup + L
sel <- c(ilow:iup)
xx[sel]=listm(elimna(matl(xx[sel])))
 v[sel, sel] <- covmtrim(xx[sel], tr)
                }
A=lindep(xx,conM$conA,cmat=v,tr=tr)$test.stat
B=lindep(xx,conM$conB,cmat=v,tr=tr)$test.stat
C=lindep(xx,conM$conC,cmat=v,tr=tr)$test.stat
AB=lindep(xx,conM$conAB,cmat=v,tr=tr)$test.stat
AC=lindep(xx,conM$conAC,cmat=v,tr=tr)$test.stat
BC=lindep(xx,conM$conBC,cmat=v,tr=tr)$test.stat
ABC=lindep(xx,conM$conABC,cmat=v,tr=tr)$test.stat
        x <- data
        jp <- 1 - K
        kv <- 0
        if(SEED)
                set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        testA = NA
        testB = NA
testC=NA
        testAB = NA
        testAC = NA
        testBC = NA
        testABC = NA
        bsam = list()
        bdat = list()
aboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conA))
bboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conB))
cboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conC))
abboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAB))
acboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conAC))
bcboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conBC))
abcboot=matrix(NA,nrow=nboot,ncol=ncol(conM$conABC))
#        for(j in 1:JK)
#                nvec[j] = length(x[[j]])
        for(ib in 1:nboot) {
                ilow <- 1 - L
                iup = 0
 for(j in 1:JK) {
 ilow <- ilow + L
 iup = iup + L
nv=length(x[[ilow]])
 bdat[[j]] = sample(nv, size = nv, replace =T)
for(k in ilow:iup){
 bsam[[k]] = x[[k]][bdat[[j]]]
}
 }
ilow=1-L # again, first block is sel=1:L
iup=0
for(j in 1:JK){
ilow <- ilow + L
 iup = iup + L
sel <- c(ilow:iup)
 v[sel, sel] <- covmtrim(bsam[sel], tr)
                }
temp=abs(lindep(bsam,conM$conA, cmat=v,tr=tr)$test.stat[,4])
aboot[ib,]=temp
testA[ib] = max(temp)
temp=abs(lindep(bsam,conM$conB,cmat=v,tr=tr)$test.stat[,4])
bboot[ib,]=temp
testB[ib]= max(temp)

temp=abs(lindep(bsam,conM$conC,cmat=v,tr=tr)$test.stat[,4])
cboot[ib,]=temp
testC[ib]= max(temp)

temp=abs(lindep(bsam,conM$conAC,cmat=v,tr=tr)$test.stat[,4])
acboot[ib,]=temp
testAC[ib]= max(temp)

temp=abs(lindep(bsam,conM$conBC,cmat=v,tr=tr)$test.stat[,4])
bcboot[ib,]=temp
testBC[ib]= max(temp)

temp=abs(lindep(bsam,conM$conAB,cmat=v,tr=tr)$test.stat[,4])
testAB[ib] = max(temp)
abboot[ib,]=temp

temp=abs(lindep(bsam,conM$conABC,cmat=v,tr=tr)$test.stat[,4])
abcboot[ib,]=temp
testABC[ib]= max(temp)

        }
pbA=NA
pbB=NA
pbC=NA
pbAB=NA
pbAC=NA
pbBC=NA
pbABC=NA
for(j in 1:ncol(aboot))pbA[j]=mean((abs(A[j,4])<aboot[,j]))
for(j in 1:ncol(bboot))pbB[j]=mean((abs(B[j,4])<bboot[,j]))
for(j in 1:ncol(cboot))pbC[j]=mean((abs(C[j,4])<cboot[,j]))
for(j in 1:ncol(abboot))pbAB[j]=mean((abs(AB[j,4])<abboot[,j]))
for(j in 1:ncol(acboot))pbAC[j]=mean((abs(AC[j,4])<acboot[,j]))
for(j in 1:ncol(bcboot))pbBC[j]=mean((abs(BC[j,4])<bcboot[,j]))
for(j in 1:ncol(abcboot))pbABC[j]=mean((abs(ABC[j,4])<abcboot[,j]))
        critA = sort(testA)
        critB = sort(testB)
        critC = sort(testC)
        critAB = sort(testAB)
        critAC = sort(testAC)
        critBC = sort(testBC)
        critABC = sort(testABC)
        ic <- floor((1 - alpha) * nboot)
        critA = critA[ic]
        critB = critB[ic]
        critC = critC[ic]
        critAB = critAB[ic]
        critAC = critAC[ic]
        critBC = critBC[ic]
        critABC = critABC[ic]
critA=matrix(critA,ncol=1,nrow=nrow(A))
dimnames(critA)=list(NULL,c("crit.val"))
p.value=pbA
A=cbind(A,critA,p.value)

critB=matrix(critB,ncol=1,nrow=nrow(B))
dimnames(critB)=list(NULL,c("crit.val"))
p.value=pbB
B=cbind(B,critB,p.value)

critC=matrix(critC,ncol=1,nrow=nrow(C))
dimnames(critC)=list(NULL,c("crit.val"))
p.value=pbC
C=cbind(C,critC,p.value)

critAB=matrix(critAB,ncol=1,nrow=nrow(AB))
dimnames(critAB)=list(NULL,c("crit.val"))
p.value=pbAB
AB=cbind(AB,critAB,p.value)

critAC=matrix(critAC,ncol=1,nrow=nrow(AC))
dimnames(critAC)=list(NULL,c("crit.val"))
p.value=pbAC
AC=cbind(AC,critAC,p.value)


critBC=matrix(critBC,ncol=1,nrow=nrow(BC))
dimnames(critBC)=list(NULL,c("crit.val"))
p.value=pbBC
BC=cbind(BC,critBC,p.value)

critABC=matrix(critABC,ncol=1,nrow=nrow(ABC))
dimnames(critABC)=list(NULL,c("crit.val"))
p.value=pbABC
ABC=cbind(ABC,critABC,p.value)

list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC)
}
bwbmcp<-function(J,K,x,tr=.2,JK=J*K,grp=c(1:JK),con=0,alpha=.05,dif=TRUE,pool=FALSE){
#
# All pairwise comparisons among levels of Factor B
# in a split-plot design using trimmed means.
#
# When pool=TRUE, data are pooled over the levels of Factor A for each
# level of Factor B, and this function calls rmmcp.
# When pool=FALSE, rmmcp is applied separately at each level of Factor A.
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                x <- y
}
JK<-J*K
data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data
if(pool){
data<-list()
m1<-matrix(c(1:JK),J,K,byrow=T)
for(k in 1:K){
for(j in 1:J){
flag<-m1[j,k]
print(paste("Level", j, "factor A:"))
if(j==1)temp<-x[[flag]]
if(j>1){
temp<-c(temp,x[[flag]])
}}
data[[k]]<-temp
}
print("Group numbers refer to levels of Factor B")
temp<-rmmcp(data,con=con,tr=tr,alpha=alpha,dif=dif)
return(temp)
}
if(!pool){
mat<-matrix(c(1:JK),ncol=K,byrow=T)
for(j in 1:J){
data<-list()
ic<-0
for(k in 1:K){
ic<-ic+1
data[[ic]]<-x[[mat[j,k]]]
}
print(paste("For level ", j, " of Factor A:"))
temp<-rmmcp(data,con=con,tr=tr,alpha=alpha,dif=dif)
print(temp$test)
print(temp$psihat)
}}
}

out<-function(x,cov.fun=cov.mve,plotit=TRUE,SEED=TRUE,xlab="X",ylab="Y",qval=.975,
crit=NULL,...){
#
#  Search for outliers using robust measures of location and scatter,
#  which are used to compute robust analogs of Mahalanobis distance.
#
#  x is an n by p matrix or a vector of data.
#
#  The function returns the values flagged as an outlier plus
#  the (row) number where the data point is stored.
#  If x is a vector, out.id=4 indicates that the fourth observation
#  is an outlier and outval=123 indicates that 123 is the value.
#  If x is a matrix, out.id=4 indicates that the fourth row of
#  the matrix is an outlier and outval reports the corresponding
#  values.
#
#  The function also returns the distance of the
#  points identified as outliers
#  in the variable dis.
#
#  For bivariate data, if plotit=TRUE, plot points and circle outliers.
#
#  cov.fun determines how the measure of scatter is estimated.
#  Possible choices are
#  cov.mve (the MVE estimate)
#  cov.mcd (the MCD estimate)
#  covmba2 (the MBA or median ball algorithm)
#  rmba  (an adjustment of MBA suggested by D. Olive)
#  cov.roc (Rocke's TBS estimator)
#
library(MASS)
if(SEED)set.seed(12)
if(is.data.frame(x))x=as.matrix(x)
if(is.list(x))stop("Data cannot be stored in list mode")
nrem=nrow(as.matrix(x))
if(!is.matrix(x)){
dis<-(x-median(x,na.rm=TRUE))^2/mad(x,na.rm=TRUE)^2
if(is.null(crit))crit<-sqrt(qchisq(.975,1))
vec<-c(1:length(x))
}
if(is.matrix(x)){
mve<-cov.fun(elimna(x))
dis<-mahalanobis(x,mve$center,mve$cov)
if(is.null(crit))crit<-sqrt(qchisq(.975,ncol(x)))
vec<-c(1:nrow(x))
}
dis[is.na(dis)]=0
dis<-sqrt(dis)
chk<-ifelse(dis>crit,1,0)
id<-vec[chk==1]
keep<-vec[chk==0]
if(is.matrix(x)){
if(ncol(x)==2 && plotit){
plot(x[,1],x[,2],xlab=xlab,ylab=ylab,type="n")
flag<-rep(T,nrow(x))
flag[id]<-F
points(x[flag,1],x[flag,2])
if(sum(!flag)>0)points(x[!flag,1],x[!flag,2],pch="*")
}}
if(!is.matrix(x))outval<-x[id]
if(is.matrix(x))outval<-x[id,]
n=nrow(as.matrix(x))
n.out=length(id)
list(n=n,n.out=n.out,out.val=outval,out.id=id,keep=keep,dis=dis,crit=crit)
}
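
#  A minimal usage sketch for out (illustration only; hypothetical bivariate data,
#  requires the MASS package):
#    set.seed(1)
#    m=matrix(rnorm(100),ncol=2); m[1,]=c(5,5)  # plant one clear outlier in row 1
#    out(m,plotit=FALSE)$out.id                 # row numbers flagged as outliers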

lintestMC<-function(x,y,regfun=tsreg,nboot=500,alpha=.05,xout=FALSE,outfun=out,...){
#
# Test the hypothesis that the regression surface is a plane.
# Stute et al. (1998, JASA, 93, 141-149).
#
library(parallel)
set.seed(2)
x<-as.matrix(x)
d<-ncol(x)
temp<-elimna(cbind(x,y))
x<-temp[,1:d]
x<-as.matrix(x)
y<-temp[,d+1]
if(xout){
flag<-outfun(x,...)$keep
x<-x[flag,]
x<-as.matrix(x)
y<-y[flag]
}
mflag<-matrix(NA,nrow=length(y),ncol=length(y))
for (j in 1:length(y)){
for (k in 1:length(y)){
mflag[j,k]<-(sum(x[j,]<=x[k,])==ncol(x))
}
}
reg<-regfun(x,y,...)
yhat<-y-reg$residuals
print("Taking bootstrap sample, please wait.")
data<-matrix(runif(length(y)*nboot),nrow=nboot)
data<-sqrt(12)*(data-.5) # standardize the random numbers.
data=listm(t(data))
#rvalb<-apply(data,1,lintests1,yhat,reg$residuals,mflag,x,regfun,...)
rvalb<-mclapply(data,lintests1,yhat,reg$residuals,mflag,x,regfun,mc.preschedule=TRUE,...)
# An n x nboot matrix of R values
rvalb=matl(rvalb)
rvalb<-rvalb/sqrt(length(y))
dstatb<-apply(abs(rvalb),2,max)
wstatb<-apply(rvalb^2,2,mean)
# compute test statistic
v<-c(rep(1,length(y)))
rval<-lintests1(v,yhat,reg$residuals,mflag,x,regfun,...)
rval<-rval/sqrt(length(y))
dstat<-max(abs(rval))
wstat<-mean(rval^2)
ib<-round(nboot*(1-alpha))
p.value.d<-1-sum(dstat>=dstatb)/nboot
p.value.w<-1-sum(wstat>=wstatb)/nboot
#critw<-wstatb[ib]
list(dstat=dstat,wstat=wstat,p.value.d=p.value.d,p.value.w=p.value.w)
}
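
# Illustrative sketch (not part of the original code): lintestMC applied to
# data generated from a quadratic model, where the hypothesis of a planar
# regression surface should tend to be rejected. nboot is reduced only to
# keep the illustration fast; wrapped in if(FALSE) so it never runs on source.
if(FALSE){
set.seed(1)
x=rnorm(50)
y=x^2+rnorm(50)
lintestMC(x,y,nboot=100)  # small p.value.d/p.value.w suggest curvature
}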


yuen.effect<-function(x,y,tr=.2,alpha=.05,plotit=FALSE,
plotfun=splot,op=TRUE,VL=TRUE,cor.op=FALSE,
xlab="Groups",ylab="",PB=FALSE){
#
#  Same as yuen, only it computes explanatory power and the related
# measure of effect size. Only use this with n1=n2. Called by yuenv2
# which allows n1!=n2.
#
#
#  Perform Yuen's test for trimmed means on the data in x and y.
#  The default amount of trimming is 20%
#  Missing values (values stored as NA) are automatically removed.
#
#  A confidence interval for the trimmed mean of x minus the
#  trimmed mean of y is computed and returned in yuen$ci.
#  The p-value is returned in yuen$p.value.
#
#  For an omnibus test with more than two independent groups,
#  use t1way.
#  This function uses winvar from chapter 2.
#
if(tr==.5)stop("Use medpb to compare medians.")
if(tr>.5)stop("Can't have tr>.5")
library(MASS)
x<-x[!is.na(x)]  # Remove any missing values in x
y<-y[!is.na(y)]  # Remove any missing values in y
h1<-length(x)-2*floor(tr*length(x))
h2<-length(y)-2*floor(tr*length(y))
q1<-(length(x)-1)*winvar(x,tr)/(h1*(h1-1))
q2<-(length(y)-1)*winvar(y,tr)/(h2*(h2-1))
df<-(q1+q2)^2/((q1^2/(h1-1))+(q2^2/(h2-1)))
crit<-qt(1-alpha/2,df)
m1=mean(x,tr)
m2=mean(y,tr)
mbar=(m1+m2)/2
dif=m1-m2
low<-dif-crit*sqrt(q1+q2)
up<-dif+crit*sqrt(q1+q2)
test<-abs(dif/sqrt(q1+q2))
yuen<-2*(1-pt(test,df))
xx=c(rep(1,length(x)),rep(2,length(y)))
pts=c(x,y)
top=var(c(m1,m2))
#
if(!PB){
if(tr==0)cterm=1
if(tr>0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr
bot=winvar(pts,tr=tr)/cterm
}
if(PB)bot=pbvar(pts)/1.06
#
e.pow=top/bot
if(e.pow>1){
x0=c(rep(1,length(x)),rep(2,length(y)))
y0=c(x,y)
e.pow=wincor(x0,y0,tr=tr)$cor^2
}
if(plotit){
plot(xx,pts,xlab=xlab,ylab=ylab)
if(op)
points(c(1,2),c(m1,m2))
if(VL)lines(c(1,2),c(m1,m2))
}
list(ci=c(low,up),p.value=yuen,dif=dif,se=sqrt(q1+q2),teststat=test,
crit=crit,df=df,Var.Explained=e.pow,Effect.Size=sqrt(e.pow))
}
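
# Illustrative sketch (not part of the original code): yuen.effect requires
# equal sample sizes; for unequal n use yuenv2. Wrapped in if(FALSE).
if(FALSE){
set.seed(1)
g1=rnorm(30)
g2=rnorm(30,mean=.8)
yuen.effect(g1,g2)  # returns ci, p.value and the explanatory Effect.Size
}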


bbbmcppb.sub<-function(J, K,L, x, est=tmean, JKL = J * K*L, con = 0,
 alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE, ...){
#
#   between-by-between-by-between design
#
        #
        # A percentile bootstrap for
        # multiple comparisons for all main effects and interactions
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
#
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JKL, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
x=y
}
ncon=ncol(con)
 p <- J*K*L
JKL=p
if(p>length(x))stop("JKL is less than the Number of groups")
JK=J*K
KL=K*L
        data <- list()
xx=list()
        for(j in 1:length(x)) {
xx[[j]]=x[[grp[j]]] # save input data
#                # Now have the groups in proper order.
        }
for(j in 1:p){
xx[[j]]=elimna(xx[[j]]) # remove missing values within each group
}
        if(SEED)
                set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        # Next determine the n_j values
        testA = NA
        bsam = list()
        bdat = list()
aboot=matrix(NA,nrow=nboot,ncol=ncol(con))
tvec=NA
tvec=linhat(x,con,est=est,...)
        for(ib in 1:nboot) {
 for(j in 1:JKL) {
nv=length(x[[j]])
bdat[[j]] = sample(nv, size = nv, replace =T)
bsam[[j]] = x[[j]][bdat[[j]]] # resample each independent group separately
}
aboot[ib,]=linhat(bsam,con=con,est=est,...)
}
pbA=NA
for(j in 1:ncol(aboot)){
pbA[j]=mean(aboot[,j]>0)
pbA[j]=2*min(c(pbA[j],1-pbA[j]))
}
# Determine critical values
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncol(con) > 10){
avec<-.05/c(11:(ncol(con)))
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncol(con) > 10){
avec<-.01/c(11:ncol(con))
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncol(con))
}
}
if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con)
outputA<-matrix(0,ncol(con),6)
dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","p.crit",
"ci.lower","ci.upper"))
test=pbA
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
outputA[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
outputA[,2]<-tvec
for (ic in 1:ncol(con)){
outputA[ic,1]<-ic
outputA[ic,3]<-test[ic]
temp<-sort(aboot[,ic])
outputA[ic,5]<-temp[icl]
outputA[ic,6]<-temp[icu]
}
outputA
}

bbbmcppb<-function(J, K,L, x, est=tmean,JKL = J * K*L,
 alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE,...)
{
#
#  BETWEEN-BETWEEN-BETWEEN DESIGN
#
        # A percentile bootstrap for
        # multiple comparisons for all main effects and interactions
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JKL, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
con=con3way(J,K,L)
A=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conA,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
B=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conB,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
C=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
AB=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAB,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
AC=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
BC=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conBC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
ABC=bbbmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conABC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC)
}
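
# Illustrative sketch (not part of the original code): a 2x2x2
# between-by-between-by-between design with the eight independent groups
# stored in list mode. Wrapped in if(FALSE) so it never runs on source.
if(FALSE){
set.seed(1)
x=list()
for(j in 1:8)x[[j]]=rnorm(20)
bbbmcppb(2,2,2,x,nboot=300)$Fac.A  # contrasts for Factor A
}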

linhat<-function(x,con,est=tmean,...){
#
# estimate all linear contrasts specified by con
#
psihat=0
xbar=llocv2(x,est=est,...)$center
for(i in 1:ncol(con))psihat[i]=sum(con[,i]*xbar)
psihat
}
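
# Illustrative sketch (not part of the original code): estimating all pairwise
# contrasts among three groups' 20% trimmed means. Assumes llocv2, used by
# linhat, is available from this file. linhat expects one contrast per column,
# so the rows of cjMAT are transposed. Wrapped in if(FALSE).
if(FALSE){
set.seed(1)
x=list(rnorm(20),rnorm(20,.5),rnorm(20,1))
con=t(cjMAT(3))  # columns: groups 1-2, 1-3, 2-3
linhat(x,con,est=tmean)
}
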
bbwmcppb<-function(J, K,L, x, est=tmean,JKL = J * K*L,
 alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE,...)
{
#
#  BETWEEN-BETWEEN-WITHIN DESIGN
#
        # A percentile bootstrap for multiple comparisons
        #  for all main effects and interactions
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JKL, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
con=con3way(J,K,L)
A=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conA,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
B=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conB,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
C=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
AB=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAB,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
AC=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
BC=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conBC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
ABC=bbwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conABC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC)
}

bbwmcppb.sub<-function(J, K,L, x, est=tmean, JKL = J * K*L, con = 0,
 alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE, ...){
        #
        # A percentile bootstrap for
        # multiple comparisons for all main effects and interactions
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
#
#  JK independent groups, L dependent groups
#

       #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JKL, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
x=y
}
#        nvec <- NA
#for(j in 1:length(x))nvec[j]=length(x[[j]])
ncon=ncol(con)
 p <- J*K*L
if(p>length(x))stop("JKL is less than the Number of groups")
JK=J*K
KL=K*L
        data <- list()
xx=list()
        for(j in 1:length(x)) {
xx[[j]]=x[[grp[j]]] # save input data
#                # Now have the groups in proper order.
        }
ilow=1-L
iup=0
for(j in 1:JK){
ilow <- ilow + L
 iup = iup + L
sel <- c(ilow:iup)
xx[sel]=listm(elimna(matl(xx[sel])))
}

        jp <- 1 - L
        kv <- 0
        if(SEED)
                set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        # Next determine the n_j values
        testA = NA
        bsam = list()
        bdat = list()
aboot=matrix(NA,nrow=nboot,ncol=ncol(con))
tvec=NA
tvec=linhat(x,con,est=est,...)
        for(ib in 1:nboot) {
                ilow <- 1 - L
                iup = 0
 for(j in 1:JK) {
 ilow <- ilow + L
 iup = iup + L
nv=length(x[[ilow]])
bdat[[j]] = sample(nv, size = nv, replace =T)
for(k in ilow:iup){
 bsam[[k]] = x[[k]][bdat[[j]]]
}
}
ilow=0-L
iup=0
aboot[ib,]=linhat(bsam,con=con,est=est,...)
}
pbA=NA
for(j in 1:ncol(aboot)){
pbA[j]=mean(aboot[,j]>0)
pbA[j]=2*min(c(pbA[j],1-pbA[j]))
}
# Determine critical values
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncol(con) > 10){
avec<-.05/c(11:(ncol(con)))
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncol(con) > 10){
avec<-.01/c(11:ncol(con))
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncol(con))
}
}
if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con)
outputA<-matrix(0,ncol(con),6)
dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","p.crit",
"ci.lower","ci.upper"))
test=pbA
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
outputA[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
outputA[,2]<-tvec
for (ic in 1:ncol(con)){
outputA[ic,1]<-ic
outputA[ic,3]<-test[ic]
temp<-sort(aboot[,ic])
outputA[ic,5]<-temp[icl]
outputA[ic,6]<-temp[icu]
}
outputA
}

bwwmcppb.sub<-function(J, K,L, x, est=tmean, JKL = J * K*L, con = 0,
 alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE, ...){
        #
        # A percentile bootstrap for
        # multiple comparisons for all main effects and interactions
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
#
#  J independent groups, KL dependent groups
#

       #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JKL, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
x=y
}
#        nvec <- NA
#for(j in 1:length(x))nvec[j]=length(x[[j]])
ncon=ncol(con)
 p <- J*K*L
if(p>length(x))stop("JKL is less than the Number of groups")
JK=J*K
KL=K*L
#        v <- matrix(0, p, p)
        data <- list()
xx=list()
        for(j in 1:length(x)) {
#                data[[j]] <- x[[grp[j]]]
xx[[j]]=x[[grp[j]]] # save input data
#                # Now have the groups in proper order.
        }
ilow=1-KL
iup=0
for(j in 1:J){
ilow <- ilow + KL
 iup = iup + KL
sel <- c(ilow:iup)
xx[sel]=listm(elimna(matl(xx[sel])))
}

        jp <- 1 - KL
        kv <- 0
        if(SEED)
                set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        # Next determine the n_j values
        testA = NA
        bsam = list()
        bdat = list()
aboot=matrix(NA,nrow=nboot,ncol=ncol(con))
tvec=NA
tvec=linhat(x,con,est=est,...)
        for(ib in 1:nboot) {
                ilow <- 1 - KL
                iup = 0
 for(j in 1:J) {
 ilow <- ilow + KL
 iup = iup + KL
nv=length(x[[ilow]])
bdat[[j]] = sample(nv, size = nv, replace =T)
for(k in ilow:iup){
 bsam[[k]] = x[[k]][bdat[[j]]]
}
}
ilow=0-KL
iup=0
aboot[ib,]=linhat(bsam,con=con,est=est,...)
}
pbA=NA
for(j in 1:ncol(aboot)){
pbA[j]=mean(aboot[,j]>0)
pbA[j]=2*min(c(pbA[j],1-pbA[j]))
}
# Determine critical values
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncol(con) > 10){
avec<-.05/c(11:(ncol(con)))
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncol(con) > 10){
avec<-.01/c(11:ncol(con))
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncol(con))
}
}
if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con)
outputA<-matrix(0,ncol(con),6)
dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","p.crit",
"ci.lower","ci.upper"))
test=pbA
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
outputA[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
outputA[,2]<-tvec
for (ic in 1:ncol(con)){
outputA[ic,1]<-ic
outputA[ic,3]<-test[ic]
temp<-sort(aboot[,ic])
outputA[ic,5]<-temp[icl]
outputA[ic,6]<-temp[icu]
}
outputA
}

wwwmcppb.sub<-function(J, K,L, x, est=tmean, JKL = J * K*L, con = 0,
 alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE, ...){
        #
        # A percentile bootstrap for
        # multiple comparisons for all main effects and interactions
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
#
#  within-by-within-by-within design
#
#  JKL dependent groups
#

       #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JKL, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
x=y
}
#        nvec <- NA
#for(j in 1:length(x))nvec[j]=length(x[[j]])
ncon=ncol(con)
 p <- J*K*L
JKL=p
if(p>length(x))stop("JKL is less than the Number of groups")
JK=J*K
KL=K*L
#        v <- matrix(0, p, p)
        data <- list()
xx=list()
        for(j in 1:length(x)) {
#                data[[j]] <- x[[grp[j]]]
xx[[j]]=x[[grp[j]]] # save input data
#                # Now have the groups in proper order.
        }
        if(SEED)
                set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        # Next determine the n_j values
        testA = NA
        bsam = list()
        bdat = list()
aboot=matrix(NA,nrow=nboot,ncol=ncol(con))
tvec=NA
tvec=linhat(x,con,est=est,...)
nv=length(x[[1]])
        for(ib in 1:nboot) {
bdat = sample(nv, size = nv, replace =T) # one index vector: all JKL measures come from the same cases
for(k in 1:JKL) bsam[[k]] = x[[k]][bdat]
aboot[ib,]=linhat(bsam,con=con,est=est,...)
}
pbA=NA
for(j in 1:ncol(aboot)){
pbA[j]=mean(aboot[,j]>0)
pbA[j]=2*min(c(pbA[j],1-pbA[j]))
}
# Determine critical values
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncol(con) > 10){
avec<-.05/c(11:(ncol(con)))
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncol(con) > 10){
avec<-.01/c(11:ncol(con))
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncol(con))
}
}
if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con)
outputA<-matrix(0,ncol(con),6)
dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","p.crit",
"ci.lower","ci.upper"))
test=pbA
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
outputA[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
outputA[,2]<-tvec
for (ic in 1:ncol(con)){
outputA[ic,1]<-ic
outputA[ic,3]<-test[ic]
temp<-sort(aboot[,ic])
outputA[ic,5]<-temp[icl]
outputA[ic,6]<-temp[icu]
}
outputA
}

wwwmcppb<-function(J, K,L, x, est=tmean,JKL = J * K*L,
 alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE,...)
{
        #
        # A percentile bootstrap for
        # multiple comparisons for all main effects and interactions
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JKL, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
con=con3way(J,K,L)
A=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conA,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
B=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conB,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
C=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
AB=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAB,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
AC=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
BC=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conBC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
ABC=wwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conABC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC)
}

bwwmcppb<-function(J, K,L, x, est=tmean,JKL = J * K*L,
 alpha = 0.05, grp =c(1:JKL), nboot = 500, bhop=FALSE,SEED = TRUE,...)
{
        #
        # A percentile bootstrap for
        # multiple comparisons for all main effects and interactions
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JKL, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
con=con3way(J,K,L)
A=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conA,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
B=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conB,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
C=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
AB=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAB,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
AC=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conAC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
BC=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conBC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
ABC=bwwmcppb.sub(J=J, K=K,L=L, x, est=est,con=con$conABC,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
list(Fac.A=A,Fac.B=B,Fac.C=C,Fac.AB=AB,Fac.AC=AC,Fac.BC=BC,Fac.ABC=ABC)
}


cjMAT<-function(J){
#
# Generate the matrix of all pairwise difference contrasts for J groups.
# Each of the L=(J^2-J)/2 rows contains a 1 and a -1; all other entries are 0.
#
L=(J^2-J)/2
cj=matrix(0,nrow=L,ncol=J)
ic=0
for(j in 1:J){
for(k in 1:J){
if(j<k){
ic=ic+1
cj[ic,j]=1
cj[ic,k]=-1
}}}
cj
}

con3way<-function(J,K,L){
#
# Generate all contrast coefficients for a 3-way design
# with the goal of testing all main effects and interactions.
#
cj=cjMAT(J)
ck=cjMAT(K)
cl=cjMAT(L)
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
il<-matrix(c(rep(1,L)),1,L)
conA=t(kron(cj,kron(ik,il)))
conB=t(kron(ij,kron(ck,il)))
conC=t(kron(ij,kron(ik,cl)))
conAB=kron(cj,kron(ck,il))
conAC=kron(cj,kron(ik,cl))
conBC=kron(ij,kron(ck,cl))
conABC=kron(cj,kron(ck,cl))
list(conA=conA,conB=conB,conC=conC,conAB=t(conAB),conAC=t(conAC),
conBC=t(conBC),conABC=t(conABC))
}
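
# Illustrative sketch (not part of the original code): the dimensions of the
# contrast matrices for a 2x3x2 design; each matrix has one column per
# contrast, matching what linhat expects. Wrapped in if(FALSE).
if(FALSE){
cmat=con3way(2,3,2)
dim(cmat$conA)    # 12 x 1: one contrast for the two levels of Factor A
dim(cmat$conB)    # 12 x 3: all pairwise contrasts for Factor B
dim(cmat$conABC)  # 12 x 3: three-way interaction contrasts
}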


wmwloc2<-function(x){
#
# Compute loc2dif for all pairs of groups
#
if(is.matrix(x))x=listm(x)
locvec=NULL
ic=0
J=length(x)
for(j in 1:J){
for(k in 1:J){
if (j<k){
ic=ic+1
locvec[ic]=loc2dif(x[[j]],x[[k]])
}}}
locvec
}

regpord.sub<-function(isub,x,y,cov.fun){
xmat<-matrix(x[isub,],nrow(x),ncol(x))
vals<-regvarp(xmat,y[isub],cov.fun=cov.fun)
vals
}


t1way.effect<-function(x,tr=.2,grp=NA,MAT=FALSE,lev.col=1,var.col=2){
#
# Same as t1way, but computes explanatory power and related effect size
# Only use this function with equal sample sizes.
# Use t1wayv2 in general, which calls this function when sample sizes differ.
#
#  A heteroscedastic one-way ANOVA for trimmed means
#  using a generalization of Welch's method.
#
#  The data are assumed to be stored in $x$ in a matrix or in list mode.
#
# MAT=F: if x is a matrix, columns correspond to groups.
# MAT=T: the argument
# lev.col
# indicates which column of x denotes the groups, and
# var.col indicates the column where the data are stored.
#
# if x has list mode:
#  length(x) is assumed to correspond to the total number of groups.
#  By default, the null hypothesis is that all groups have a common mean.
#  To compare a subset of the groups, use grp to indicate which
#  groups are to be compared. For example, if you type the
#  command grp<-c(1,3,4), and then execute this function, groups
#  1, 3, and 4 will be compared with the remaining groups ignored.
#
#  Missing values are automatically removed.
#
library(MASS)
if(MAT){
if(!is.matrix(x))stop("With MAT=T, data must be stored in a matrix")
if(length(lev.col)!=1)stop("Argument lev.col should have 1 value")
temp=selby(x,lev.col,var.col)
x=temp$x
grp2=rank(temp$grpn)
x=x[grp2]
}
if(is.matrix(x))x<-listm(x)
if(is.na(sum(grp[1])))grp<-c(1:length(x))
if(!is.list(x))stop("Data are not stored in a matrix or in list mode.")
J<-length(grp)
h<-vector("numeric",J)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
pts=NULL
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
pts=c(pts,val)
x[[j]]<-val[xx]  # missing values have been removed
h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]]))
   # h is the number of observations in the jth group after trimming.
w[j]<-h[j]*(h[j]-1)/((length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr))
xbar[j]<-mean(x[[grp[j]]],tr)
}
u<-sum(w)
xtil<-sum(w*xbar)/u
A<-sum(w*(xbar-xtil)^2)/(J-1)
B<-2*(J-2)*sum((1-w/u)^2/(h-1))/(J^2-1)
TEST<-A/(B+1)
nu1<-J-1
nu2<-1./(3*sum((1-w/u)^2/(h-1))/(J^2-1))
sig<-1-pf(TEST,nu1,nu2)
#
# Determine explanatory effect size
#
top=var(xbar)
bot=winvarN(pts,tr=tr)
e.pow=top/bot
list(TEST=TEST,nu1=nu1,nu2=nu2,siglevel=sig,Var.Explained=e.pow,
Effect.Size=sqrt(e.pow))
}
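
# Illustrative sketch (not part of the original code): three equal-n groups in
# list mode; for unequal sample sizes use t1wayv2 instead. Wrapped in if(FALSE).
if(FALSE){
set.seed(1)
x=list(rnorm(25),rnorm(25,.5),rnorm(25,1))
t1way.effect(x)  # returns the test statistic plus Var.Explained and Effect.Size
}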

snmreg<-function(x,y,SEED=TRUE,xout=FALSE,outfun=outpro,initreg=chreg,...){
#
# Compute regression S-estimator via Nelder-Mead method
# The measure of scale is taken to be the percentage bend midvariance
#
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=FALSE,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
x <- as.matrix(x)
X<-cbind(x,y)
X<-elimna(X)
np<-ncol(X)
npm1=np-1
x=X[,1:npm1]
x=as.matrix(x)
y=X[,np]
N<-np-1
temp<-initreg(x,y,SEED=SEED)$coef
START<-temp[2:np]
temp<-nelder(X,N,FN=snmreg.sub,START=START)
alpha <- median(y - x %*% temp)
coef <- c(alpha,temp)
res <- y - x %*% temp - alpha
list(coef = coef, residuals = res)
}
snmregv2<-function(x,y,SEED=TRUE){
#
# Compute regression S-estimator
# remove points for which residuals are outliers
# then recompute the estimated slopes and intercept.
#
res=snmreg(x,y,SEED=SEED)$residuals
chk<-abs(res-median(res))/mad(res)
x=as.matrix(x)
xx<-x[chk<=2,]
yy<-y[chk<=2]
temp<-snmreg(xx,yy,SEED=SEED)
list(coef=temp$coef,residuals=temp$residuals)
}


larsR<-function(x,y,type="lasso"){
library(lars)
p=ncol(x)
p1=p+1
xy=elimna(cbind(x,y))
result=lars(xy[,1:p],xy[,p1],type=type)
result
}

regvarp<-function(x,y,p=1,locfun=lloc,scat=var,est=mean,cov.fun=cov.mba){
#
# Measure the importance of each subset of p predictors in a regression
# problem via a robust generalized variance; p=1 ranks individual predictors.
#
xy=cbind(x,y)
xy<-elimna(xy)
m<-ncol(x)
x=xy[,1:m]
n<-nrow(x)
m1=m+1
y=xy[,m1]
x=standm(x,locfun=locfun,est=est,scat=scat)
vals=NA
if(p==1)for(j in 1:m){
#z=cbind(y,x[,j])
#vals[j]=prod(eigen(scor(z,plotit=FALSE)$cor.values)$values)
vals[j]=gvarg(cbind(y,x[,j]),cov.fun)
}
if(p>1){
temp=modgen(m)
ic=0
for(j in 1:length(temp)){
if(length(temp[[j]])==p){
ic=ic+1
vals[ic]=gvarg(cbind(y,x[,temp[[j]]]),cov.fun)
z=cbind(y,x[,temp[[j]]])
#vals[ic]=prod(eigen(scor(z,plotit=FALSE)$cor.values)$values)
#vals[ic]=gvarg(cbind(y,x[,temp[[j]]]),cov.fun)
}}}
vals
}

bwmcppb<-function(J, K, x, est=tmean,JK = J * K,
 alpha = 0.05, grp =c(1:JK), nboot = 500, bhop=FALSE,SEED = TRUE,...)
{
        #
        # A percentile bootstrap for
        # multiple comparisons for all main effects and interactions
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JK, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
con=con2way(J,K)
A=bwmcppb.sub(J=J, K=K, x, est=est,JK = J * K,con=con$conA,
 alpha = alpha, grp =c(1:JK), nboot = nboot, bhop=bhop,SEED = SEED,...)
B=bwmcppb.sub(J=J, K=K, x, est=est,JK = J * K,con=con$conB,
 alpha = alpha, grp =c(1:JK), nboot = nboot, bhop=bhop,SEED = SEED,...)
AB=bwmcppb.sub(J=J, K=K, x, est=est,JK = J * K,con=con$conAB,
 alpha = alpha, grp =c(1:JK), nboot = nboot, bhop=bhop,SEED = SEED,...)
list(Fac.A=A,Fac.B=B,Fac.AB=AB)
}
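
# Illustrative sketch (not part of the original code): a 2x3 between-by-within
# design with the J*K=6 groups stored as columns of a matrix, in the order
# described above. Wrapped in if(FALSE) so it never runs on source.
if(FALSE){
set.seed(1)
x=matrix(rnorm(20*6),ncol=6)
bwmcppb(2,3,x,nboot=300)$Fac.A
}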

bwmcppb.sub<-function(J, K, x, est=tmean, JK = J * K, con = 0,
 alpha = 0.05, grp =c(1:JK), nboot = 500, bhop=FALSE,SEED = TRUE, ...){
        #
        # A percentile bootstrap for
        # multiple comparisons for all main effects and interactions
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JK, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
x=y
}
        nvec <- NA
for(j in 1:length(x))nvec[j]=length(x[[j]])
nmax=max(nvec)
ncon=ncol(con)
 p <- J * K
        v <- matrix(0, p, p)
        data <- list()
xx=list()
        for(j in 1:length(x)) {
#                data[[j]] <- x[[grp[j]]]
xx[[j]]=x[[grp[j]]] # save input data
#                # Now have the groups in proper order.
#                data[[j]] = data[[j]] - mean(data[[j]], tr = tr)
        }
ilow=0-K
iup=0
for(j in 1:J){
ilow <- ilow + K
 iup = iup + K
sel <- c(ilow:iup)
xx[sel]=listm(elimna(matl(xx[sel])))
                }
        jp <- 1 - K
        kv <- 0
        if(SEED)
                set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        # Next determine the n_j values
        testA = NA
        bsam = list()
        bdat = list()
aboot=matrix(NA,nrow=nboot,ncol=ncol(con))
tvec=NA
tvec=linhat(x,con,est=est,...)
        for(ib in 1:nboot) {
                ilow <- 1 - K
                iup = 0
 for(j in 1:J) {
 ilow <- ilow + K
 iup = iup + K
nv=length(x[[ilow]])
bdat[[j]] = sample(nv, size = nv, replace =T)
for(k in ilow:iup){
 bsam[[k]] = x[[k]][bdat[[j]]]
}
}
ilow=0-K
iup=0
aboot[ib,]=linhat(bsam,con=con,est=est,...)
}
pbA=NA
for(j in 1:ncol(aboot)){
pbA[j]=mean(aboot[,j]>0)
pbA[j]=2*min(c(pbA[j],1-pbA[j]))
}
# Determine critical values
if(!bhop){
if(alpha==.05){
dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncol(con) > 10){
avec<-.05/c(11:(ncol(con)))
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(nmax>=100)dvec[1]=.01
if(ncol(con) > 10){
avec<-.01/c(11:ncol(con))
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncol(con))
dvec[1]<-alpha/2
}
dvec<-2*dvec
}
if(nmax>80){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncol(con) > 10){
avec<-.05/c(11:ncol(con))
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncol(con))
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncol(con))
}
}
if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con)
outputA<-matrix(0,ncol(con),6)
dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","p.crit",
"ci.lower","ci.upper"))
test=pbA
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
outputA[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
outputA[,2]<-tvec
for (ic in 1:ncol(con)){
outputA[ic,1]<-ic
outputA[ic,3]<-test[ic]
temp<-sort(aboot[,ic])
outputA[ic,5]<-temp[icl]
outputA[ic,6]<-temp[icu]
}
outputA
}

D.akp.effect<-function(x,null.value=0,tr=.2){
#
# Computes a robust effect size for the one-sample case using
# a simple modification of
# Algina, Keselman & Penfield, Psych. Methods, 2005, 317-328.
#
library(MASS)
x<-elimna(x)
s1sq=winvar(x,tr=tr)
cterm=1
if(tr>0)cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr
cterm=sqrt(cterm)
dval<-cterm*(tmean(x)-null.value)/sqrt(s1sq)
dval
}
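
# Illustrative sketch (not part of the original code): robust standardized
# effect size for a single sample relative to null.value=0. Wrapped in if(FALSE).
if(FALSE){
set.seed(1)
D.akp.effect(rnorm(40,mean=.5))  # roughly .5 for standard normal data shifted by .5
}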


smean2v2<-function(m1,m2,nullv=rep(0,ncol(m1)),cop=3,MM=FALSE,SEED=NA,
nboot=500,plotit=TRUE,MC=FALSE,STAND=FALSE){
#
# m1 and m2 are n by p matrices
#
# For two independent groups,
# test hypothesis that multivariate skipped estimators
# are all equal.
#
# The level of the test is .05.
#
# Skipped estimator is used, i.e.,
# eliminate outliers using a projection method
# That is, determine center of data using:
#
# cop=1 Donoho-Gasko median,
# cop=2 MCD,
# cop=3 marginal medians.
# cop=4 MVE
#
# For each point
# consider the line between it and the center
# project all points onto this line, and
# check for outliers using
#
# MM=F, a boxplot rule.
# MM=T, rule based on MAD and median
#
# Repeat this for all points. A point is declared
# an outlier if for any projection it is an outlier
# using a modification of the usual boxplot rule.
#
# Eliminate any outliers and compute means
#  using remaining data.
#
if(ncol(m1) != ncol(m2)){
stop("Number of variables in group 1 does not equal the number in group 2.")
}
if(is.na(SEED))set.seed(2)
if(!is.na(SEED))set.seed(SEED)
m1<-elimna(m1)
m2<-elimna(m2)
n1<-nrow(m1)
n2<-nrow(m2)
n<-min(c(n1,n2))
crit.level<-.05
if(n<=120)crit.level<-.045
if(n<=80)crit.level<-.04
if(n<=60)crit.level<-.035
if(n<=40)crit.level<-.03
if(n<=30)crit.level<-.025
if(n<=20)crit.level<-.02
val<-matrix(NA,ncol=ncol(m1),nrow=nboot)
est=smean(m1)-smean(m2)
for(j in 1: nboot){
data1<-sample(n1,size=n1,replace=TRUE)
data2<-sample(n2,size=n2,replace=TRUE)
mm1<-m1[data1,]
temp<-outpro(mm1,plotit=FALSE,cop=cop,STAND=STAND)$keep
v1<-apply(mm1[temp,],2,mean)
mm2<-m2[data2,]
temp<-outpro(mm2,plotit=FALSE,cop=cop,STAND=STAND)$keep
v2<-apply(mm2[temp,],2,mean)
val[j,]<-v1-v2
}
if(!MC)temp<-pdis(rbind(val,nullv))
if(MC)temp<-pdisMC(rbind(val,nullv))
sig.level<-sum(temp[nboot+1]<temp[1:nboot])/nboot
if(ncol(m1)==2 && plotit){
plot(val[,1],val[,2],xlab="VAR 1",ylab="VAR 2")
if(!MC)temp3<-smean(m1,cop=cop)-smean(m2,cop=cop)
if(MC)temp3<-smeanMC(m1,cop=cop)-smeanMC(m2,cop=cop)
points(temp3[1],temp3[2],pch="+")
ic<-round((1-crit.level)*nboot)
if(!MC)temp<-pdis(val)
if(MC)temp<-pdisMC(val)
temp.dis<-order(temp)
xx<-val[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}
list(p.value=sig.level,crit.level=crit.level)
}
mulwmwv2<-function(m1,m2,plotit=TRUE,cop=3,alpha=.05,nboot=1000,pop=4,fr=.8,pr=FALSE){
#
# same as function mulwmw, only report explanatory effect size.
#
# Determine center correpsonding to two
# independent groups, project all  points onto line
# connecting the centers,
# then based on the projected distances,
# estimate p=probability that a randomly sampled
# point from group 1 is less than a point from group 2
# based on the projected distances.
#
#
# plotit=TRUE creates a plot of the projected data
# pop=1 plot two dotplots based on projected distances
# pop=2 boxplots
# pop=3 expected frequency curve.
# pop=4 adaptive kernel density
#
#  There are three options for computing the center of the
#  cloud of points when computing projections:
#  cop=1 uses Donoho-Gasko median
#  cop=2 uses MCD center
#  cop=3 uses median of the marginal distributions.
#
#  When using cop=2 or 3, default critical value for outliers
#  is square root of the .975 quantile of a
#  chi-squared distribution with p degrees
#  of freedom.
#
#  Donoho-Gasko (Tukey) median is marked with a cross, +.
#
#if(is.matrix(m1)){print("Data are assumed to be stored in")
if(is.null(dim(m1))){print("Data are assumed to be stored in")
print(" a matrix or data frame having two or more columns.")
stop(" For univariate data, use the function outbox or out")
}
m1<-elimna(m1) # Remove missing values
m2<-elimna(m2)
n1v=nrow(m1)
n2v=nrow(m2)
if(cop==1){
if(ncol(m1)>2){
center1<-dmean(m1,tr=.5)
center2<-dmean(m2,tr=.5)
}
if(ncol(m1)==2){
tempd<-NA
for(i in 1:nrow(m1))
tempd[i]<-depth(m1[i,1],m1[i,2],m1)
mdep<-max(tempd)
flag<-(tempd==mdep)
if(sum(flag)==1)center1<-m1[flag,]
if(sum(flag)>1)center1<-apply(m1[flag,],2,mean)
for(i in 1:nrow(m2))
tempd[i]<-depth(m2[i,1],m2[i,2],m2)
mdep<-max(tempd)
flag<-(tempd==mdep)
if(sum(flag)==1)center2<-m2[flag,]
if(sum(flag)>1)center2<-apply(m2[flag,],2,mean)
}}
if(cop==2){
center1<-cov.mcd(m1)$center
center2<-cov.mcd(m2)$center
}
if(cop==3){
center1<-apply(m1,2,median)
center2<-apply(m2,2,median)
}
if(cop==4){
center1<-smean(m1)
center2<-smean(m2)
}
center<-(center1+center2)/2
B<-center1-center2
if(sum(center1^2)<sum(center2^2))B<-(0-1)*B
BB<-B^2
bot<-sum(BB)
disx<-NA
disy<-NA
if(bot!=0){
for (j in 1:nrow(m1)){
AX<-m1[j,]-center
tempx<-sum(AX*B)*B/bot
disx[j]<-sign(sum(AX*B))*sqrt(sum(tempx^2))
}
for (j in 1:nrow(m2)){
AY<-m2[j,]-center
tempy<-sum(AY*B)*B/bot
disy[j]<-sign(sum(AY*B))*sqrt(sum(tempy^2))
}
}
es=yuenv2(disx,disy)$Effect.Size
if(plotit){
if(pop==1){
par(yaxt="n")
xv<-rep(2,length(disx))
yv<-rep(1,length(disy))
plot(c(disx,disy),c(xv,yv),type="n",xlab="",ylab="")
xv<-rep(1.6,length(disx))
yv<-rep(1.4,length(disy))
points(disx,xv)
points(disy,yv)
#par(yaxt="c")
}
if(pop==2)boxplot(disx,disy)
if(pop==3)rd2plot(disx,disy,fr=fr)
if(pop==4)g2plot(disx,disy,fr=fr)
}
m<-outer(disx,disy,FUN="-")
m<-sign(m)
phat<-(1-mean(m))/2
if(bot==0)phat<-.5
print("Computing critical values")
m1<-t(t(m1)-center1)
m2<-t(t(m2)-center2)
v1<-mulwmwcrit(m1,m2,cop=cop,alpha=alpha,iter=nboot,pr=pr)
list(phat=phat,lower.crit=v1[1],upper.crit=v1[2],Effect.Size=abs(es),n1=n1v,n2=n2v)
}
regpord<-function(x,y,nboot=100,alpha=.05,SEED=TRUE,xout=FALSE,outfun=outpro,cov.fun=cov.mba,pr=TRUE,
plotit=TRUE,xlab="Standardized Predictors",ylab="Y",est=mean,scat=var,...){
#
# Compare strength of association of two predictors via
# some robust covariance matrix, with predictors standardized.
#
x<-as.matrix(x)
p1<-ncol(x)+1
p<-ncol(x)
xy<-cbind(x,y)
xy<-elimna(xy)
x<-xy[,1:p]
y<-xy[,p1]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
n=nrow(x)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,regpord.sub,x,y,cov.fun)
ptot=(p^2-p)/2
# bvec is a p by nboot matrix.
est=regvarp(x,y,est=est,scat=scat)
regci<-matrix(0,ptot,4)
dimnames(regci)<-list(NULL,c("Pred.","Pred","test.stat","Decision"))
ic=0
crit05=2.06-5.596/sqrt(n)
if(pr){
print("est is the estimated generalized variance")
}
if(p==2){
if(plotit){
z=standm(x,locfun=lloc,est=mean,scat=var)
z1=cbind(z[,1],y)
z2=cbind(z[,2],y)
plot(rbind(z1,z2),type="n",xlab=xlab,ylab=ylab)
points(z1,pch="*")
points(z2,pch="+")
}}
for(j in 1:p){
for(k in 1:p){
if(j<k){
sqse<-mean((bvec[j,]-est[j]-bvec[k,]+est[k])^2)*nboot/(nboot-1)
test=(est[j]-est[k])/sqrt(sqse)
ic=ic+1
regci[ic,1]<-j
regci[ic,2]<-k
regci[ic,3]<-test
regci[ic,4]<-0
if(abs(test)>=crit05)regci[ic,4]<-1
}}}
regci=data.frame(regci)
flag=(regci[,4]==0)
regci[flag,4]="fail to reject"
regci[!flag,4]="reject"
list(crit.value=crit05,est=est,results=regci)
}


mopreg<-function(x,y,regfun=tsreg,cop=3,KEEP=TRUE,MC=FALSE,STAND=FALSE){
#
# Do multiple (outcomes) regression on points not labeled outliers
# using projection-type outlier detection method
# Arg=regfun determines regression method;
# by default, Theil-Sen is used.
#
#  KEEP=F, outliers will be eliminated
#  KEEP=T, outliers are not eliminated
# cop: see function outpro
library(MASS)
if(MC)library(parallel)
x<-as.matrix(x)
y<-as.matrix(y)
px<-ncol(x)
py<-ncol(y)
m<-cbind(x,y)
m<-elimna(m) # eliminate any rows with missing data
if(KEEP)ivec<-c(1:nrow(x))
if(!KEEP){
if(!MC)ivec<-outpro(m,plotit=FALSE,cop=cop,STAND=STAND)$keep
if(MC)ivec<-outproMC(m,plotit=FALSE,cop=cop,STAND=STAND)$keep
}
np1<-ncol(x)+1
vec<-rep(1,nrow(m))
pxpy<-px+py
coef<-matrix(ncol=py,nrow=np1)
res<-matrix(ncol=py,nrow=nrow(m))
for(i in 1:py){
pv<-px+i
coef[,i]<-regfun(m[ivec,1:ncol(x)],m[ivec,pv])$coef
vec<-as.matrix(vec)
res[,i]<-m[,pv]-cbind(vec,m[,1:ncol(x)])%*%coef[,i]
}
list(coef=coef,residuals=res)
}
robpcaS<-function(x,SCORES=FALSE,STAND=FALSE,est=tmean,varfun=winvar,SEED=TRUE){
#
# An abbreviated form of robpca.
#
# compute eigen values to determine proportion of scatter.
# Goal is to see how many components are needed
#
x=elimna(x)
if(STAND)x=standm(x,est=est,scat=varfun)
v=robpca(x,pr=FALSE,plotit=FALSE,SEED=SEED)
val=matrix(NA,ncol=length(v$L),nrow=4)
scores=NULL
if(SCORES)scores=v$T
dimnames(val)=list(c("Number of Comp.","Robust Stand Dev","Proportion Robust var","Cum. Proportion"),
NULL)
val[1,]=c(1:length(v$L))
val[2,]=sqrt(v$L)
val[3,]=v$L/sum(v$L)
val[4,]=cumsum(v$L/sum(v$L))
list(summary=val,scores=scores)
}


Ppca<-function(x,p=ncol(x)-1,locfun=L1medcen,loc.val=NULL,SCORES=FALSE,
gvar.fun=cov.mba,pr=TRUE,SEED=TRUE,gcov=rmba,SCALE=TRUE,...){
#
# Robust PCA aimed at finding scores that maximize a
# robust generalized variance given the goal of reducing data from
# m dimensions to
# p, which defaults to m-1
#
#  locfun, location used to center design space.
#  by default, use the spatial median
#  alternatives are mcd, tauloc, ...
#
#  SCALE=T means the marginal distributions are rescaled using the
#  measure and scatter indicated by
#  gcov, which defaults to median ball measure of location and variance
#
#  Output: the projection matrix. If
#  SCORES=T, the projected scores are returned.
#
x<-elimna(x)
n<-nrow(x)
m<-ncol(x)
xdat=c(n,m,p,as.vector(x))
if(!SCALE){
if(is.null(loc.val))info<-locfun(x,...)$center
if(!is.null(loc.val))info<-loc.val
for(i in 1:n)x[i,]<-x[i,]-info
}
if(SCALE){
ms=gcov(x)
for(i in 1:n)x[i,]<-x[i,]-ms$center
for(j in 1:m)x[,j]<-x[,j]/sqrt(ms$cov[j,j])
}
vals<-NA
z<-matrix(nrow=n,ncol=p)
np=p*m
B=robpca(x,pval=p,plotit=FALSE,pr=pr,SEED=SEED,scree=F)$P
B=t(B)
Bs=nelderv2(xdat,np,NMpca,START=B)
Bop=matrix(Bs,nrow=p,ncol=m)
Bop=t(ortho(t(Bop)))
z<-matrix(nrow=n,ncol=p)
zval<-NULL
for(i in 1:n)z[i,]<-Bop%*%as.matrix(x[i,])
if(SCORES)zval<-z
val=gvarg(z)
list(B=Bop,gen.sd=sqrt(val),scores=zval)
}
Ppca.sum.sub<-function(j,x,SCALE=T){
#
res=Ppca(x,p=j,pr=FALSE,SCALE=SCALE)$gen.sd
res
}
Ppca.summary<-function(x,MC=FALSE,SCALE=TRUE,p=NULL){
#
#   x is assumed to be a matrix with p columns
#   Using robust principal components (Ppca)
#   compute generalized variance for each dimension reduction
#   from 1 to p.
#
#   report values plus proportion relative to largest value found
#
if(!is.matrix(x))stop("x should be a matrix")
x=elimna(x)
gv=NA
if(is.null(p))p=ncol(x)
if(!MC)for(j in 1:p)gv[j]=Ppca(x,p=j,pr=FALSE,SCALE=SCALE)$gen.sd
if(MC){
library(parallel)
y=list()
for(j in 1:p)y[[j]]=j
gv=mclapply(y,Ppca.sum.sub,x,SCALE=SCALE,mc.preschedule=TRUE)
gv=as.vector(matl(gv))
}
res=matrix(NA,nrow=3,ncol=p)
res[1,]=c(1:p)
res[2,]=gv
res[3,]=gv/max(gv)
dimnames(res)=list(c("Num. of Comp.","Gen.Stand.Dev","Relative Size"),NULL)
list(summary=res)
}
mdepreg<-function(x,y){
#
# multiple depth regression
#
X<-cbind(x,y)
X<-elimna(X)
np<-ncol(X)
if(np==2){
temp=depreg(X[,1],X[,2])
coef=temp$coef
res=temp$residuals
}
if(np>2){
N<-np-1
x=X[,1:N]
y=X[,np]
START<-tsreg(x,y)$coef
coef<-nelderv2(X,np,FN=mdepreg.sub,START=START)
x <- as.matrix(x)
res <- y - x %*% coef[2:np] - coef[1]
}
list(coef = coef, residuals = res)
}
l2plot<-function(x1,y1,x2,y2,f=2/3,SCAT=TRUE,xlab="x",ylab="y",
eout=FALSE,xout=FALSE,...){
#
# Plot LOESS smoother for two groups
#
# f is the span used by lowess
# SCAT=F: scatterplot not created, just the two regression lines
# Missing values are automatically removed.
#
m<-elimna(cbind(x1,y1))
x1<-m[,1]
y1<-m[,2]
m<-elimna(cbind(x2,y2))
x2<-m[,1]
y2<-m[,2]
plot(c(x1,x2),c(y1,y2),xlab=xlab,ylab=ylab,type="n")
if(SCAT)points(c(x1,x2),c(y1,y2))
lines(lowess(x1,y1,f=f))
lines(lowess(x2,y2,f=f))
}

contab<-function(dat,alpha=.05){
# dat is a 2-by-2 contingency table (matrix).
# Goal: compare the marginal probability of the first variable (e.g. Time 1)
# to the marginal probability of the second variable (e.g. Time 2).
# That is, test whether the probabilities change from time 1 to time 2.
#
phat=dat
n=sum(phat)
phat=phat/n
p1.=phat[1,1]+phat[1,2]
p.1=phat[1,1]+phat[2,1]
del=p1.-p.1
sigsq=p1.*(1-p1.)+p.1*(1-p.1)-2*(phat[1,1]*phat[2,2]-phat[1,2]*phat[2,1])
sig=sqrt(sigsq/n)
test=abs(del)/sig
pv=2*(1-pnorm(test))
ci=c(del-qnorm(1-alpha/2)*sig,del+qnorm(1-alpha/2)*sig)
list(delta=del,CI=ci,p.value=pv)
}
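
# Illustrative sketch (not part of the original code): 100 cases classified as
# yes/no at two time points; delta estimates the change in the marginal
# probability. Wrapped in if(FALSE).
if(FALSE){
dat=matrix(c(30,10,20,40),nrow=2,byrow=TRUE)
contab(dat)  # delta = (30+10)/100 - (30+20)/100 = -.1
}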


Ckappa<-function (x,fleiss=FALSE,w = NULL){
#
#  Compute Cohen's kappa and a weighted kappa.
#  If w=NULL, the weights depend on fleiss:
#  fleiss=T uses the Fleiss weights 1-(i-j)^2/(p-1)^2;
#  fleiss=F uses weights .5^|i-j|.
#  If the argument w contains a matrix of weights, it is used as given.
#
if(!is.matrix(x))stop("x should be a square matrix")
if(ncol(x)!=nrow(x))stop("x should be a square matrix")
    p <- dim(x)[2]
    x <- as.matrix(x)
    tot <- sum(x)
    x <- x/tot
    rs <- rowSums(x)
    cs <- colSums(x)
    prob <- rs %*% t(cs)
    po <- sum(diag(x))
    pc <- sum(diag(prob))
    kappa <- (po - pc)/(1 - pc)
    if (is.null(w)) {
v=outer(c(1:p),c(1:p),"-")
w=outer(c(1:p),c(1:p),"-")
if(fleiss)w=1-w^2/(p-1)^2
if(!fleiss)w=.5^abs(w)
}
    weighted.prob <- w * prob
    weighted.obser <- w * x
    wpo <- sum(weighted.obser)
    wpc <- sum(weighted.prob)
    wkappa <- (wpo - wpc)/(1 - wpc)
    return(list(kappa = kappa, weighted.kappa = wkappa))
}
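
# Illustrative sketch (not part of the original code): agreement between two
# raters on a 3-category scale, stored as a 3x3 table of counts. Wrapped in
# if(FALSE).
if(FALSE){
x=matrix(c(20,5,1, 4,15,3, 0,2,10),nrow=3,byrow=TRUE)
Ckappa(x)             # kappa plus weighted kappa with .5^|i-j| weights
Ckappa(x,fleiss=TRUE) # weighted kappa with the Fleiss (quadratic) weights
}
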
ODDSR.CI<-function(x,y=NULL,alpha=.05){
#
#  Compute confidence interval of the odds ratio.
#
#  x is either a two-by-two contingency table or a
#  vector of 0's and 1's, in which case
#  y is also a  vector of 0's and 1's
#
# if x is a 2-by-2 matrix, assume col 1 is X=1, col 2 is X=0
# row 1 is Y=1 and row 2 is Y=0.
#
if(is.matrix(x)){
if(ncol(x)!=2)stop("If x is a matrix, should have 2 columns")
if(nrow(x)!=2)stop("If x is a matrix, should have 2 rows")
n=sum(x)
x1=rep(1,x[1,1])
y1=rep(1,x[1,1])
x2=rep(0,x[1,2])
y2=rep(1,x[1,2])
x3=rep(1,x[2,1])
y3=rep(0,x[2,1])
x4=rep(0,x[2,2])
y4=rep(0,x[2,2])
x=c(x1,x2,x3,x4)
y=c(y1,y2,y3,y4)
}
temp=logreg(x,y)
z=qnorm(1-alpha/2)
ci=c(exp(temp[2,1]-z*temp[2,2]),exp(temp[2,1]+z*temp[2,2]))
list(odds.ratio=exp(temp[2,1]),ci=ci)
}
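
# Illustrative sketch (not part of the original code; relies on the logreg
# function defined elsewhere in this file): odds ratio and its confidence
# interval from a 2-by-2 table of counts. Wrapped in if(FALSE).
if(FALSE){
x=matrix(c(25,15,10,30),nrow=2,byrow=TRUE)
ODDSR.CI(x)
}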

smean<-function(m,cop=3,MM=FALSE,op=1,outfun=outogk,cov.fun=rmba,MC=FALSE,STAND=FALSE,...){
#
# m is an n by p matrix
#
# Compute a multivariate skipped measure of location
#
# op=1:
# Eliminate outliers using a projection method
# If in addition MC=T, multiple cores are used,
# assuming your computer has multiple cores and the package
# parallel is available.
#
# cop=1 Donoho-Gasko median,
# cop=2 MCD,
# cop=3 marginal medians.
#  cop=4 uses MVE center
#  cop=5 uses TBS
#  cop=6 uses rmba (Olive's median ball algorithm)
#
# For each point
# consider the line between it and the center,
# project all points onto this line, and
# check for outliers using
#
# MM=F, a boxplot rule.
# MM=T, rule based on MAD and median
#
# Repeat this for all points. A point is declared
# an outlier if for any projection it is an outlier
# using a modification of the usual boxplot rule.
#
# op=2 use the mgv method (function outmgv) to eliminate outliers
#
# op=3 use outlier method indicated by outfun
#
# Eliminate any outliers and compute means
#  using remaining data.
#
m<-elimna(m)
if(op==1){
if(!MC)temp<-outpro(m,plotit=FALSE,cop=cop,MM=MM,STAND=STAND)$keep
if(MC)temp<-outproMC(m,plotit=FALSE,cop=cop,MM=MM,STAND=STAND)$keep
}
if(op==2)temp<-outmgv(m,plotit=FALSE,cov.fun=cov.fun)$keep
if(op==3)temp<-outfun(m,plotit=FALSE,...)$keep
val<-apply(m[temp,],2,mean)
val
}
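
# Illustrative sketch (not part of the original code): the skipped mean of
# simulated bivariate data with a few planted outliers. Wrapped in if(FALSE).
if(FALSE){
set.seed(1)
m=matrix(rnorm(100),ncol=2)
m[1:3,]=m[1:3,]+6     # plant three outlying rows
smean(m)              # close to (0,0): the planted rows are removed
apply(m,2,mean)       # ordinary means are pulled toward the outliers
}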

smeancrv2<-function(m,nullv=rep(0,ncol(m)),cop=3,MM=FALSE,SEED=TRUE,
nboot=500,plotit=TRUE,MC=FALSE,xlab="VAR 1",ylab="VAR 2",STAND=FALSE){
#
# m is an n by p matrix
#
# Test hypothesis that multivariate skipped estimators
# are all equal to the null value, which defaults to zero.
# The level of the test is .05.
#
# Eliminate outliers using a projection method
# That is, determine center of data using:
#
# cop=1 Donoho-Gasko median,
# cop=2 MCD,
# cop=3 marginal medians.
# cop=4 MVE
#
# For each point
# consider the line between it and the center
# project all points onto this line, and
# check for outliers using
#
# MM=F, a boxplot rule.
# MM=T, rule based on MAD and median
#
# Repeat this for all points. A point is declared
# an outlier if for any projection it is an outlier
# using a modification of the usual boxplot rule.
#
# Eliminate any outliers and compute means
#  using remaining data.
#
if(SEED)set.seed(2)
m<-elimna(m)
n<-nrow(m)
est=smean(m,MC=MC,cop=cop,STAND=STAND)
crit.level<-.05
if(n<=120)crit.level<-.045
if(n<=80)crit.level<-.04
if(n<=60)crit.level<-.035
if(n<=40)crit.level<-.03
if(n<=30)crit.level<-.025
if(n<=20)crit.level<-.02
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
val<-matrix(NA,ncol=ncol(m),nrow=nboot)
for(j in 1: nboot){
mm<-m[data[j,],]
val[j,]<-smean(mm,MC=MC,cop=cop,STAND=STAND)
}
if(!MC)temp<-pdis(rbind(val,nullv),center=est)
if(MC)temp<-pdisMC(rbind(val,nullv),center=est)
sig.level<-sum(temp[nboot+1]<temp[1:nboot])/nboot
if(ncol(m)==2 && plotit){
plot(val[,1],val[,2],xlab=xlab,ylab=ylab)
temp3<-est
points(temp3[1],temp3[2],pch="+")
ic<-round((1-crit.level)*nboot)
if(!MC)temp<-pdis(val,center=est)
if(MC)temp<-pdisMC(val,center=est)
temp.dis<-order(temp)
xx<-val[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}
list(p.value=sig.level)
}
rmdzeroOP<-function(x, nboot = 500, cop=3,MC = FALSE,plotit=TRUE,xlab="",ylab="",STAND=FALSE,...){
#
#   Form all pairwise differences of scores then test
#   the hypothesis that all differences have OP measure of location = 0
#
#   The data are assumed to be stored in x in list mode
#   or in a matrix. In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, columns correspond to groups.
#
#   The default number of bootstrap samples is nboot=500
#
if(!is.list(x) && !is.matrix(x))
stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
# put the data in an n by J matrix
mat<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))mat[,j]<-x[[j]]
}
if(is.matrix(x))mat<-x
mat<-elimna(mat) # Remove rows with missing values.
J<-ncol(mat)
jp<-0
Jall<-(J^2-J)/2
dif<-matrix(NA,nrow=nrow(mat),ncol=Jall)
ic<-0
for(j in 1:J){
for(k in 1:J){
if(j<k){
ic<-ic+1
dif[,ic]<-mat[,j]-mat[,k]
}}}
dif<-as.matrix(dif)
res=smeancrv2(dif, nboot = nboot, cop=cop,plotit = plotit,
 MC = MC, xlab = xlab,ylab = ylab,STAND=STAND)
res
}

mat2grp<-function(m,coln){
#
#  For data in a matrix m, divide the data into groups based
#  on the values in column indicated
#  by the argument coln
#  and store the data in list mode.
#
#  All columns of m are retained including  column coln.
#
if(!is.null(dim(m)))m=as.matrix(m)
if(!is.matrix(m))stop("Data must be stored in a matrix or data frame")
if(length(coln)!=1)stop("The argument coln must have length 1")
x<-list()
flagna=!is.na(m[,coln])
m=m[flagna,]
grpn<-sort(unique(m[,coln]))
for (ig in 1:length(grpn)){
flag<-(m[,coln]==grpn[ig])
x[[ig]]<-m[flag,]
}
print("Group Levels:")
print(grpn)
x
}
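
# Illustrative sketch (not part of the original code): split the rows of a
# matrix into groups using the codes in column 1. Wrapped in if(FALSE).
if(FALSE){
set.seed(1)
m=cbind(rep(1:3,each=10),rnorm(30))
z=mat2grp(m,1)  # z[[1]] holds the rows with code 1, and so on
length(z)       # 3
}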

robpca<-function(x, pval=ncol(x), kmax=10, alpha=0.75, h, mcd=1,
plots=1, labsd=3, labod=3, classic=0,plotit=FALSE,pr=TRUE,SEED=TRUE,
STAND=TRUE,est=tmean,varfun=winvar,scree=TRUE,xlab="Principal Component",ylab="Proportion of Variance"){
x<-elimna(x)
#
# This is a slightly modified version of the code in robpca.SSC that
# was downloade from M. Hubert's web page.
#
if(pval!=ncol(x))scree=FALSE
if(STAND)x=standm(x,est=est,scat=varfun)
if(SEED)set.seed(2) # so cov.mve will always return same result
k<-pval # k=0 generates an error when using the original code.
if(pr)print(paste("Number of principal components specified is",pval))
#
# ROBPCA is a 'ROBust method for Principal Components Analysis'.
# It is resistant to outliers in the data. The robust loadings are computed using
# projection-pursuit techniques and the MCD method. Therefore ROBPCA can be applied
# to both low and high-dimensional data sets. In low dimensions, the MCD method is applied (see cov.mcd).
# The ROBPCA method is described in
#   Hubert, M., Rousseeuw, P.J., Vanden Branden K. (2005),
#   "ROBPCA: a new approach to robust principal components analysis",
#   Technometrics, 47, 64-79.
# For the up-to-date reference, please consult the website:
#     www.wis.kuleuven.ac.be/stat/robust.html

# Required input arguments:
#            x : Data matrix (observations in the rows, variables in the
#                columns)
#
# Optional input arguments:
#            pval (which was k) in original code by Hubert
#              : Number of principal components to compute. If k is missing,
#                or k = 0, a screeplot is drawn which allows you to select
#                the number of principal components. If k = 0 and plots = 0,
#                the algorithm itself will determine the number of components.
#                This is not recommended.
#         kmax : Maximal number of principal components to compute (default = 10).
#                If k is provided, kmax does not need to be specified, unless k is larger
#                than 10.
#        alpha : (1-alpha) measures the fraction of outliers the algorithm should
#                resist. Any value between 0.5 and 1 may be specified (default = 0.75).
#            h : (n-h+1) measures the number of outliers the algorithm should
#                resist. Any value between n/2 and n may be specified. (default = 0.75*n)
#                Alpha and h may not both be specified.
#          mcd : If equal to one: when the number of variables is sufficiently small,
#                the loadings are computed as the eigenvectors of the MCD covariance matrix,
#                hence the function 'cov.mcd' is automatically called. The number of
#                principal components is then taken as k = rank(x). (default)
#                If equal to zero, the robpca algorithm is always applied.
#        plots : If equal to one, a scree plot, and a robust score outlier map are
#                drawn (default). If the input argument 'classic' is equal to one,
#                the classical plots are drawn as well.
#                If 'plots' is equal to zero, all plots are suppressed.
#        labsd : The 'labsd' observations with largest score distance are
#                labeled on the outlier map. (default = 3)
#        labod : The 'labod' observations with largest orthogonal distance are
#                labeled on the outlier map. (default = 3)
#      classic : If equal to one, the classical PCA analysis will be performed. (default = 0)
#
# I/O: result<-robpca(x,k=2,kmax=10,alpha=0.75,h=50,mcd=1,plots=1,labsd=3,labod=3,classic=0)
#  The user should only give the input arguments that have to change their default value.
#  The name of the input arguments needs to be followed by their value.
#  The order of the input arguments is of no importance.
#
# Examples:
#    result<-robpca(x,k=3,alpha=0.65,plots=0)
#    result<-robpca(x,alpha=0.80,kmax=15,labsd=5)
# plotit=FALSE, is the same as using plots=0
#
# The output of ROBPCA is a structure containing
#
#    result$P        : Robust loadings (eigenvectors)
#    result$L        : Robust eigenvalues
#    result$M        : Robust center of the data
#    result$T        : Robust scores
#    result$k        : Number of (chosen) principal components
#    result$h        : The quantile h used throughout the algorithm
#    result$sd       : Robust score distances within the robust PCA subspace
#    result$od       : Orthogonal distances to the robust PCA subspace
#    result$cutoff   : Cutoff values for the robust score and orthogonal distances
#    result$flag     : The observations whose score distance is larger than result.cutoff.sd
#                      or whose orthogonal distance is larger than result$cutoff$od
#                      can be considered as outliers and receive a flag equal to zero.
#                      The regular observations receive a flag 1.
#    result$class    : 'ROBPCA'
#    result$classic  : If the input argument 'classic' is equal to one, this structure
#                     contains results of the classical PCA analysis.

# Short description of the method: Let n denote the number of observations, and p the number of original variables,
# then ROBPCA finds a robust center (p x 1) of the data M and a loading matrix P which is (p x k) dimensional.
# Its columns are orthogonal and define a new coordinate system. The scores (n x k) are the coordinates of the centered
# observations with respect to the loadings: T=(X-M)*P. Note that ROBPCA also yields a robust covariance matrix (often singular)
# which can be computed as  cov<-out$P*out$L*t(out$P). The scree plot shows the eigenvalues and is helpful to select the number
# of principal components. The outlier map visualizes the observations by plotting their orthogonal distance to the robust PCA subspace
# versus their robust distances within the PCA subspace. This allows to classify the data points into 4 types: regular observations,
# good leverage points, bad leverage points and orthogonal outliers.
#
# robpca.ssc was written by Jan Wijfels
#				  adapted by Karlien Vanden Branden.
# Last Update: 14/01/2005
if(!plotit)plots<-0
library(MASS)
	if(missing(x)){
		stop("Error in robpca: You have to provide at least some data")	
	}
	data <- as.matrix(x)
	n <- nrow(data)
	p <- ncol(data)

	if(n < p) {
		X.svd <- kernelEVD(data)
	}
	else {
		X.svd <- classSVD(data)
	}
	if(X.svd$rank == 0) {
		stop("All data points collapse!")
	}	
	kmax <- max(min(floor(kmax), floor(n/2), X.svd$rank),1)
	k <- floor(k)
	if(k < 0) {
		k <- 0
	}
	else if(k > kmax) {
		warning("Attention robpca: The number of principal components k = ", k, " is larger then kmax = ", kmax, "; k is set to ", kmax,".")
		k <- kmax
	}
	if(!missing(h) & !missing(alpha)) {
		stop("Error in robpca: Both inputarguments alpha and h are provided. Only one is required.")
	}
	if(missing(h) & missing(alpha)) {
		h <- min(floor(2*floor((n+kmax+1)/2)-n+2*(n-floor((n+kmax+1)/2))*alpha),n)
	}
	if(!missing(h) & missing(alpha)) {
		alpha <- h/n
		if(k==0) {			
			if(h < floor((n+kmax+1)/2)) {
				h <- floor((n+kmax+1)/2)
				alpha <- h/n
				warning("Attention robpca: h should be larger than (n+kmax+1)/2. It is set to its minimum value ", h, ".")
			}
		}
		else {
			if(h < floor((n+k+1)/2)) {
				h <- floor((n+k+1)/2)
				alpha <- h/n
				warning("Attention robpca: h should be larger than (n+k+1)/2. It is set to its minimum value ", h, ".")
			}
		}
		if(h > n) {
			alpha <- 0.75
			if(k==0) {
				h <- floor(2*floor((n+kmax+1)/2)-n+2*(n-floor((n+kmax+1)/2))*alpha)
			}
			else {
				h <- floor(2*floor((n+k+1)/2)-n+2*(n-floor((n+k+1)/2))*alpha)
			}
			warning("Attention robpca: h should be smaller than n = ", n, ". It is set to its default value ", h, ".")
		}
	}
	if(missing(h) & !missing(alpha)) {
		if(alpha < 0.5) {
			alpha <- 0.5
			warning("Attention robpca: Alpha should be larger then 0.5. It is set to 0.5.")
		}
		if(alpha >= 1) {
			alpha <- 0.75
			warning("Attention robpca: Alpha should be smaller than 1. It is set to its default value 0.75.")
		}
		if(k==0) {
			h <- floor(2*floor((n+kmax+1)/2)-n+2*(n-floor((n+kmax+1)/2))*alpha)
		}
		else {
			h <- floor(2*floor((n+k+1)/2)-n+2*(n-floor((n+k+1)/2))*alpha)
		}
	}
	labsd <- floor(max(0,min(labsd,n)))
	labod <- floor(max(0,min(labod,n)))

	out <- list()
	
	Xa <- X.svd$scores
	center <- X.svd$centerofX
	rot <- X.svd$loadings
	p1 <- ncol(Xa)
	if( (p1 <= min(floor(n/5), kmax)) & (mcd == 1 ) ) {
		if(k != 0) {
			k <- min(k, p1)
		}
		else {
			k <- p1
#			cat("Message from robpca: The number of principal
# components is defined by the algorithm. It is set to ", k,".\n", sep="")
		}
		if(h < floor((nrow(Xa) + ncol(Xa) +1)/2)) {
			h <- floor((nrow(Xa) + ncol(Xa) +1)/2)
			cat("Message from robpca: The number of non-outlying observations h is set to ", h," in order to make the mcd algorithm function.\n", sep="") 	
		}
# 		Xa.mcd <- cov.mcd(as.data.frame(Xa), quan=h, print=F)
Xa.mcd <- cov.mcd(as.data.frame(Xa), quan=h) # R version
#print(Xa.mcd$method)
#if(length(grep("equation", Xa.mcd$method)) == 1) {
#			print(Xa.mcd$method)
#			stop("The ROBPCA algorithm can not deal with this
#   result from the FAST-MCD algorithm. The algorithm is aborted.")
#		}
#print("OUT")	
		Xa.mcd.svd <- svd(Xa.mcd$cov)
		scores <- (Xa - matrix(data=rep(Xa.mcd$center, times=nrow(Xa)), nrow=nrow(Xa), ncol=ncol(Xa), byrow=T)) %*% Xa.mcd.svd$u
		out$M <- center + as.vector(Xa.mcd$center %*% t(rot))
		out$L <- Xa.mcd.svd$d[1:k]
#
if(scree){
pv=out$L
cs=pv/sum(pv)
cm=cumsum(cs)
plot(rep(c(1:ncol(x)),2),c(cs,cm),type="n",xlab=xlab,ylab=ylab)
points(c(1:ncol(x)),cs,pch="*")
lines(c(1:ncol(x)),cs,lty=1)
points(c(1:ncol(x)),cm,pch=".")
lines(c(1:ncol(x)),cm,lty=2)
}

		out$P <- X.svd$loadings %*% Xa.mcd.svd$u[,1:k]
		out$T <- as.matrix(scores[,1:k])
		if(is.list(dimnames(data))) {
			dimnames(out$T)[[1]] <- dimnames(data)[[1]]
		}
		out$h <- h
		out$k <- k
		out$alpha <- alpha
	}
	else {
		directions <- choose(n,2)
		ndirect <- min(250, directions)
		all <- (ndirect == directions)
		seed <- 0
		B <- extradir(Xa, ndirect, seed, all)
		Bnorm <- vector(mode="numeric", length=nrow(B))
		Bnorm<-apply(B,1,vecnorm)
		Bnormr <- Bnorm[Bnorm > 1.E-12]
		B <- B[Bnorm > 1.E-12,]
		A <- diag(1/Bnormr) %*% B
		Y <- Xa %*% t(A)
		Z <- matrix(data=0, nrow=n, ncol=length(Bnormr))
		for(i in 1:ncol(Z)) {
			univ <- unimcd(Y[,i],quan = h)
			if(univ$smcd < 1.E-12) {
				r2 <- qr(data[univ$weights==1,])$rank
				if(r2 == 1) {
					stop("Error in robpca: At least ", sum(univ$weights), " observations are identical.")	
				}
			}
			else {
				Z[,i] <- abs(Y[,i] - univ$tmcd) / univ$smcd
			}
		}
		H0 <- order(apply(Z, 1, max))
	
		Xh <- Xa[H0[1:h],]
		Xh.svd <- classSVD(Xh)
		
		kmax <- min(Xh.svd$rank, kmax)
		if( (k == 0) & (plots == 0) ) {
			test <- which((Xh.svd$eigenvalues/Xh.svd$eigenvalues[1]) <= 1.E-3)
			if(length(test) != 0) {
				k <- min(min(Xh.svd$rank, test[1]), kmax)				
			}
			else {
				k <- min(Xh.svd$rank, kmax)
			}
			cumulative <- cumsum(Xh.svd$eigenvalues[1:k]) / sum(Xh.svd$eigenvalues)
			if(cumulative[k] > 0.8) {
				k <- which(cumulative >= 0.8)[1]
			}
			cat("Message from robpca: The number of principal components is set by the algorithm. It is set to ", k, ".\n", sep="")
		}
		else {
			if( (k==0) & (plots != 0) ) {
				loc <- 1:kmax
				plot(loc, Xh.svd$eigenvalues[1:kmax], type='b', axes= FALSE, xlab="Component", ylab="Eigenvalue")
				axis(2)
				axis(1, at=loc)
				cumv <- cumsum(Xh.svd$eigenvalues)/sum(Xh.svd$eigenvalues)
				text(loc, Xh.svd$eigenvalues[1:kmax] + par("cxy")[2], as.character(signif(cumv[1:kmax], 2)))
				# The original S-PLUS code displayed a dialog box here; in R,
				# prompt on the console instead.
				cat("How many principal components would you like to retain? Maximum = ", kmax, "\n", sep="")
				input <- as.integer(readline("Your choice: "))
				k <- max(min(min(Xh.svd$rank, input), kmax), 1)
			}
			else {
				k <- min(min(Xh.svd$rank, k), kmax)
			}
		}
		if(k!=X.svd$rank){
			XRc <- Xa-matrix(data=rep(Xh.svd$centerofX, times=nrow(Xa)), nrow=nrow(Xa), ncol=ncol(Xa), byrow=T)
			Xtilde <- XRc%*%Xh.svd$loadings[,1:k]%*%t(Xh.svd$loadings[,1:k])
			Rdiff <- XRc-Xtilde
          odh <- apply(Rdiff,1,vecnorm)
          ms <- unimcd(odh^(2/3),h)
        	cutoffodh <- sqrt(qnorm(0.975,ms$tmcd,ms$smcd)^3)
          indexset <- (odh<=cutoffodh)
          Xh.svd <- classSVD(Xa[indexset,])
			kmax <- min(Xh.svd$rank, kmax)
		}
			
		center <- center + Xh.svd$centerofX %*% t(rot)
		rot <- rot %*% Xh.svd$loadings
		Xstar<- (Xa - matrix(data=rep(Xh.svd$centerofX, times=nrow(Xa)), nrow=nrow(Xa), ncol=ncol(Xa), byrow=T)) %*% Xh.svd$loadings
		Xstar <- as.matrix(Xstar[,1:k])
		rot <- as.matrix(rot[,1:k])
		mah <- mahalanobis(Xstar, center=rep(0, ncol(Xstar)), cov=diag(Xh.svd$eigenvalues[1:k], nrow=k))
		oldobj <- prod(Xh.svd$eigenvalues[1:k])
		niter <- 100
		for(j in 1:niter) {
			mah.order <- order(mah)
			Xh <- as.matrix(Xstar[mah.order[1:h],])
			Xh.svd <- classSVD(Xh)
			obj <- prod(Xh.svd$eigenvalues)
			Xstar <- (Xstar - matrix(data=rep(Xh.svd$centerofX, times=nrow(Xstar)), nrow=nrow(Xstar), ncol=ncol(Xstar), byrow=T)) %*% Xh.svd$loadings
			center <- center + Xh.svd$centerofX %*% t(rot)
			rot <- rot %*% Xh.svd$loadings
			mah <- mahalanobis(Xstar, center=rep(0, ncol(Xstar)), cov=diag(x=Xh.svd$eigenvalues, nrow=length(Xh.svd$eigenvalues)))
			if( (Xh.svd$rank == k) & ( abs(oldobj - obj) < 1.E-12) ) {
				break
			}
			else {
				oldobj <- obj
				if(Xh.svd$rank < k) {
					j <- 1  # note: in R this does not restart the for() loop
					k <- Xh.svd$rank
				}
			}
		}
#Xstar.mcd <- cov.mcd(as.data.frame(Xstar), ntrial=250, quan=h, print=F)
Xstar.mcd <- cov.mcd(as.data.frame(Xstar), quan=h) # R version
#if(length(grep("equation", Xstar.mcd$method)) == 1) {
#			print(Xstar.mcd$method)
#stop("The ROBPCA algorithm can not deal with this result from the
#FAST-MCD algorithm. The algorithm is aborted.")
#		}	
#		if(Xstar.mcd$raw.objective < obj) {
			covf <- Xstar.mcd$cov
			centerf <- Xstar.mcd$center
#		}
#		else {
#			consistencyfactor <- median(mah)/qchisq(0.5,k)
#			mah <- mah/consistencyfactor
#			weights <- ifelse(mah <= qchisq(0.975, k), T, F)
#			noMCD <- weightmecov(Xstar, weights, n, k)
#			centerf <- noMCD$center
#			covf <- noMCD$cov
#		}

		covf.eigen <- eigen(covf)
		covf.eigen.values.sort <- greatsort(covf.eigen$values)
		P6 <- covf.eigen$vectors
		P6 <- covf.eigen$vectors[,covf.eigen.values.sort$index]
		
out$T <- (Xstar - matrix(data=rep(centerf, times=n), nrow=n, ncol=ncol(Xstar), byrow=T)) %*% covf.eigen$vectors[,covf.eigen.values.sort$index]

		if(is.list(dimnames(data))) {
			dimnames(out$T)[[1]] <- dimnames(data)[[1]]
		}
		out$P <- rot %*% covf.eigen$vectors[,covf.eigen.values.sort$index]
		out$M <- as.vector(center + centerf %*% t(rot))
		out$L <- as.vector(covf.eigen$values)
		out$k <- k
		out$h <- h

		out$alpha <- alpha
	}
	oldClass(out) <- "robpca"
	out <- CompRobustDist(data, X.svd$rank, out, classic)
	if(classic == 1) {
		out <- CompClassicDist(X.svd, out)
	}
	if(plots == 1) {
		plot_robpca(out, classic, labod=labod, labsd=labsd)
	}
	return(out)
}
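#  Example (a sketch, not run when sourced):
#  set.seed(1)
#  z <- matrix(rnorm(200), ncol = 4)
#  res <- robpca(z, pval = 2, plotit = FALSE)
#  res$L    # robust eigenvalues
#  res$sd   # robust score distances within the PCA subspace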
"greatsort"<-function(vec){
	x <- vec * (-1)
	index <- order(x)
	return(list(sortedvector=rev(sort(vec)), index=index))
}
"classSVD"<-function(x){
	if(!is.matrix(x)) {
		stop("The function classSVD requires input of type 'matrix'.")
	}
	n <- nrow(x)
	p <- ncol(x)
	if(n == 1) {
		stop("The sample size is 1. No singular value decomposition can be performed.")
	}
	if(p < 5) {
		tolerance <- 1E-12
	}
	else {
		if(p <= 8) {
			tolerance <- 1E-14
		}
		else {
			tolerance <- 1E-16
		}
	}
	centerofX <- apply(x, 2, mean)
	Xcentered <- scale(x, center=TRUE, scale=FALSE)
	XcenteredSVD <- svd(Xcentered/sqrt(n-1))
	rank <- sum(XcenteredSVD$d > tolerance)
	eigenvalues <- (XcenteredSVD$d[1:rank])^2
	loadings <- XcenteredSVD$v[,1:rank]
	scores <- Xcentered %*% loadings
	return(list(loadings=as.matrix(loadings), scores=as.matrix(scores), eigenvalues=as.vector(eigenvalues), rank=rank,
					Xcentered=as.matrix(Xcentered), centerofX=as.vector(centerofX)))
}
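#  Example (a sketch, not run when sourced): the eigenvalues returned by
#  classSVD agree with the component variances reported by prcomp.
#  z <- matrix(rnorm(50), ncol = 5)
#  cs <- classSVD(z)
#  all.equal(cs$eigenvalues, prcomp(z)$sdev[1:cs$rank]^2)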
"kernelEVD"<-function(x){
	if(!is.matrix(x)) {
		stop("The function kernelEVD requires input of type 'matrix'.")
	}
	n <- nrow(x)
	p <- ncol(x)
	if(n > p) {
		return(classSVD(x))
	}
	else {
		centerofX <- apply(x, 2, mean)
		Xcentered <- scale(x, center=TRUE, scale=FALSE)
		if(n == 1) {
			stop("The sample size is 1. No singular value decomposition can be performed.")
		}
		eigen <- eigen(Xcentered %*% t(Xcentered)/(n-1))
		eigen.descending <- greatsort(eigen$values)
		loadings <- eigen$vectors[,eigen.descending$index]
		tolerance <- n * max(eigen$values) * .Machine$double.eps
		rank <- sum(eigen.descending$sortedvector > tolerance)
		eigenvalues <- eigen.descending$sortedvector[1:rank]
		loadings <- t((Xcentered/sqrt(n-1))) %*% loadings[,1:rank] %*% diag(1/sqrt(eigenvalues), nrow=length(eigenvalues), ncol=length(eigenvalues))
		scores <- Xcentered %*% loadings
		return(list(loadings=as.matrix(loadings), scores=as.matrix(scores), eigenvalues=as.vector(eigenvalues), rank=rank,
						Xcentered=as.matrix(Xcentered), centerofX=as.vector(centerofX)))
	}
}
"extradir"<-function(data, ndirect, seed=0, all=T){
	n <- nrow(data)
	p <- ncol(data)
	B2 <- matrix(data=0, nrow = ndirect, ncol = p)
	rowindex <- 1
	i <- 1
	if(all == T) {
		while( (i < n) & (rowindex <= ndirect) ) {
			j <- i + 1
			while( (j <= n) & (rowindex <= ndirect) ) {
				B2[rowindex,] <- data[i,] - data[j,]
				j <- j + 1
				rowindex <- rowindex + 1
			}
			i <- i + 1
		}
	}
	else {
		while(rowindex <= ndirect) {
			sseed<-randomset(n,2,seed)
			seed<-sseed$seed
			B2[rowindex,] <- data[sseed$ranset[1],] - data[sseed$ranset[2],]
			rowindex <- rowindex + 1
		}
	}
	return(B2)
}
"randomset"<-function(tot,nel,seed){
out<-list()
for(j in 1:nel){
   randseed<-uniran(seed)
	seed<-randseed$seed
   num<-floor(randseed$random*tot)+1
   if(j > 1){
      while(any(out$ranset==num)){
         	randseed<-uniran(seed)
			seed<-randseed$seed
        	num<-floor(randseed$random*tot)+1
			
      }
   }
   out$ranset[j]<-num
	}
	out$seed<-seed
	return(out)
}
"uniran"<-function(seed = 0){
	out <- list()
	seed<-floor(seed*5761)+999
	quot<-floor(seed/65536)
	out$seed<-floor(seed)-floor(quot*65536)
	out$random<-out$seed/65536
	return(out)
}
"unimcd"<-function(y,quan){
	out<-list()
	ncas<-length(y)
	len<-ncas-quan+1
	if(len==1){
   	 	out$tmcd<-mean(y)
    	out$smcd<-sqrt(var(y))
		}
	else {
		ay<-c()
		I<-order(y)
		y<-y[I]
		ay[1]<-sum(y[1:quan])
	    for(samp in 2:len){
			ay[samp]<-ay[samp-1]-y[samp-1]+y[samp+quan-1]
		}
   	 ay2<-ay^2/quan
	 sq<-c()
    sq[1]<-sum(y[1:quan]^2)-ay2[1]
    for(samp in 2:len){
		 sq[samp]<-sq[samp-1]-y[samp-1]^2+y[samp+quan-1]^2-ay2[samp]+ay2[samp-1]
		}
	 sqmin<-min(sq)
	 Isq<-order(sq)
	 ndup<-sum(sq == sqmin)
	 ii<-Isq[1:ndup]
	 slutn<-c()
    slutn[1:ndup]<-ay[ii]
    initmean<-slutn[floor((ndup+1)/2)]/quan
    initcov<-sqmin/(quan-1)
    res<-(y-initmean)^2/initcov
    sortres<-sort(res)
    factor<-sortres[quan]/qchisq(quan/ncas,1)
    initcov<-factor*initcov
    res<-(y-initmean)^2/initcov
    quantile<-qchisq(0.975,1)
    out$weights<-(res<quantile)
    out$tmcd<-sum(y*out$weights)/sum(out$weights)
    out$smcd<-sqrt(sum((y-out$tmcd)^2*out$weights)/(sum(out$weights)-1))
	Iinv<-order(I)
	out$weights<-out$weights[Iinv]
}
	return(out)
}
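#  Example (a sketch, not run when sourced): univariate MCD location and
#  scale based on the h = 38 least-spread observations out of 50.
#  set.seed(1)
#  y <- c(rnorm(45), rnorm(5, mean = 10))   # 10% contamination
#  unimcd(y, quan = 38)[c("tmcd", "smcd")]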
"weightmecov"<-function(data, weights, n, nvar) {
	weightedcov <- cov.wt(x=data, wt=weights, center=T)
	return(list(center=weightedcov$center, cov=weightedcov$cov*sum(weights)/(sum(weights)-1)))
}
"CompRobustDist"<-function(data, r, out, classic) {
	n <- nrow(data)
	p <- ncol(data)
	out$sd <- sqrt(mahalanobis(out$T, center=rep(0, length=ncol(out$T)), cov=diag(x=out$L, nrow=length(out$L))))
	out$cutoff$sd <- sqrt(qchisq(0.975, out$k))
	XRc <- data - matrix(data=rep(out$M, times=n), nrow=n, ncol=p, byrow=T)
	Xtilde <- out$T %*% t(out$P)
	Rdiff <- XRc - Xtilde
	out$od <- vector(mode="numeric", length=n)
	if(is.list(dimnames(out$T))) {
		names(out$od) <- dimnames(out$T)[[1]]
	}
	out$od<-apply(Rdiff,1,vecnorm)
	if(out$k != r) {
		ms <- unimcd(out$od^(2/3), quan=out$h)
		out$cutoff$od <- sqrt(qnorm(0.975, ms$tmcd, ms$smcd)^3)
		out$flag <- (out$od <= rep(x=out$cutoff$od, times=length(out$od))) & (out$sd <= rep(x=out$cutoff$sd, times=length(out$sd)))
	}
	else {
		out$cutoff$od <- 0
		out$flag <- out$sd <= rep(x=out$cutoff$sd, times=length(out$sd))
	}
	if(classic == 0) {
		out$classic <- 0
	}
	out$class <- "ROBPCA"

	return(out)
}
"CompClassicDist"<-function(svd, out) {
	out$classic$P <- as.matrix(svd$loadings[,1:out$k])
	out$classic$L <- as.vector(svd$eigenvalues[1:out$k])
	out$classic$M <- as.vector(svd$centerofX)
	out$classic$T <- as.matrix(svd$scores[,1:out$k])

	out$classic$k <- out$k
	out$classic$Xc <- as.matrix(svd$Xcentered)
	Tclas <- out$classic$Xc %*% out$classic$P
	out$classic$sd <- sqrt(mahalanobis(Tclas, center=rep(0, length=ncol(Tclas)), cov=diag(x=out$classic$L, nrow=out$classic$k)))
	out$classic$cutoff$sd <- sqrt(qchisq(0.975, out$classic$k))
	Xtilde <- Tclas %*% t(out$classic$P)
	Cdiff <- out$classic$Xc - Xtilde
	out$classic$od <- vector(mode="numeric", length=nrow(out$classic$Xc))
	if(is.list(dimnames(out$classic$T))) {
		names(out$classic$od) <- dimnames(out$classic$T)[[1]]
	}
	out$classic$od<-apply(Cdiff,1,vecnorm)
	if(out$k != svd$rank) {
		m <- mean(out$classic$od^(2/3))
		s <- sqrt(var(out$classic$od^(2/3)))
		out$classic$cutoff$od <- sqrt((qnorm(0.975, m, s))^3)
	}
	else {
		out$classic$cutoff$od <- 0
	}
	out$classic$flag <- (out$classic$od <= rep(x=out$classic$cutoff$od, times=length(out$classic$od))) &
								(out$classic$sd <= rep(x=out$classic$cutoff$sd, times=length(out$classic$sd)))
	out$classic$class <- "CPCA"
	return(out)
}
plot_robpca<-function(robpca.obj, classic=0, labod=3, labsd=3) {
	diagnosticplot <- !any(robpca.obj$od <= as.vector(1.E-06,mode(robpca.obj$od)))
	if(diagnosticplot == T) {
		xmax <- max(max(robpca.obj$sd), robpca.obj$cutoff$sd)
		ymax <- max(max(robpca.obj$od), robpca.obj$cutoff$od)
		plot(robpca.obj$sd, robpca.obj$od, xlab="Score distance", ylab="Orthogonal distance", xlim=c(0,xmax), ylim=c(0,ymax), type="p")
		abline(v=robpca.obj$cutoff$sd)
		abline(h=robpca.obj$cutoff$od)
		givelabel(robpca.obj, labod, labsd)
	}
	else {
		ymax <- max(max(robpca.obj$sd), robpca.obj$cutoff$sd)
		plot(robpca.obj$sd, xlab="Index", ylab="Score distance", ylim=c(0,ymax), type="p")
		abline(h=robpca.obj$cutoff$sd)
		givelabel(robpca.obj, labod=0, labsd, indexplot=1)
	}
	title("ROBPCA")
	if(classic == 1) {
		diagnosticplot <- !any(robpca.obj$classic$od <= as.vector(1.E-06,mode(robpca.obj$classic$od)))
		if(diagnosticplot == T) {
			xmax <- max(max(robpca.obj$classic$sd), robpca.obj$classic$cutoff$sd)
			ymax <- max(max(robpca.obj$classic$od), robpca.obj$classic$cutoff$od)
			plot(robpca.obj$classic$sd, robpca.obj$classic$od, xlab="Score distance", ylab="Orthogonal distance", xlim=c(0,xmax), ylim=c(0,ymax), type="p")
			abline(v=robpca.obj$classic$cutoff$sd)
			abline(h=robpca.obj$classic$cutoff$od)
			givelabel(robpca.obj$classic, labod, labsd)
		}
		else {
			ymax <- max(max(robpca.obj$classic$sd), robpca.obj$classic$cutoff$sd)
			plot(robpca.obj$classic$sd, xlab="Index", ylab="Score distance", ylim=c(0,ymax), type="p")
			abline(h=robpca.obj$classic$cutoff$sd)
			givelabel(robpca.obj$classic, labod=0, labsd, indexplot=1)
		}
		title("CPCA")
	}
	invisible(robpca.obj)
}
"givelabel"<-function(object, labod, labsd, indexplot=0) {
	if((labod == 0) && (labsd == 0)) {
		return(invisible(object))
	}
	if(indexplot != 1) {
		order.od <- order(object$od*(-1))
		order.sd <- order(object$sd*(-1))
		if(labod != 0) {
			for(i in 1:labod) {
				lab <- ifelse(is.character(names(object$od)), names(object$od[order.od[i]]), order.od[i])
				text(object$sd[order.od[i]], object$od[order.od[i]]+par("cxy")[2], labels=lab)
			}
		}
		if(labsd != 0) {
			for(i in 1:labsd) {
				lab <- ifelse(is.character(names(object$sd)), names(object$od[order.sd[i]]), order.sd[i])
				text(object$sd[order.sd[i]], object$od[order.sd[i]]+par("cxy")[2], labels=lab)
			}	
		}	
	}
	else {
		order.sd <- order(object$sd*(-1))
		if(labsd != 0) {
			for(i in 1:labsd) {
				lab <- ifelse(is.character(names(object$sd)), names(object$sd[order.sd[i]]), order.sd[i])
				text(order.sd[i], object$sd[order.sd[i]]+par("cxy")[2], labels=lab)
			}	
		}	
	}
	return(invisible(object))
}

regpca<-function(x,cor=TRUE,loadings=TRUE,
SCORES=FALSE,pval=ncol(x),scree=TRUE,xlab="Principal Component",ylab="Proportion of Variance"){
#
# regular PCA, calls princomp
#
x<-elimna(x) # removes any rows having missing values
temp<-princomp(x,cor=cor,scores=TRUE)
if(!SCORES)temp<-summary(temp,loadings=loadings)
if(SCORES){
return(temp$scores)
}
if(scree){
z=temp$sdev
pv=z^2
cs=pv/sum(pv)
cm=cumsum(cs)
plot(rep(c(1:ncol(x)),2),c(cs,cm),type="n",xlab=xlab,ylab=ylab)
points(c(1:ncol(x)),cs,pch="*")
lines(c(1:ncol(x)),cs,lty=1)
points(c(1:ncol(x)),cm,pch=".")
lines(c(1:ncol(x)),cm,lty=2)
}
temp
}
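#  Example (a sketch, not run when sourced):
#  z <- matrix(rnorm(150), ncol = 3)
#  regpca(z)                                      # summary plus scree plot
#  head(regpca(z, SCORES = TRUE, scree = FALSE))  # component scores only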



chi.test.ind<-function(x){
#
# x is a matrix with r rows and c columns
# Goal: perform the chi-squared test of independence.
#
if(!is.matrix(x))stop("x should be a matrix")
n=sum(x)
phat=x/n
r=rowSums(x)
cc=colSums(x)
df=(nrow(x)-1)*(ncol(x)-1)
ex=outer(r,cc,"*")
val=sum(n*(x-ex/n)^2/ex)
pv=1-pchisq(val,df)
list(test.stat=val,p.value=pv)
}
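#  Example (a sketch, not run when sourced):
#  tab <- matrix(c(20, 15, 25,
#                  30, 10, 20), nrow = 2, byrow = TRUE)
#  chi.test.ind(tab)
#  chisq.test(tab, correct = FALSE)   # built-in test gives the same statistic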

anova_power<-function(groups=NULL,n=NULL,delta=NULL,sig.level=0.05,power=NULL){
#
# Determine sample sizes or power when using the ANOVA F test.
#
#  groups is the number of groups and must be specified.
#
#  delta is Cohen's effect size: the sum of the
#  squared deviations among the means divided by the within group variance.
#
#  Excluding groups, all but one of the NULL arguments must be specified.
#  The function determines the value for the one argument that is NULL
#
#library(stats)
if(is.null(groups))stop("Need to specify the number of groups")
within.var=1
between.var=delta/(groups-1)
res=power.anova.test(groups=groups,n=n, between.var=between.var,
within.var=within.var,sig.level=sig.level,power=power)
list(groups=res[1]$groups,n=res[2]$n,delta=delta,
sig.level=res[5]$sig.level,power=res[6]$power)
}
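#  Example (a sketch, not run when sourced):
#  anova_power(groups = 4, n = 25, delta = 0.5)       # solve for power
#  anova_power(groups = 4, delta = 0.5, power = 0.8)  # solve for n per group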

 outpca<-function(x,cor=TRUE,loadings=TRUE,covlist=NULL,scree=TRUE,
SCORES=FALSE,ALL=TRUE,pval=NULL,cop=3,ADJ=FALSE,SEED=TRUE,pr=TRUE,STAND=FALSE,
xlab= "Principal Component", ylab = "Proportion of Variance",...){
#
# Remove outliers with outpro
# (using projection method)
# apply standard principal components to the remaining data
#
# ALL=T, when computing scores, all of the data are used, not just
# the data left after outliers are removed.
#
x<-elimna(x) # removes any rows having missing values
m<-ncol(x)
if(m>9){
if(pr)print("With more than 9 variables, might want to use ADJ=T")
}
if(!ADJ)flag<-outpro(x,cop=cop,STAND=STAND)$keep
if(ADJ)flag<-outproad(x,cop=cop,SEED=SEED,STAND=STAND)$results$keep
remx<-x
temp2<-princomp(remx)
x<-x[flag,]
loc<-apply(x,2,mean)
temp<-princomp(x,cor=cor,scores=TRUE,covlist=covlist)
if(scree){
z=temp$sdev
pv=z^2
cs=pv/sum(pv)
cm=cumsum(cs)
plot(rep(c(1:ncol(x)),2),c(cs,cm),type="n",xlab=xlab,ylab=ylab)
points(c(1:ncol(x)),cs,pch="*")
lines(c(1:ncol(x)),cs,lty=1)
points(c(1:ncol(x)),cm,pch=".")
lines(c(1:ncol(x)),cm,lty=2)
}
if(!SCORES)temp<-summary(temp,loadings=loadings)
if(SCORES){
if(is.null(pval))
stop("When computing scores, specify pval, number of components")
if (!ALL)temp<-temp$scores[,1:pval]
if(ALL){
temp<-summary(temp,loadings=T)
B<-temp[2]$loadings[1:m,1:m] # Use robust loadings
 z<-remx
for(i in 1:nrow(z))z[i,]<-z[i,]-loc
temp<-t(B)%*%t(z)
temp<-t(temp)
temp<-temp[,1:pval]
}}
temp
}

mcp2a<-function(J,K,x,est=mom,con=0,alpha=.05,nboot=NA,grp=NA,...){
#
# Do all pairwise comparisons of
# main effects for Factor A and B and all interactions
#
        #  The data are assumed to be stored in x
        #  in list mode or in a matrix.
        #  If grp is unspecified, it is assumed x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second factor: level 1,2
        #  x[[j+1]] is the data for level 2,1, etc.
        #  If the data are in wrong order, grp can be used to rearrange the
        #  groups. For example, for a two by two design, grp<-c(2,4,3,1)
        #  indicates that the second group corresponds to level 1,1;
        #  group 4 corresponds to level 1,2; group 3 is level 2,1;
        #  and group 1 is level 2,2.
        #
        #   Missing values are automatically removed.
        #
        JK <- J * K
        if(is.matrix(x))
                x <- listm(x)
        if(!is.na(grp[1])) {
                yy <- x
                for(j in 1:length(grp))
                        x[[j]] <- yy[[grp[j]]]
        }
        if(!is.list(x))
                stop("Data must be stored in list mode or a matrix.")
mvec<-NA
  tempn=0
        for(j in 1:JK) {
                xx <- x[[j]]
                x[[j]] <- xx[!is.na(xx)]
                mvec[j]<-est(x[[j]],...)
tempn[j]=length(x[[j]])
        }
nmax=max(tempn)
        #
        # Create the three contrast matrices
        #
        if(JK != length(x))
                warning("The number of groups does not match the number of contrast coefficients.")
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#  Determine nboot if a value was not specified
if(is.na(nboot)){
nboot<-5000
if(J <= 8)nboot<-4000
if(J <= 3)nboot<-2000
}
bvec<-matrix(NA,nrow=JK,ncol=nboot)
print("Taking bootstrap samples. Please wait.")
for(j in 1:JK){
print(paste("Working on group ",j))
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,est,...) # J by nboot matrix, jth row contains
#                          bootstrapped  estimates for jth group
}
outvec<-list()
temp3<-con2way(J,K)
for(jj in 1:3){
con<-temp3[[jj]]
con<-as.matrix(con)
ncon<-ncol(con)
# Determine critical values
if(alpha==.05){
dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(nmax>80){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
}}
if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon)
test<-NA
bcon<-t(con)%*%bvec #ncon by nboot matrix
tvec<-t(con)%*%mvec
for (d in 1:ncon){
test[d]<-sum(bcon[d,]>0)/nboot
if(test[d]> .5)test[d]<-1-test[d]
}
output<-matrix(0,ncon,6)
dimnames(output)<-list(NULL,c("con.num","psihat","sig.test","sig.crit","ci.lower","ci.upper"))
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
if(sum(sigvec)<ncon){
dd<-ncon-sum(sigvec) #number that are sig.
ddd<-sum(sigvec)+1
zvec[ddd:ncon]<-dvec[ddd]
}
output[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot)+1
icu<-nboot-icl-1
for (ic in 1:ncol(con)){
output[ic,2]<-tvec[ic,]
output[ic,1]<-ic
output[ic,3]<-test[ic]
temp<-sort(bcon[ic,])
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
outvec[[jj]]<-output
}
list(FactorA=outvec[[1]],FactorB=outvec[[2]],Interactions=outvec[[3]],
conA=temp3[[1]],conB=temp3[[2]],conAB=temp3[[3]])
}
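#  Example (a sketch, not run when sourced): a 2-by-2 design with four
#  independent groups stored in list mode.
#  set.seed(1)
#  x <- list(rnorm(20), rnorm(20), rnorm(20), rnorm(20, mean = .5))
#  mcp2a(2, 2, x)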


t1wayF<-function(x,fac,tr=.2,nboot=100,SEED=TRUE){
#
# Same a t1way, but now the data are assumed to be
# stored in a matrix or data frame where one of the columns contain
# the data to be analyzed and another column contains the group
# identification.
#
# For example, if dat is a data frame, with column 1 containing
# the outcome measures of interest, and column 2 is a factor variable
# indicating to  which group a value in column 1 belongs, then
#  t1wayF(dat[,1],dat[,2])
#  will test the hypothesis that all J groups have identical
#  trimmed means.
#
#  Missing values are automatically removed.
#
library(MASS)
if(SEED)set.seed(2)
x=fac2list(x,fac)
J<-length(x)
h<-vector("numeric",J)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
pts=NULL
nval=0
for(j in 1:J)x[[j]]=elimna(x[[j]])
for(j in 1:J){
val<-x[[j]]
val<-elimna(val)
nval[j]=length(val)
pts=c(pts,val)
x[[j]]<-val # missing values have been removed
h[j]<-length(x[[j]])-2*floor(tr*length(x[[j]]))
   # h is the number of observations in the jth group after trimming.
w[j]<-h[j]*(h[j]-1)/((length(x[[j]])-1)*winvar(x[[j]],tr))
xbar[j]<-mean(x[[j]],tr)
}
u<-sum(w)
xtil<-sum(w*xbar)/u
A<-sum(w*(xbar-xtil)^2)/(J-1)
B<-2*(J-2)*sum((1-w/u)^2/(h-1))/(J^2-1)
TEST<-A/(B+1)
nu1<-J-1
nu2<-1./(3*sum((1-w/u)^2/(h-1))/(J^2-1))
sig<-1-pf(TEST,nu1,nu2)
#
# Determine explanatory effect size
#
chkn=var(nval)
if(chkn==0){
top=var(xbar)
cterm=NULL
if(tr==0)cterm=1
if(tr==0.2)cterm=.642
if(is.null(cterm))cterm=area(dnormvar,qnorm(tr),qnorm(1-tr))+
2*(qnorm(tr)^2)*tr
bot=winvar(pts,tr=tr)/cterm
e.pow=top/bot
}
if(chkn!=0){
vals=0
N=min(nval)
xdat=list()
for(i in 1:nboot){
for(j in 1:J)xdat[[j]]=sample(x[[j]],N)
# compute the effect size once per bootstrap sample, after all J groups
# have been resampled
vals[i]=t1way.effect(xdat,tr=tr)$Var.Explained
}
e.pow=mean(vals,na.rm=TRUE)
}
list(TEST=TEST,nu1=nu1,nu2=nu2,siglevel=sig,Var.Explained=e.pow,
Effect.Size=sqrt(e.pow))
}
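#  Example (a sketch, not run when sourced):
#  set.seed(1)
#  dat <- data.frame(y = rnorm(60), g = rep(1:3, each = 20))
#  t1wayF(dat$y, dat$g)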




t1waybt<-function(x,tr=.2,grp=NA,nboot=599,SEED=TRUE){
#
#   Test the hypothesis of equal trimmed means, corresponding to J independent
#   groups, using a percentile t bootstrap method.
#
#   The data are assumed to be stored in x in list mode
#   or in a matrix. In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, columns correspond to groups.
#
#   grp is used to specify some subset of the groups, if desired.
#   By default, all J groups are used.
#
#   The default number of bootstrap samples is nboot=599
#
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
if(!is.na(grp[1]))J=length(grp)
if(is.na(grp[1])){
J<-length(x)
grp<-c(1:J)
}
for(j in 1:J){
temp<-x[[j]]
x[[j]]<-temp[!is.na(temp)] # Remove any missing values.
}
bvec<-array(0,c(J,2,nboot))
hval<-vector("numeric",J)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
hval[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]]))
   # hval is the number of observations in the jth group after trimming.
print(paste("Working on group ",grp[j]))
xcen<-x[[grp[j]]]-mean(x[[grp[j]]],tr)
data<-matrix(sample(xcen,size=length(x[[grp[j]]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row
#                     contains the bootstrap trimmed means, the second row
#                     contains the bootstrap squared standard errors.
}
m1<-bvec[,1,]  # J by nboot matrix containing the bootstrap trimmed means
m2<-bvec[,2,]  # J by nboot matrix containing the bootstrap sq standard errors
wvec<-1/m2  # J by nboot matrix of w values
uval<-apply(wvec,2,sum)  # Vector having length nboot
blob<-wvec*m1
xtil<-apply(blob,2,sum)/uval # nboot vector of xtil values
blob1<-matrix(0,J,nboot)
for (j in 1:J)blob1[j,]<-wvec[j,]*(m1[j,]-xtil)^2
avec<-apply(blob1,2,sum)/(length(x)-1)
blob2<-(1-wvec/uval)^2/(hval-1)
cvec<-apply(blob2,2,sum)
cvec<-2*(length(x)-2)*cvec/(length(x)^2-1)
testb<-avec/(cvec+1)
#            A vector of length nboot containing bootstrap test values
ct<-sum(is.na(testb))
if(ct>0){
print("Some bootstrap estimates of the test statistic could not be computed")
print("Effective number of bootstrap samples was")
print(sum(!is.na(testb)))
}
test<-t1way(x,tr=tr,grp=grp)
pval<-mean(test$TEST<=testb,na.rm=TRUE)
list(test=test$TEST,p.value=pval)
}

cidM<-function(x,nboot=1000,alpha=.05,MC=FALSE,SEED=TRUE,g=NULL,dp=NULL){
#
# Variation of Cliff method based on median of X-Y
# i.e., use p=P(X<Y) as effect size.
# test p=.5
# All pairwise comparisons performed.
# FWE controlled via Hochberg method.
# x can be a matrix (columns are groups) or have list mode
#
#   g=NULL, x is assumed to be a matrix or have list mode
#   if g is specifed, it is assumed that column g of x is
#   a factor variable and that the dependent variable of interest is in column
#   dp of x, which can be a matrix or data frame.
#
if(!is.null(g)){
if(is.null(dp))stop("Specify a value for dp, the column containing the data")
x=fac2list(x[,dp],x[,g])
}
if(SEED)set.seed(2)
if(MC)library(parallel)
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x))x=listm(x)
chk=tlist(x)
if(chk!=0)print("Warning: tied values detected")
J=length(x)
L=(J^2-J)/2
CC=L
pvec=NA
boot=list()
MAT=matrix(NA,nrow=nboot,ncol=L)
for(i in 1:nboot){
jcom=0
for (j in 1:J){
boot[[j]]=sample(x[[j]],size=length(x[[j]]),replace=TRUE)
}
MAT[i,]=wmwloc2(boot)
}
#
pvec=NA
test<-matrix(NA,CC,8)
dimnames(test)<-list(NULL,c("Group","Group","p-value","p.crit",
"P(X<Y)","P(X=Y)","P(X>Y)","p.hat"))
dvec<-alpha/c(1:CC)
for(j in 1:J){
for(k in 1:J){
if(j<k){
jcom=jcom+1
p.value=mean(MAT[,jcom]>0)+.5*mean(MAT[,jcom]==0)
pvec[jcom]=2*min(c(p.value,1-p.value))
if(is.na(pvec[jcom]))pvec[jcom]=1
test[jcom,1]<-j
test[jcom,2]<-k
test[jcom,3]<-pvec[jcom]
test[jcom,5:7]<-cid(x[[j]],x[[k]])$summary.dvals
test[jcom,8]<-test[jcom,5]+.5*test[jcom,6]
}}}
temp2<-order(0-test[,3])
test[temp2,4]=dvec
list(test=test)
}

msmedse<-function(x){
#
# Compute the standard error of the median using the method
# recommended by McKean and Schrader (1984).
#
x=elimna(x)
chk=sum(duplicated(x))
if(chk>0){
print("WARNING: tied values detected.")
print("Estimate of standard error might be highly inaccurate, even with n large")
}
y<-sort(x)
n<-length(x)
av<-round((n+1)/2-qnorm(.995)*sqrt(n/4))
if(av==0)av<-1
top<-n-av+1
sqse<-((y[top]-y[av])/(2*qnorm(.995)))^2
sqse<-sqrt(sqse)
sqse
}
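#  Example (a sketch, not run when sourced):
#  set.seed(1)
#  msmedse(rnorm(50))   # standard error of the sample median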




t1waybtv2<-function(x,tr=.2,grp=NA,g=NULL,dp=NULL,nboot=599,SEED=TRUE){
#
#   Test the hypothesis of equal trimmed means, corresponding to J independent
#   groups, using a percentile t bootstrap method.
#
#   The data are assumed to be stored in x in list mode
#   or in a matrix. In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, columns correspond to groups.
#
#   grp is used to specify some subset of the groups, if desired.
#   By default, all J groups are used.
#   g=NULL, x is assumed to be a matrix or have list mode
#
#   if g is specified, it is assumed that column g of x is
#   a factor variable and that the dependent variable of interest is in column
#   dp of x, which can be a matrix or data frame.
#
#   The default number of bootstrap samples is nboot=599
#
if(!is.null(g)){
if(is.null(dp))stop("Specify a value for dp, the column containing the data")
x=fac2list(x[,dp],x[,g])
}
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
if(is.na(grp[1]))grp<-c(1:length(x))
J<-length(grp)
nval=NA
x=lapply(x,elimna)
nval=lapply(x,length)
xbar=lapply(x,mean,tr=tr)
bvec<-array(0,c(J,2,nboot))
hval<-vector("numeric",J)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
hval[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]]))
   # hval is the number of observations in the jth group after trimming.
print(paste("Working on group ",grp[j]))
xcen<-x[[grp[j]]]-mean(x[[grp[j]]],tr)
data<-matrix(sample(xcen,size=length(x[[grp[j]]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row
#                     contains the bootstrap trimmed means, the second row
#                     contains the bootstrap squared standard errors.
}
m1<-bvec[,1,]  # J by nboot matrix containing the bootstrap trimmed means
m2<-bvec[,2,]  # J by nboot matrix containing the bootstrap sq standard errors
wvec<-1/m2  # J by nboot matrix of w values
uval<-apply(wvec,2,sum)  # Vector having length nboot
blob<-wvec*m1
xtil<-apply(blob,2,sum)/uval # nboot vector of xtil values
blob1<-matrix(0,J,nboot)
for (j in 1:J)blob1[j,]<-wvec[j,]*(m1[j,]-xtil)^2
avec<-apply(blob1,2,sum)/(length(x)-1)
blob2<-(1-wvec/uval)^2/(hval-1)
cvec<-apply(blob2,2,sum)
cvec<-2*(length(x)-2)*cvec/(length(x)^2-1)
testb<-avec/(cvec+1)
#            A vector of length nboot containing bootstrap test values
ct<-sum(is.na(testb))
if(ct>0)print("Some bootstrap estimates of the test statistic could not be computed")
test<-t1way(x,tr=tr,grp=grp)
pval<-sum(test$TEST<=testb)/nboot
#
# Determine explanatory effect size
#
e.pow=t1wayv2(x)$Explanatory.Power
list(test=test$TEST,p.value=pval,Explanatory.Power=e.pow,
Effect.Size=sqrt(e.pow))
}




t2wayv2<-function(J,K,data,tr=.2,grp=c(1:p),p=J*K,g=NULL,dp=NULL,pr=T){
#  Perform a J by K  (two-way) anova on trimmed means where
#  all groups are independent.
#
#  The R variable data is assumed to contain the raw
#  data stored in list mode, or a matrix with columns
#  corresponding to groups. If stored in list mode, data[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second factor: level 1,2
#
#  The default amount of trimming is tr=.2
#
#  It is assumed that data has length JK, the total number of
#  groups being tested.
#
#   g=NULL, x is assumed to be a matrix or have list mode
#
#   if g is specified, it is assumed that column g of x is
#   a factor variable and that the dependent variable of interest is in column
#   dp of x, which can be a matrix or data frame.
#
if(!is.null(g[1])){
if(length(g)!=2)stop("Argument g should have two values")
if(is.null(dp[1]))
stop("Specify a value for dp, the column containing the data")
data=fac2list(data[,dp],data[,g])
}
if(is.matrix(data))data=listm(data)
if(!is.list(data))stop("Data are not stored in list mode")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups in data is")
print(length(data))
print("Warning: These two values are not equal")
}
tmeans<-0
h<-0
v<-0
for (i in 1:p){
data[[grp[i]]]=elimna(data[[grp[i]]])
tmeans[i]<-mean(data[[grp[i]]],tr)
h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]]))
#    h is the effective sample size
v[i]<-(length(data[[grp[i]]])-1)*winvar(data[[grp[i]]],tr)/(h[i]*(h[i]-1))
#    v contains the squared standard errors
}
v<-diag(v,p,p)   # Put squared standard errors in a diag matrix.
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
#  Do test for factor A
#cmat<-kron(cj,kron(ik,il))  # Contrast matrix for factor A
cmat<-kron(cj,ik)  # Contrast matrix for factor A
alval<-c(1:999)/1000
for(i in 1:999){
irem<-i
Qa<-johan(cmat,tmeans,v,h,alval[i])
if(Qa$teststat>Qa$crit)break
}
A.p.value=irem/1000
# Do test for factor B
cmat<-kron(ij,ck)  # Contrast matrix for factor B
for(i in 1:999){
irem<-i
Qb<-johan(cmat,tmeans,v,h,alval[i])
if(Qb$teststat>Qb$crit)break
}
B.p.value=irem/1000
# Do test for factor A by B interaction
cmat<-kron(cj,ck)  # Contrast matrix for factor A by B
for(i in 1:999){
irem<-i
Qab<-johan(cmat,tmeans,v,h,alval[i])
if(Qab$teststat>Qab$crit)break
}
AB.p.value=irem/1000
tmeans=matrix(tmeans,J,K,byrow=T)
list(Qa=Qa$teststat,A.p.value=A.p.value,
Qb=Qb$teststat,B.p.value=B.p.value,
Qab=Qab$teststat,AB.p.value=AB.p.value,means=tmeans)
}
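#  Example (a sketch, not run when sourced): a 2-by-3 design with six
#  independent groups stored in list mode.
#  set.seed(1)
#  dat <- lapply(1:6, function(i) rnorm(20))
#  t2wayv2(2, 3, dat)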



lpindt<-function(x,y,nboot=500,xout=FALSE,outfun=out){
#
# Test the hypothesis of no association based on the fit obtained
# from lplot (Cleveland's LOESS)
#
m<-elimna(cbind(x,y))
x<-as.matrix(x)
p<-ncol(x)
pp<-p+1
x<-m[,1:p]
y<-m[,pp]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,pp]
}
n=length(y)
data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
val=NA
x=as.matrix(x)
for(i in 1:nboot){
val[i]=lplot(x[data1[i,],],y[data2[i,]],plotit=FALSE,pr=FALSE)$Strength.Assoc
}
val=sort(val)
est=lplot(x,y,plotit=FALSE,pr=FALSE)$Strength.Assoc
p.value=mean((est<val))
list(Explanatory.power=est,p.value=p.value)
}
gamindt<-function(x,y,nboot=500,xout=FALSE,outfun=out){
#
# Test the hypothesis of no association based on the fit obtained with
# a generalized additive model
#
m<-elimna(cbind(x,y))
x<-as.matrix(x)
p<-ncol(x)
pp<-p+1
x<-m[,1:p]
y<-m[,pp]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,pp]
}
n=length(y)
data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
val=NA
x=as.matrix(x)
for(i in 1:nboot){
val[i]=gamplotv2(x[data1[i,],],y[data2[i,]],plotit=FALSE,pr=FALSE)$Strength.Assoc
}
val=sort(val)
est=gamplotv2(x,y,plotit=FALSE)$Strength.Assoc
p.value=mean((est<val))
p.value
}
gamplotv2<-function(x,y,sop=FALSE,pyhat=FALSE,eout=FALSE,xout=FALSE,outfun=out,plotit=TRUE,
varfun=pbvar,xlab="X",ylab="",zlab="",theta=50,phi=25,expand=.5,SCALE=FALSE,
cor.fun=pbcor,ADJ=FALSE,nboot=20,pr=TRUE,SEED=TRUE,ticktype="simple"){
#
# Plot regression surface using generalized additive model
#
# sop=F, use lowess
# sop=T, use splines
#
if(ADJ){
if(SEED)set.seed(2)
}
if(pr){
if(!ADJ){
print("To get adjusted estimates of strength of association, use ADJ=T")
print("The strength of association is estimated under independence")
print(" and then rescaled")
}}
library(akima)
library(mgcv)
x<-as.matrix(x)
np<-ncol(x)
np1<-np+1
if(ncol(x)>4)stop("x should have at most four columns of data")
m<-elimna(cbind(x,y))
if(xout && eout)stop("Can't have xout=eout=T")
if(eout){
flag<-outfun(m)$keep
m<-m[flag,]
}
if(xout){
flag<-outfun(x,plotit=FALSE)$keep
m<-m[flag,]
}
x<-m[,1:np]
x=as.matrix(x)
y<-m[,np1]
if(!sop){
if(ncol(x)==1)fitr<-fitted(gam(y~x[,1]))
if(ncol(x)==2)fitr<-fitted(gam(y~x[,1]+x[,2]))
if(ncol(x)==3)fitr<-fitted(gam(y~x[,1]+x[,2]+x[,3]))
if(ncol(x)==4)fitr<-fitted(gam(y~x[,1]+x[,2]+x[,3]+x[,4]))
}
if(sop){
if(ncol(x)==1)fitr<-fitted(gam(y~s(x[,1])))
if(ncol(x)==2)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])))
if(ncol(x)==3)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])+s(x[,3])))
if(ncol(x)==4)fitr<-fitted(gam(y~s(x[,1])+s(x[,2])+s(x[,3])+s(x[,4])))
}
last<-fitr
if(plotit){
if(ncol(x)==1){
plot(x,fitr,xlab=xlab,ylab=ylab)
}
if(ncol(x)==2){
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the interp function (package akima)
mkeep<-x[iout>=1,]
fitr<-interp(mkeep[,1],mkeep[,2],fitr)
persp(fitr,theta=theta,phi=phi,expand=expand,xlab="x1",ylab="x2",zlab="",
scale=SCALE,ticktype=ticktype)
}
}
top=varfun(last)
ep=top/varfun(y)
if(ep>=1)ep=cor.fun(last,y)$cor^2
eta=sqrt(ep)
st.adj=NULL
e.adj=NULL
if(ADJ){
x=as.matrix(x)
val=NA
n=length(y)
data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(i in 1:nboot){
temp=gamplotv2.sub(x[data1[i,],],y[data2[i,]],plotit=FALSE)
val[i]=temp$Explanatory.power
}
vindt=median(val)
v2indt=median(sqrt(val))
st.adj=(sqrt(ep)-max(c(0,v2indt)))/(1-max(c(0,v2indt)))
e.adj=(ep-max(c(0,vindt)))/(1-max(c(0,vindt)))
st.adj=max(c(0,st.adj))
e.adj=max(c(0,e.adj))
}
eta=as.matrix(eta)
ep=as.matrix(ep)
dimnames(eta)=NULL
dimnames(ep)=NULL
eta=eta[1]
ep=ep[1]
list(Strength.Assoc=eta,Explanatory.power=ep,
Strength.Adj=st.adj,Explanatory.Adj=e.adj)
}

cidmul<-function(x,alpha=.05,g=NULL,dp=NULL,pr=TRUE){
#
#  Perform Cliff's method for all pairs of J independent groups.
#  Unlike the function meemul, ties are allowed.
#  The familywise type I error probability is controlled by using
#  a critical value from the Studentized maximum modulus distribution.
#
#  The data are assumed to be stored in $x$ in list mode.
#  Length(x) is assumed to correspond to the total number of groups, J.
#  It is assumed all groups are independent.
#
#  Missing values are automatically removed.
#
#  The default value for alpha is .05. Any other value results in using
#  alpha=.01.
#
if(pr)print('cidmulv2 might provide better power')
if(!is.null(g)){
if(is.null(dp))stop("Specify a value for dp, the column containing the data")
x=fac2list(x[,dp],x[,g])
}
if(is.matrix(x) || is.data.frame(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
J<-length(x)
CC<-(J^2-J)/2
test<-matrix(NA,CC,7)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
}
dimnames(test)<-list(NULL,c("Group","Group","d","ci.lower","ci.upper",
"p.hat","p-value"))
jcom<-0
crit<-smmcrit(200,CC)
if(alpha!=.05)crit<-smmcrit01(200,CC)
alpha<-1-pnorm(crit)
n=matl(lapply(x,length))
for (j in 1:J){
for (k in 1:J){
if (j < k){
temp<-cid(x[[j]],x[[k]],alpha,plotit=FALSE)
temp2<-cidv2(x[[j]],x[[k]],alpha,plotit=FALSE)
jcom<-jcom+1
test[jcom,1]<-j
test[jcom,2]<-k
test[jcom,3]<-temp$d
test[jcom,4]<-temp$cl
test[jcom,5]<-temp$cu
test[jcom,6]<-temp$phat
test[jcom,7]<-temp2$p.value
}}}
list(n=n,test=test)
}

 cidmulv2<-function(x,alpha=.05,g=NULL,dp=NULL,CI.FWE=F){
#
#  Perform Cliff's method for all pairs of J independent groups.
#  The familywise type I error probability is controlled via
#  Hochberg's method.
#
#  The data are assumed to be stored in $x$ in list mode or in a
#  matrix with J columns, columns corresponding to groups.
#
#  It is assumed all groups are independent.
#
#  Missing values are automatically removed.
#
#   g=NULL, x is assumed to be a matrix or have list mode
#   if g is specified, it is assumed that column g of x is
#   a factor variable and that the dependent variable of interest is in column
#   dp of x, which can be a matrix or data frame.
#
if(!is.null(g)){
if(is.null(dp))stop("Specify a value for dp, the column containing the data")
x=fac2list(x[,dp],x[,g])
}
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
J<-length(x)
CC<-(J^2-J)/2
test<-matrix(NA,CC,7)
c.sum=matrix(NA,CC,5)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx]  # Remove missing values
}
dimnames(test)<-list(NULL,c("Group","Group","p.hat","p.ci.lower",
"p.ci.uppper","p-value","p.crit"))
dvec<-alpha/c(1:CC)
dimnames(c.sum)<-list(NULL,c("Group","Group","P(X<Y)","P(X=Y)","P(X>Y)"))
jcom<-0
n=matl(lapply(x,length))
for (j in 1:J){
for (k in 1:J){
if (j < k){
temp<-cidv2(x[[j]],x[[k]],alpha,plotit=FALSE)
jcom<-jcom+1
test[jcom,1]<-j
test[jcom,2]<-k
c.sum[jcom,1]<-j
c.sum[jcom,2]<-k
c.sum[jcom,3:5]=cid(x[[j]],x[[k]])$summary.dvals
test[jcom,3]<-temp$p.hat
test[jcom,4]<-temp$p.ci[1]
test[jcom,5]<-temp$p.ci[2]
test[jcom,6]<-temp$p.value
}}}
temp2<-order(0-test[,6])
test[temp2,7]=dvec
if(CI.FWE){
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
jcom<-jcom+1
temp<-cidv2(x[[j]],x[[k]],alpha=test[jcom,7],plotit=FALSE)
test[jcom,4]<-temp$p.ci[1]
test[jcom,5]<-temp$p.ci[2]
}}}}
list(n=n,test=test,summary.dvals=c.sum)
}
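#  Example (a sketch, not run when sourced; assumes cid and cidv2 from
#  this file):
#  set.seed(1)
#  x <- list(rnorm(30), rnorm(30), rnorm(30, mean = .7))
#  cidmulv2(x)$test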
fac2list<-function(x,g){
#
# data are stored in x
# information about the level of the value in x is stored in g,
# which can be a matrix with up to 4 columns
#
# sort the data in x into groups based on values in g.
# store results in list mode.
#
#  Example: fac2list(m[,2],m[,4]) would sort the values
#  in column 2 of m according to the values in column 4 of m
#
g=as.data.frame(g)
L=ncol(g)
g=listm(g)
for(j in 1:L)g[[j]]=as.factor(g[[j]])
g=matl(g)
Lp1=L+1
if(L>4)stop("Can have at most 4 factors")
if(L==1){
res=selby(cbind(x,g),2,1)
group.id=res$grpn
res=res$x
}
if(L>1){
res=selby2(cbind(x,g),c(2:Lp1),1)
group.id=res$grpn
res=res$x
}
print("Group Levels:")
print(group.id)
res
}
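#  Example (a sketch, not run when sourced):
#  dat <- data.frame(score = rnorm(12), g = rep(c("a", "b", "c"), each = 4))
#  fac2list(dat[, 1], dat[, 2])   # list with one vector per level of g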

MMreg<-function(x,y,RES=FALSE,xout=FALSE,outfun=outpro,STAND=FALSE,varfun=pbvar,corfun=pbcor,...){
#
#  Compute MM regression estimate derived by Yohai (1987)
#  simply by calling the R function lmrob
#  This function will remove leverage points when
#  xout=T
#  using the outlier detection method indicated by
#  outfun, which defaults to the projection method.
#
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
temp<-NA
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
if(!STAND)flag<-outfun(x,...)$keep
if(STAND)flag<-outpro(x,STAND=TRUE,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
library(robustbase)
temp=lmrob(y~x)
coef=temp$coefficients
p1=ncol(x)+1
res<-y-x%*%coef[2:p1]-coef[1]
yhat<-y-res
stre=NULL
e.pow<-varfun(yhat)/varfun(y)
if(!is.na(e.pow)){
if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2
e.pow=as.numeric(e.pow)
stre=sqrt(e.pow)
}
if(!RES)res=NULL
list(coef=coef,residuals=res,Strength.Assoc=stre)
}
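#  Example (a sketch, not run when sourced; requires the robustbase package):
#  set.seed(1)
#  x <- rnorm(50)
#  y <- 2 * x + rnorm(50)
#  MMreg(x, y, xout = TRUE)$coef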

ks<-function(x,y,w=FALSE,sig=TRUE,alpha=.05){
#  Compute the Kolmogorov-Smirnov test statistic
#
#  w=T computes the weighted version instead.
#
#  sig=T indicates that the exact significance level is to be computed.
#  If there are ties, the reported significance level is exact when
#  using the unweighted test, but for the weighted test the reported
#  level is too high.
#
#  This function uses the functions ecdf, kstiesig, kssig and kswsig
#  that are stored in the file ch5fun.sp that comes with this book.
#
#  This function returns the value of the test statistic, the approximate .05
#  critical value, and the exact significance level if sig=T.
#
#  Missing values are automatically removed
#
x<-x[!is.na(x)]
y<-y[!is.na(y)]
w<-as.logical(w)
sig<-as.logical(sig)
tie<-logical(1)
siglevel<-NA
z<-sort(c(x,y))  # Pool and sort the observations
tie=any(duplicated(z))  # TRUE if any tied values among the pooled data
v<-1   # Initializes v
for (i in 1:length(z))v[i]<-abs(ecdf(x,z[i])-ecdf(y,z[i]))
ks<-max(v)
#
#crit<-1.36*sqrt((length(x)+length(y))/(length(x)*length(y))) # Approximate
#                                                       .05 critical value
crit=sqrt(0-log(alpha/2)*(length(x)+length(y))/(2*length(x)*length(y)))
if(!w && sig && !tie)siglevel<-kssig(length(x),length(y),ks)
if(!w && sig && tie)siglevel<-kstiesig(x,y,ks)
if(w){
crit<-(max(length(x),length(y))-5)*.48/95+2.58+abs(length(x)-length(y))*.44/95
if(length(x)>100 || length(y)>100)warning(paste("When either sample size is
greater than 100, the approximate critical value can be inaccurate. It is
recommended that the exact significance level be computed."))
for (i in 1:length(z)){
temp<-(length(x)*ecdf(x,z[i])+length(y)*ecdf(y,z[i]))/length(z)
temp<-temp*(1.-temp)
v[i]<-v[i]/sqrt(temp)
}
v<-v[!is.na(v)]
ks<-max(v)*sqrt(length(x)*length(y)/length(z))
if(sig)siglevel<-kswsig(length(x),length(y),ks)
if(tie && sig)
warning(paste("Ties were detected. The reported significance level of the
weighted Kolmogorov-Smirnov test statistic is not exact."))
}
list(test=ks,critval=crit,p.value=siglevel)
}
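#  Example (a sketch, not run when sourced):
#  set.seed(1)
#  ks(rnorm(40), rnorm(40, mean = .5))
#  ks.test(rnorm(40), rnorm(40, mean = .5))   # built-in analogue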

bbw2list<-function(x,grp.col,lev.col,pr=T){
#
#  for a between-by-between-by-within design
#  grp.col indicates the columns where values of the  levels of between factor
#  are stored.
#  lev.col indicates the columns where repeated measures are contained.
#  If, for example, there are data for three times, stored in columns
#  6, 8 and 11, set
#  lev.col=c(6,8,11)
#
#  Example:  Have a 3 x 4 x 2 design
#  values in columns 2 and 4 indicate the
#  levels of the two between factors.
#  column 3 contains time 1 data,
#  column 7 contains time 2 data
#  bbw2list(x,c(2,4),c(3,7)) will store data in list mode that can be
#  used by bbwtrim and related functions
#
res=selbybbw(x,grp.col,lev.col,pr=pr)
res
}


selbybbw<-function(m,grpc,coln,pr=T){
#
#  For a between by-between-by-within design,
#  a common situation is to have data stored in an n by p matrix where
#  two columns indicate the group identification numbers (levels)
#  for the between factors,
#  and two or more other columns contain  the within group results.
#
#  This function is used by bbw2list to store the data in list mode so
#  that the R function bbwtrim can be used.
#
#  m is a matrix containing the data. One column contains group
#  identification values
#  and two or more other columns contain repeated measures.
#
#  This function groups  all values in the columns
#  indicated by  coln according to the
#  group numbers in column grpc and stores the  results in list mode.
#
#  So if grpc[1] has J values, grpc[2] has K values,
#  and coln indicates L columns,
#  this function returns the data stored in list mode having length JKL
#
#  Example:
#  y<-selbybbw(blob,c(2,3),c(7,9,11))
#  will look for group numbers in col 2 and  3 of the matrix blob,
#   which indicate levels for the between factors,
#  and it assumes that times 1, 2 and 3 are stored in col 7, 9, and 11.
#
#  Result:  the data will now be stored in y having list mode.
#
#if(!is.matrix(m))stop("Data must be stored in a matrix")
if(is.na(grpc[1]))stop("The argument grpc is not specified")
if(is.na(coln[1]))stop("The argument coln is not specified")
if(length(grpc)!=2)stop("The argument grpc must have length 2")
mm=m
m<-as.data.frame(elimna(mm))
x<-list()
grp1<-sort(unique(m[,grpc[1]]))
grp2<-sort(unique(m[,grpc[2]]))
if(pr){
print("Levels for first factor:")
print(grp1)
print("Levels for second factor:")
print(grp2)
}
J<-length(grp1)
K<-length(grp2)
L<-length(coln)
JKL<-J*K*L
itt<-0
it=0
mm=as.matrix(m[,coln])
gmat=matrix(NA,ncol=2,nrow=J*K)
for (ig1 in 1:length(grp1)){
for (ig2 in 1:length(grp2)){
itt=itt+1
gmat[itt,]=c(grp1[ig1],grp2[ig2])
for (ic in 1:length(coln)){
it<-it+1
flag<-(m[,grpc[1]]==grp1[ig1])*(m[,grpc[2]]==grp2[ig2])
flag=as.logical(flag)
x[[it]]<-as.numeric(mm[flag,ic])
}}}
x
}

selbybw<-function(m,grpc,coln){
#
#  For a between by within design,
#  a common situation is to have data stored in an n by p matrix where
#  a column is a group identification number
#  and the remaining columns are the within group results.
#
#  m is a matrix containing the data. One column contains group
#  identification values
#  and two or more other columns contain repeated measures.
#
#  This function groups  all values in the columns
#  indicated by  coln according to the
#  group numbers in column grpc and stores the  results in list mode.
#
#  So if grpc has J values, and coln indicates K columns,
#  this function returns the data stored in list mode having length JK
#
#  Example: y<-selbybw(blob,3,c(4,6,7))$x
#  will look for group numbers in col 3 of the matrix blob,
#  and it assumes within
#  group data are stored in col 4, 6 and 7.
#  Result:  the data will now be stored in y having list mode
#

#if(!is.matrix(m))stop("Data must be stored in a matrix")
if(is.na(grpc[1]))stop("The argument grpc is not specified")
if(is.na(coln[1]))stop("The argument coln is not specified")
if(length(grpc)!=1)stop("The argument grpc must have length 1")
x<-list()
m=m[,c(grpc,coln)]
m<-as.data.frame(elimna(m))
grpn<-sort(unique(m[,1]))
J<-length(grpn)
K<-length(coln)
JK<-J*K
it<-0
mm=as.data.frame(m[,2:ncol(m)])
for (ig in 1:length(grpn)){
for (ic in 1:length(coln)){
it<-it+1
flag<-(m[,1]==grpn[ig])
x[[it]]<-as.numeric(mm[flag,ic])
}}
list(x=x,grpn=grpn)
}

bw2list<-function(x,grp.col,lev.col,pr=T){
#
#  for a between by within design
#  grp.col is column indicating levels of between factor.
#  lev.col indicates the columns where repeated measures are contained
#
#  Example:  for a 3 by 2 design, column 2 indicates the levels of the
#  between factor, column 3 contains time 1 data,
#  and column 7 contains time 2 data.
#  bw2list(x,2,c(3,7)) will store data in list mode that can be
#  used by rmanova and related functions
#
res=selbybw(x,grp.col,lev.col)
if(pr){
print("Levels for between factor:")
print(unique(x[,grp.col]))
}
res$x
}


rmc2list<-function(x,grp.col,lev.col,pr=T){
#
#  for a between by within design
#  grp.col is column indicating levels of between factor.
#  lev.col indicates the columns where repeated measures are contained
#
#  Example:  for a 3 by 2 design, column 2 indicates the levels of the
#  between factor, column 3 contains time 1 data,
#  and column 7 contains time 2 data.
#  rmc2list(x,2,c(3,7)) will store data in list mode that can be
#  used by rmanova and related functions.
#  bw2list(x,2,c(3,7)) can also be used.
#
res=selbybw(x,grp.col,lev.col)
if(pr){
print("Levels for between factor:")
print(unique(x[,grp.col]))
}
res$x
}


wlogregci<-function(x,y,nboot=400,alpha=.05,SEED=TRUE,MC=FALSE,
xlab="Predictor 1",ylab="Predictor 2",xout=FALSE,outfun=out,...){
#
#   Compute a  confidence interval for each of the parameters of
#   a log linear model based on a robust estimator
#
#   The predictor values are assumed to be in the n by p matrix x.
#

if(MC)library(parallel)
x<-as.matrix(x)
p1<-ncol(x)+1
p<-ncol(x)
xy<-cbind(x,y)
xy<-elimna(xy)
x<-xy[,1:p]
y<-xy[,p1]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
n=length(y)
data<-matrix(sample(n,size=length(y)*nboot,replace=TRUE),nrow=n,ncol=nboot)
data=listm(data)
if(MC)bvec<-mclapply(data,wlogreg.sub,x,y,mc.preschedule=TRUE)
if(!MC)bvec<-lapply(data,wlogreg.sub,x,y)
bvec=matl(bvec)
#
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
x=as.matrix(x)
p1<-ncol(x)+1
regci<-matrix(0,p1,3)
VAL<-c("intercept",rep("X",ncol(x)))
dimnames(regci)<-list(VAL,c("Est.","ci.low","ci.up"))
se<-NA
sig.level<-NA
for(i in 1:p1){
bna=elimna(bvec[i,])
nbn=length(bna)
ilow<-round((alpha/2) * nbn)
ihi<-nbn - ilow
ilow<-ilow+1
temp<-mean(bna<0)
sig.level[i]<-2*(min(temp,1-temp))
bna<-sort(bna)
regci[i,2]<-bna[ilow]
regci[i,3]<-bna[ihi]
se[i]<-sqrt(var(elimna(bvec[i,])))
}
regci[,1]=wlogreg(x,y)$coef
list(conf.interval=regci,p.values=sig.level,se=se)
}
wlogreg.sub<-function(data,x,y){
x=as.matrix(x)
wlogreg(x[data,],y[data])$coef
}




logreg.plot<-function(x,y,MLE=FALSE,ROB=TRUE,xlab="X",ylab="P(X)"){
#
# For one predictor, plot logistic regression line
#
#  if x is a matrix with more than one column, the plot is based on the
#  data in column 1.
#
#  MLE=T, will plot usual maximum likelihood estimate using a solid line
#  ROB=T, will plot robust estimate, which is indicated by a
#  dashed line.
#
if(is.matrix(x))x=x[,1]
xord=order(x)
xx=x[xord]
yy=y[xord]
est1=logreg(xx,yy)[1:2,1]
plot(xx,yy,xlab=xlab,ylab=ylab)
phat=exp(est1[1]+est1[2]*xx)/(1+exp(est1[1]+est1[2]*xx))
if(MLE)lines(xx,phat)
if(ROB){
est2=wlogreg(xx,yy)$coef[1:2]
phat2=exp(est2[1]+est2[2]*xx)/(1+exp(est2[1]+est2[2]*xx))
lines(xx,phat2,lty=2)
}
}


medpb2<-function(x,y,alpha=.05,nboot=2000,SEED=TRUE){
#
#   Compare 2 independent groups using medians.
#
#   A percentile bootstrap method is used, which performs well when
#   there are tied values.
#
#   The data are assumed to be stored in x and y
#
#   Missing values are automatically removed.
#
x=elimna(x)
y=elimna(y)
xx<-list()
xx[[1]]<-x
xx[[2]]<-y
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
est1=median(xx[[1]])
est2=median(xx[[2]])
est.dif<-median(xx[[1]])-median(xx[[2]])
crit<-alpha/2
temp<-round(crit*nboot)
icl<-temp+1
icu<-nboot-temp
bvec<-matrix(NA,nrow=2,ncol=nboot)
for(j in 1:2){
data<-matrix(sample(xx[[j]],size=length(xx[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,median) # Bootstrapped medians for jth group
}
top<-bvec[1,]-bvec[2,]
test<-sum(top<0)/nboot+.5*sum(top==0)/nboot
if(test > .5)test<-1-test
top<-sort(top)
ci<-NA
ci[1]<-top[icl]
ci[2]<-top[icu]
list(n1=length(x),n2=length(y),p.value=2*test,ci=ci,est1=est1,est2=est2,
est.dif=est.dif)
}
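
#  A minimal usage sketch for medpb2 with hypothetical simulated data;
#  wrapped in if(FALSE) so that sourcing this file does not run it.
if(FALSE){
set.seed(1)
g1=rnorm(30)
g2=rnorm(30,mean=.5)
medpb2(g1,g2)  # p-value and CI for the difference between the medians
}
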
m2ci<-function(x,y,alpha=.05,nboot=1000,bend=1.28,os=F){
#
#   Compute a bootstrap 1-alpha (default .95) confidence interval for
#   the difference between two independent
#   M-estimators of location based on Huber's Psi.
#   The default percentage bend is bend=1.28
#   The default number of bootstrap samples is nboot=1000
#
#   By default, the fully iterated M-estimator is used. To use the
#   one-step M-estimator instead, set os=T
#
os<-as.logical(os)
x<-x[!is.na(x)] # Remove any missing values in x
y<-y[!is.na(y)] # Remove any missing values in y
if(length(x)<=19 || length(y)<=19)
warning(paste("The number of observations in at least one group
is less than 20. This function might fail due to division by zero,
which in turn causes an error in function hpsi having to do with
a missing value."))
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot)
if(!os){
bvecx<-apply(datax,1,mest,bend)
bvecy<-apply(datay,1,mest,bend)
}
if(os){
bvecx<-apply(datax,1,onestep,bend)
bvecy<-apply(datay,1,onestep,bend)
}
bvec<-sort(bvecx-bvecy)
test<-sum(bvec<0)/nboot+.5*sum(bvec==0)/nboot
pv=2*min(c(test,1-test))
low<-round((alpha/2)*nboot)
up<-round((1-alpha/2)*nboot)
se<-sqrt(var(bvec))
list(ci=c(bvec[low],bvec[up]),se=se,p.value=pv)
}

qsplit<-function(x,y,split.val=NULL){
#
# x assumed to be a matrix or dataframe
#
#  IF split.val=NULL,
#
# split the data in x into 3 groups:
# those for which y <= lower quartile
# those between lower and upper quartile
# those for which y >= upper quartile
#
# IF split.val CONTAINS TWO VALUES, SPLIT THE DATA ACCORDING TO
# THE VALUES SPECIFIED.
#
if(!is.data.frame(x))x=as.matrix(x)
if(is.null(split.val)){
v=idealf(y)
flag1=(y<=v$ql)
flag2=(y>=v$qu)
}
if(!is.null(split.val)){
flag1=(y<=split.val[1])
flag2=(y>=split.val[2])
}
flag3=as.logical(as.numeric(!flag1)*as.numeric(!flag2))
d1=x[flag1,]
d2=x[flag2,]
d3=x[flag3,]
list(lower=d1,middle=d3,upper=d2)
}
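
#  A minimal usage sketch for qsplit with hypothetical simulated data;
#  wrapped in if(FALSE) so that sourcing this file does not run it.
if(FALSE){
set.seed(1)
x=matrix(rnorm(100),ncol=2)
y=rnorm(50)
qsplit(x,y)$lower  # rows of x where y is at or below its lower quartile
}
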
cohen2xi<-function(delta){
xi=sqrt((2*delta^2)/(4+delta^2))
xi
}
xi2cohen<-function(xi){
delta=sqrt((4*xi^2)/(2-xi^2))
delta
}
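
#  A quick numerical check of the two conversions above, using a
#  hypothetical value of Cohen's d; wrapped in if(FALSE) so that sourcing
#  this file does not run it.
if(FALSE){
cohen2xi(.8)            # approximately .525
xi2cohen(cohen2xi(.8))  # recovers .8
}
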
cid<-function(x,y,alpha=.05,plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab=""){
#
#  Compute a confidence interval for delta using the method in
#  Cliff, 1996, p. 140, eq 5.12
#
#  The null hypothesis is that for two independent groups, P(X<Y)=P(X>Y).
#  This function reports a 1-alpha confidence interval for
#  P(X>Y)-P(X<Y)
#
#  plotit=TRUE causes a plot of the difference scores to be created
#  pop=0 adaptive kernel density estimate
#  pop=1 results in the expected frequency curve.
#  pop=2 kernel density estimate (Rosenblatt's shifted histogram)
#  pop=3 boxplot
#  pop=4 stem-and-leaf
#  pop=5 histogram
#  pop=6 S+ kernel density estimate
#
x<-x[!is.na(x)]
y<-y[!is.na(y)]
m<-outer(x,y,FUN="-")
msave<-m
m<-sign(m)
d<-mean(m)
phat<-(1-d)/2
flag=T
if(phat==0 || phat==1)flag=F
q0<-sum(msave==0)/length(msave)
qxly<-sum(msave<0)/length(msave)
qxgy<-sum(msave>0)/length(msave)
c.sum<-matrix(c(qxly,q0,qxgy),nrow=1,ncol=3)
dimnames(c.sum)<-list(NULL,c("P(X<Y)","P(X=Y)","P(X>Y)"))
if(flag){
sigdih<-sum((m-d)^2)/(length(x)*length(y)-1)
di<-NA
for (i in 1:length(x))di[i]<-sum(x[i]>y)/length(y)-sum(x[i]<y)/length(y)
dh<-NA
for (i in 1:length(y))dh[i]<-sum(y[i]>x)/length(x)-sum(y[i]<x)/length(x)
sdi<-var(di)
sdh<-var(dh)
sh<-((length(y)-1)*sdi+(length(x)-1)*sdh+sigdih)/(length(x)*length(y))
zv<-qnorm(alpha/2)
cu<-(d-d^3-zv*sqrt(sh)*sqrt((1-d^2)^2+zv^2*sh))/(1-d^2+zv^2*sh)
cl<-(d-d^3+zv*sqrt(sh)*sqrt((1-d^2)^2+zv^2*sh))/(1-d^2+zv^2*sh)
}
if(!flag){
sh=NULL
nm=max(c(length(x),length(y)))
if(phat==1)bci=binomci(nm,nm,alpha=alpha)
if(phat==0)bci=binomci(0,nm,alpha=alpha)
}
if(plotit){
if(pop==1 || pop==0){
if(length(x)*length(y)>2500){
print("Product of sample sizes exceeds 2500.")
print("Execution time might be high when using pop=0 or 1")
print("If this is case, might consider changing the argument pop")
#print("pop=2 might be better")
}}
if(pop==0)akerd(as.vector(msave),xlab=xlab,ylab=ylab)
if(pop==1)rdplot(as.vector(msave),fr=fr,xlab=xlab,ylab=ylab)
if(pop==2)kdplot(as.vector(msave),rval=rval,xlab=xlab,ylab=ylab)
if(pop==3)boxplot(as.vector(msave))
if(pop==4)stem(as.vector(msave))
if(pop==5)hist(as.vector(msave),xlab=xlab)
if(pop==6)skerd(as.vector(msave))
}
if(flag)pci=c((1-cu)/2,(1-cl)/2)
if(!flag){
pci=bci$ci
cl=1-2*pci[2]
cu=1-2*pci[1]
}
list(n1=length(x),n2=length(y),cl=cl,cu=cu,d=d,sqse.d=sh,phat=phat,summary.dvals=c.sum,ci.p=pci)
}
cidv2<-function(x,y,alpha=.05,plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab=""){
#
#   p-value for Cliff's analog of WMW test
#
nullval<-0
ci<-cid(x,y,alpha=alpha,plotit=plotit,pop=pop,fr=fr,rval=rval)
alph<-c(1:99)/100
for(i in 1:99){
irem<-i
chkit<-cid(x,y,alpha=alph[i],plotit=FALSE)
if(chkit[[3]]>nullval || chkit[[4]]<nullval)break
}
p.value<-irem/100
if(p.value<=.1){
iup<-(irem+1)/100
alph<-seq(.001,iup,.001)
for(i in 1:length(alph)){
p.value<-alph[i]
chkit<-cid(x,y,alpha=alph[i],plotit=FALSE,xlab=xlab,ylab=ylab)
if(chkit[[3]]>nullval || chkit[[4]]<nullval)break
}}
if(p.value<=.001){
alph<-seq(.0001,.001,.0001)
for(i in 1:length(alph)){
p.value<-alph[i]
chkit<-cid(x,y,alpha=alph[i],plotit=FALSE)
if(chkit[[3]]>nullval || chkit[[4]]<nullval)break
}}
phat<-(1-ci$d)/2
pci=c((1-ci$cu)/2,(1-ci$cl)/2)
d.ci=c(ci$cl,ci$cu)
dval=cid(x,y)$summary.dvals
list(n1=length(elimna(x)),n2=length(elimna(y)),d.hat=ci$d,d.ci=d.ci,p.value=p.value,p.hat=phat,p.ci=pci,summary.dvals=dval)
}



bmp<-function(x,y,alpha=.05,crit=NA,plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab=""){
#
# Brunner and Munzel (2000) heteroscedastic analog of WMW test.
#
# plotit=TRUE causes a plot of the difference scores to be created
x<-x[!is.na(x)]  # Remove any missing values
y<-y[!is.na(y)]
n1<-length(x)
n2<-length(y)
N<-n1+n2
n1p1<-n1+1
flag1<-c(1:n1)
flag2<-c(n1p1:N)
R<-rank(c(x,y))
R1<-mean(R[flag1])
R2<-mean(R[flag2])
Rg1<-rank(x)
Rg2<-rank(y)
S1sq<-sum((R[flag1]-Rg1-R1+(n1+1)/2)^2)/(n1-1)
S2sq<-sum((R[flag2]-Rg2-R2+(n2+1)/2)^2)/(n2-1)
sig1<-S1sq/n2^2
sig2<-S2sq/n1^2
se<-sqrt(N)*sqrt(N*(sig1/n1+sig2/n2))
bmtest<-(R2-R1)/se
phat<-(R2-(n2+1)/2)/n1
dhat<-1-2*phat
df<-(S1sq/n2 + S2sq/n1)^2/((S1sq/n2)^2/(n1-1)+(S2sq/n1)^2/(n2-1))
sig<-2 * (1 - pt(abs(bmtest),df))
if(is.na(crit))vv<-qt(alpha/2,df)
if(!is.na(crit))vv<-crit
ci.p<-c(phat+vv*se/N,phat-vv*se/N)
if(plotit){
msave<-outer(x,y,FUN="-")
if(pop==0){
if(length(x)*length(y)>2500){
print("Product of sample sizes exceeds 2500.")
print("Execution time might be high when plotting and when using pop=1")
print("If this is case, might consider changing the argument pop or using plotit=F")
}
akerd(as.vector(msave),fr=fr)
}
if(pop==1)rdplot(as.vector(msave),fr=fr,xlab=xlab,ylab=ylab)
if(pop==2)kdplot(as.vector(msave),rval=rval,xlab=xlab,ylab=ylab)
if(pop==3)boxplot(as.vector(msave))
if(pop==4)stem(as.vector(msave))
if(pop==5)hist(as.vector(msave))
if(pop==6)skerd(as.vector(msave),xlab=xlab,ylab=ylab)
}
list(test.stat=bmtest,phat=phat,dhat=dhat,sig.level=sig,ci.p=ci.p,df=df)
}
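
#  A minimal usage sketch for bmp with hypothetical simulated data;
#  wrapped in if(FALSE) so that sourcing this file does not run it.
if(FALSE){
set.seed(1)
bmp(rnorm(30),rnorm(30,mean=.5))  # test statistic, estimate of p and its CI
}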

adjboxout<-function(x){
#
# determine outliers using adjusted boxplot rule based on the
# medcouple
#
x=elimna(x)
n=length(x)
MC=mcskew(x)
val=idealf(x)
iqr=val$qu-val$ql
if(MC>=0){
bot=val$ql-1.5*exp(0-4*MC)*iqr
top=val$qu+1.5*exp(3*MC)*iqr
}
if(MC<0){
bot=val$ql-1.5*exp(0-3*MC)*iqr
top=val$qu+1.5*exp(4*MC)*iqr
}
flag=rep(F,length(x))
fl=(x<bot)
fu=(x>top)
flag[fl]=T
flag[fu]=T
vec<-c(1:n)
outid<-NULL
if(sum(flag)>0)outid<-vec[flag]
outval<-x[flag]
keep<-x[!flag]
list(out.val=outval,out.id=outid,keep=keep,cl=bot,cu=top)
}
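
#  A minimal usage sketch for adjboxout with hypothetical skewed data;
#  wrapped in if(FALSE) so that sourcing this file does not run it.
if(FALSE){
set.seed(1)
x=c(rexp(50),10)  # skewed data plus one clear outlier
adjboxout(x)$out.val
}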

Mreglde.sub<-function(x,B){
n=x[1]
ncx=x[2]
ncy=x[3]
nxx=n*ncx
nyy=n*ncy
ncx1=ncx+1
B=matrix(B,nrow=ncx1,ncol=ncy)
iu=nxx+3
xm=matrix(x[4:iu],ncol=ncx)
il=iu+1
ym=matrix(x[il:length(x)],ncol=ncy)
ainit=B[1:ncy]
il=ncy+1
Binit=matrix(B[il:length(B)],nrow=ncx,ncol=ncy)
yhat=matrix(0,nrow=n,ncol=ncy)
for(i in 1:n){
z=as.matrix(xm[i,])
yhat[i,]=t(Binit)%*%z
}
yhat=t(t(yhat)+ainit)
res=ym-yhat
res=sum(sqrt(apply(res^2,1,sum)))
res
}

pbtrmcp<-function(x,alpha=.05,nboot=NA,grp=NA,con=0,bhop=FALSE,tr=.2,SEED=TRUE){
#
#   Multiple comparisons for J independent groups based on trimmed means,
#   using a percentile bootstrap method.
#
#   The data are assumed to be stored in x
#   which either has list mode or is a matrix.  In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, the columns of the matrix correspond
#   to groups.
#

#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   Missing values are allowed.
#
con<-as.matrix(con)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
tempn<-0
mvec<-NA
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
mvec[j]<-tmean(temp,tr=tr)
}
nmax=max(tempn)
Jm<-J-1
#
# Determine contrast matrix
#
if(sum(con^2)==0){
ncon<-(J^2-J)/2
con<-matrix(0,J,ncon)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
ncon<-ncol(con)
if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.")
#  Determine nboot if a value was not specified
if(is.na(nboot)){
nboot<-5000
if(J <= 8)nboot<-4000
if(J <= 3)nboot<-2000
}
# Determine critical values
if(!bhop){
if(alpha==.05){
dvec<-c(.025,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.005,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
dvec[1]<-alpha/2
}
dvec<-2*dvec
}
if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon
bvec<-matrix(NA,nrow=J,ncol=nboot)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
for(j in 1:J){
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,tmean,tr=tr) # Bootstrapped values for jth group
}
test<-NA
bcon<-t(con)%*%bvec #ncon by nboot matrix
tvec<-t(con)%*%mvec
for (d in 1:ncon){
test[d]<-sum(bcon[d,]>0)/nboot
if(test[d]> .5)test[d]<-1-test[d]
}
test<-2*test
output<-matrix(0,ncon,6)
dimnames(output)<-list(NULL,c("con.num","psihat","sig.test","sig.crit","ci.lower","ci.upper"))
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
for (ic in 1:ncol(con)){
output[ic,2]<-tvec[ic,]
output[ic,1]<-ic
output[ic,3]<-test[ic]
temp<-sort(bcon[ic,])
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}
mcp3atm<-function(J,K,L, x,tr=.2,con=0,alpha=.05,grp=NA,op=FALSE,pr=TRUE){
#
# Do all pairwise comparisons of
# main effects for Factor A and B and C and all interactions
# based on trimmed means
#
        #   The data are assumed to be stored in x in list mode or in a matrix.
        #  If grp is unspecified, it is assumed x[[1]] contains the data
        #  for the first level of all three factors: level 1,1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first two factors and level 2 of the third: level 1,1,2.
        #  x[[L+1]] is the data for level 1,2,1, etc.
        #  If the data are in the wrong order, grp can be used to rearrange the
        #  groups, where grp[i] indicates which group corresponds to the ith
        #  position in this ordering.
        #
        #   Missing values are automatically removed.
        #
if(is.data.frame(x))x=as.matrix(x)
        JKL <- J*K*L
        if(is.matrix(x))
                x <- listm(x)
        if(!is.na(grp[1])) {
                yy <- x
                x<-list()
                for(j in 1:length(grp))
                        x[[j]] <- yy[[grp[j]]]
        }
        if(!is.list(x))
                stop("Data must be stored in list mode or a matrix.")
        for(j in 1:JKL) {
                xx <- x[[j]]
                x[[j]] <- xx[!is.na(xx)] # Remove missing values
        }
        #

        if(JKL != length(x))
                warning("The number of groups does not match the number of contrast coefficients.")
for(j in 1:JKL){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
x[[j]]<-temp
}
        # Create the three contrast matrices
temp<-con3way(J,K,L)
conA<-temp$conA
conB<-temp$conB
conC<-temp$conC
conAB<-temp$conAB
conAC<-temp$conAC
conBC<-temp$conBC
conABC<-temp$conABC
if(!op){
Factor.A<-lincon(x,con=conA,tr=tr,alpha=alpha,pr=pr)
Factor.B<-lincon(x,con=conB,tr=tr,alpha=alpha,pr=pr)
Factor.C<-lincon(x,con=conC,tr=tr,alpha=alpha,pr=pr)
Factor.AB<-lincon(x,con=conAB,tr=tr,alpha=alpha,pr=pr)
Factor.AC<-lincon(x,con=conAC,tr=tr,alpha=alpha,pr=pr)
Factor.BC<-lincon(x,con=conBC,tr=tr,alpha=alpha,pr=pr)
Factor.ABC<-lincon(x,con=conABC,tr=tr,alpha=alpha,pr=pr)
}
All.Tests<-NA
if(op){
Factor.A<-NA
Factor.B<-NA
Factor.C<-NA
Factor.AB<-NA
Factor.AC<-NA
Factor.BC<-NA
Factor.ABC<-NA
con<-cbind(conA,conB,conC,conAB,conAC,conBC,conABC)
All.Tests<-lincon(x,con=con,tr=tr,alpha=alpha,pr=pr)
}
list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C,
Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC,
Factor.ABC=Factor.ABC,All.Tests=All.Tests,conA=conA,conB=conB,conC=conC,
conAB=conAB,conAC=conAC,conBC=conBC,conABC=conABC)
}

mcp3med<-function(J,K,L, x,tr=.2,con=0,alpha=.05,grp=NA,op=F){
#
# Do all pairwise comparisons of
# main effects for Factor A and B and C and all interactions
# based on medians
#
        #   The data are assumed to be stored in x in list mode or in a matrix.
        #  If grp is unspecified, it is assumed x[[1]] contains the data
        #  for the first level of all three factors: level 1,1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first two factors and level 2 of the third: level 1,1,2.
        #  x[[L+1]] is the data for level 1,2,1, etc.
        #  If the data are in the wrong order, grp can be used to rearrange the
        #  groups, where grp[i] indicates which group corresponds to the ith
        #  position in this ordering.
        #
        #   Missing values are automatically removed.
        #
if(is.data.frame(x))x=as.matrix(x)
        JKL <- J*K*L
        if(is.matrix(x))
                x <- listm(x)
        if(!is.na(grp[1])) {
                yy <- x
                x<-list()
                for(j in 1:length(grp))
                        x[[j]] <- yy[[grp[j]]]
        }
        if(!is.list(x))
                stop("Data must be stored in list mode or a matrix.")
        for(j in 1:JKL) {
                xx <- x[[j]]
                x[[j]] <- xx[!is.na(xx)] # Remove missing values
        }
        #

        if(JKL != length(x))
                warning("The number of groups does not match the number of contrast coefficients.")
for(j in 1:JKL){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
x[[j]]<-temp
}
        # Create the three contrast matrices
temp<-con3way(J,K,L)
conA<-temp$conA
conB<-temp$conB
conC<-temp$conC
conAB<-temp$conAB
conAC<-temp$conAC
conBC<-temp$conBC
conABC<-temp$conABC
if(!op){
Factor.A<-msmed(x,con=conA,alpha=alpha)
Factor.B<-msmed(x,con=conB,alpha=alpha)
Factor.C<-msmed(x,con=conC,alpha=alpha)
Factor.AB<-msmed(x,con=conAB,alpha=alpha)
Factor.AC<-msmed(x,con=conAC,alpha=alpha)
Factor.BC<-msmed(x,con=conBC,alpha=alpha)
Factor.ABC<-msmed(x,con=conABC,alpha=alpha)
}
All.Tests<-NA
if(op){
Factor.A<-NA
Factor.B<-NA
Factor.C<-NA
Factor.AB<-NA
Factor.AC<-NA
Factor.BC<-NA
Factor.ABC<-NA
con<-cbind(conA,conB,conC,conAB,conAC,conBC,conABC)
All.Tests<-msmed(x,con=con,alpha=alpha)
}
list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C,
Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC,
Factor.ABC=Factor.ABC,All.Tests=All.Tests,conA=conA,conB=conB,conC=conC,
conAB=conAB,conAC=conAC,conBC=conBC,conABC=conABC)
}

bbtrim<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2,nboot=600,alpha=.05,pr=FALSE){
#
#  Perform multiple comparisons for a J by K ANOVA of independent
#  groups based on trimmed means, using a bootstrap-t method
#
#  tr=.2 is default trimming
#
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode. x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[2K] is level 2,K, etc.
#
#  It is assumed that data has length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
if(is.list(x))x<-elimna(matl(x))
if(is.matrix(x))x<-elimna(x)
data<-x
if(is.matrix(data))data<-listm(data)
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups stored in x is")
print(length(data))
print("Warning: These two values are not equal")
}
if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.")
temp=con2way(J,K)
conA<-temp$conA
conB<-temp$conB
conAB<-temp$conAB
Factor.A<-linconb(x,con=conA,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
Factor.B<-linconb(x,con=conB,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
Factor.AB<-linconb(x,con=conAB,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,pr=pr)
}

bbbtrim<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,nboot=600,pr=FALSE){
#
#  Perform three-way anova, independent groups, based on trimmed means
#
#  That is, there are three factors with a total of JKL independent groups.
#
#  A bootstrap-t method is used to perform multiple comparisons
#  The variable data is assumed to contain the raw
#  data stored in list mode. data[[1]] contains the data
#  for the first level of all three factors: level 1,1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first two factors and level 2 of the third factor: level 1,1,2
#  data[[L]] is the data for level 1,1,L
#  data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L.
#  data[[KL+1]] is level 2,1,1, etc.
#
#  The default amount of trimming is tr=.2
#
#  It is assumed that data has length JKL, the total number of
#  groups being tested.
#
if(is.list(data))data=listm(elimna(matl(data)))
if(is.matrix(data))data=listm(elimna(data))
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups in data is")
print(length(data))
print("Warning: These two values are not equal")
}
x=data
temp=con3way(J,K,L)
conA<-temp$conA
conB<-temp$conB
conC<-temp$conC
conAB<-temp$conAB
conAC<-temp$conAC
conBC<-temp$conBC
conABC=temp$conABC
Factor.A<-linconb(x,con=conA,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
Factor.B<-linconb(x,con=conB,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
Factor.C<-linconb(x,con=conC,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
Factor.AB<-linconb(x,con=conAB,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
Factor.AC<-linconb(x,con=conAC,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
Factor.BC<-linconb(x,con=conBC,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
Factor.ABC<-linconb(x,con=conABC,tr=tr,alpha=alpha,nboot=nboot,pr=pr)
list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C,
Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC,
Factor.ABC=Factor.ABC,pr=pr)
}


pb2trmcp<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2,nboot=NA,alpha=.05,SEED=TRUE,pr=TRUE,
bhop=F){
#
#  Perform multiple comparisons for a J by K ANOVA of independent
#  groups based on trimmed means, using a percentile bootstrap method
#
#  tr=.2 is default trimming
#
#
#  The R variable data is assumed to contain the raw
#  data stored in list mode. data[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  data[[K]] is the data for level 1,K
#  data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc.
#
#  It is assumed that data has length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
if(SEED)set.seed(2)
if(is.list(x))x<-elimna(matl(x))
if(is.matrix(x))x<-elimna(x)
data<-x
if(is.matrix(data))data<-listm(data)
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups stored in x is")
print(length(data))
print("Warning: These two values are not equal")
}
if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.")
temp=con2way(J,K)
conA<-temp$conA
conB<-temp$conB
conAB<-temp$conAB
if(pr)print("Taking bootstrap samples")
Factor.A<-pbtrmcp(x,con=conA,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop,SEED=F)
Factor.B<-pbtrmcp(x,con=conB,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop,SEED=F)
Factor.AB<-pbtrmcp(x,con=conAB,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop,SEED=F)
list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,bhop=bhop,SEED=F)
}



pb3trmcp<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,nboot=NA,
SEED=TRUE,bhop=F){
#
#  Multiple comparisons for a  three-way anova, independent groups,
#  based on trimmed means
#
#  That is, there are three factors with a total of JKL independent groups.
#
#  A percentile bootstrap method is used to perform multiple comparisons
#  The variable data is assumed to contain the raw
#  data stored in list mode. data[[1]] contains the data
#  for the first level of all three factors: level 1,1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first two factors and level 2 of the third factor: level 1,1,2
#  data[[L]] is the data for level 1,1,L
#  data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L.
#  data[[KL+1]] is level 2,1,1, etc.
#
#  The default amount of trimming is tr=.2
#
#  It is assumed that data has length JKL, the total number of
#  groups being tested.
#
if(SEED)set.seed(2)
if(is.list(data))data=listm(elimna(matl(data)))
if(is.matrix(data))data=listm(elimna(data))
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups in data is")
print(length(data))
print("Warning: These two values are not equal")
}
x=data
temp=con3way(J,K,L)
conA<-temp$conA
conB<-temp$conB
conC<-temp$conC
conAB<-temp$conAB
conAC<-temp$conAC
conBC<-temp$conBC
conABC=temp$conABC
Factor.A<-pbtrmcp(x,con=conA,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
Factor.B<-pbtrmcp(x,con=conB,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
Factor.C<-pbtrmcp(x,con=conC,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
Factor.AB<-pbtrmcp(x,con=conAB,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
Factor.AC<-pbtrmcp(x,con=conAC,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
Factor.BC<-pbtrmcp(x,con=conBC,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
Factor.ABC<-pbtrmcp(x,con=conABC,tr=tr,alpha=alpha,nboot=nboot,bhop=bhop)
list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C,
Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC,
Factor.ABC=Factor.ABC)
}


med2mcp<-function(J,K,x,grp=c(1:p),p=J*K,tr=.2,nboot=NA,alpha=.05,SEED=TRUE,pr=TRUE,
bhop=F){
#
#  Perform multiple comparisons for a J by K ANOVA based on medians,
#  using a percentile bootstrap method
#
#
#  The R variable data is assumed to contain the raw
#  data stored in a matrix or in list mode.
#  If stored in list mode, data[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  data[[K]] is the data for level 1,K
#  data[[K+1]] is the data for level 2,1, data[2K] is level 2,K, etc.
#
#  It is assumed that data has length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
if(SEED)set.seed(2)
if(is.list(x))x<-elimna(matl(x))
if(is.matrix(x))x<-elimna(x)
data<-x
if(is.matrix(data))data<-listm(data)
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups stored in x is")
print(length(data))
print("Warning: These two values are not equal")
}
if(p!=length(grp))stop("Apparently a subset of the groups was specified that does not match the total number of groups indicated by the values for J and K.")
temp=con2way(J,K)
conA<-temp$conA
conB<-temp$conB
conAB<-temp$conAB
if(pr)print("Taking bootstrap samples")
Factor.A<-medpb(x,con=conA,alpha=alpha,nboot=nboot,bhop=bhop,SEED=F)
Factor.B<-medpb(x,con=conB,alpha=alpha,nboot=nboot,bhop=bhop,SEED=F)
Factor.AB<-medpb(x,con=conAB,alpha=alpha,nboot=nboot,bhop=bhop,SEED=F)
list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.AB=Factor.AB,bhop=bhop,SEED=F)
}



med3mcp<-function(J,K,L,data,tr=.2,grp=c(1:p),alpha=.05,p=J*K*L,nboot=NA,
SEED=TRUE,bhop=F){
#
#  Multiple comparisons for a  three-way anova, independent groups,
#  based on medians using a percentile bootstrap method
#
#  That is, there are three factors with a total of JKL independent groups.
#
#  The variable data is assumed to contain the raw
#  data stored in a matrix or in list mode.
#  If in list mode, data[[1]] contains the data
#  for the first level of all three factors: level 1,1,1.
#  data[[2]] is assumed to contain the data for level 1 of the
#  first two factors and level 2 of the third factor: level 1,1,2
#  data[[L]] is the data for level 1,1,L
#  data[[L+1]] is the data for level 1,2,1. data[[2L]] is level 1,2,L.
#  data[[KL+1]] is level 2,1,1, etc.
#
#  It is assumed that data has length JKL, the total number of
#  groups being tested.
#
if(SEED)set.seed(2)
if(is.list(data))data=listm(elimna(matl(data)))
if(is.matrix(data))data=listm(elimna(data))
if(!is.list(data))stop("Data are not stored in list mode or a matrix")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups in data is")
print(length(data))
print("Warning: These two values are not equal")
}
x=data
temp=con3way(J,K,L)
conA<-temp$conA
conB<-temp$conB
conC<-temp$conC
conAB<-temp$conAB
conAC<-temp$conAC
conBC<-temp$conBC
conABC=temp$conABC
Factor.A<-medpb(x,con=conA,alpha=alpha,nboot=nboot,bhop=bhop)
Factor.B<-medpb(x,con=conB,alpha=alpha,nboot=nboot,bhop=bhop)
Factor.C<-medpb(x,con=conC,alpha=alpha,nboot=nboot,bhop=bhop)
Factor.AB<-medpb(x,con=conAB,alpha=alpha,nboot=nboot,bhop=bhop)
Factor.AC<-medpb(x,con=conAC,alpha=alpha,nboot=nboot,bhop=bhop)
Factor.BC<-medpb(x,con=conBC,alpha=alpha,nboot=nboot,bhop=bhop)
Factor.ABC<-medpb(x,con=conABC,alpha=alpha,nboot=nboot,bhop=bhop)
list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C,
Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC,
Factor.ABC=Factor.ABC)
}


regplot<-function(x,y,regfun=tsreg,xlab="X",ylab="Y",xout=FALSE,outfun=out,...){
x=as.matrix(x)
if(ncol(x)!=1)stop("One predictor only is allowed. For 2 predictors, use regp2plot")
if(xout){
xy=cbind(x,y)
flag=outfun(x)$keep
x=xy[flag,1]
y=xy[flag,2]
}
plot(x,y,xlab=xlab,ylab=ylab)
abline(regfun(x,y)$coef)
}
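
#  A minimal usage sketch for regplot with hypothetical simulated data;
#  wrapped in if(FALSE) so that sourcing this file does not run it.
if(FALSE){
set.seed(1)
x=rnorm(40)
y=x+rnorm(40)
regplot(x,y)  # scatterplot with the default (tsreg) regression line
}
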
olsplot<-function(x,y,regfun=lsfit,xlab="X",ylab="Y"){
#
# Plot the points in x and y and superimpose the OLS regression line
#
plot(x,y,xlab=xlab,ylab=ylab)
abline(regfun(x,y)$coef)
}
tlist<-function(z){
#
# check for any tied values in z, which is assumed to have list mode
#
chk=lapply(z,"duplicated")
s=lapply(chk,"sum")
val=sum(matl(s)) # if val=0, no tied values; val>0 means ties were detected.
val
}
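
#  A quick check of tlist with hypothetical values; wrapped in if(FALSE)
#  so that sourcing this file does not run it.
if(FALSE){
tlist(list(c(1,2,3),c(4,4,5)))  # returns 1: one tied value in the second group
}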

wmwaov<-function(x,nboot=500,MC=FALSE,SEED=TRUE,MM=FALSE){
#
# Extension of WMW to J groups
# i.e., use p=P(X<Y) as effect size.
# test p_{jk}=.5 all j<k
#
if(SEED)set.seed(2)
if(MC)library(parallel)
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x))x=listm(x)
chk=tlist(x)
if(chk!=0)print("Warning: tied values detected")
J=length(x)
L=(J^2-J)/2
ic=0
pvec=NA
boot=list()
MAT=matrix(NA,nrow=nboot,ncol=L)
for(i in 1:nboot){
for (j in 1:J){
boot[[j]]=sample(x[[j]],size=length(x[[j]]),replace=TRUE)
}
MAT[i,]=wmwloc2(boot)
}
zero=rep(0,L)
bconB=rbind(MAT,zero)
if(MC)dv=pdisMC(bconB,MM=MM)
if(!MC)dv=pdis(bconB,MM=MM)
bplus<-nboot+1
p.value<-1-sum(dv[bplus]>dv[1:nboot])/nboot-.5*sum(dv[bplus]==dv[1:nboot])/nboot
p.value
}
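
#  A minimal usage sketch for wmwaov with hypothetical simulated data,
#  assuming the supporting functions in this file (wmwloc2, pdis) are loaded;
#  wrapped in if(FALSE) so that sourcing this file does not run it.
if(FALSE){
set.seed(1)
x=list(rnorm(20),rnorm(20),rnorm(20,mean=1))
wmwaov(x,nboot=200)  # p-value for H0: P(X<Y)=.5 for all pairs of groups
}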


wincov<-function(m,tr=.2){
m=winall(m,tr=tr)$cov
m
}
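
#  A minimal usage sketch for wincov with hypothetical simulated data;
#  wrapped in if(FALSE) so that sourcing this file does not run it.
if(FALSE){
set.seed(1)
wincov(matrix(rnorm(60),ncol=3))  # 3 by 3 Winsorized covariance matrix
}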

mgvreg<-function(x,y,regfun=tsreg,cov.fun=rmba,se=TRUE,varfun=pbvar,corfun=pbcor,
SEED=TRUE){
#
# Do regression on points not labeled outliers
# by the MGV method.
# (This function replaces an older version of mgvreg as of 11/6/06)
#
# SEED=T so that results from outmgv are always duplicated using the same data
#
# In contrast to the old version,
#  when calling outmgv, center of data is determined via
#  the measure of location corresponding to cov.fun, which defaults
#  to the median ball algorithm (MBA)
#
x=as.matrix(x)
m<-cbind(x,y)
m<-elimna(m) # eliminate any rows with missing data
ivec<-outmgv(m,plotit=FALSE,cov.fun=cov.fun,SEED=SEED)$keep
np1<-ncol(x)+1
y=m[ivec,np1]
x=m[ivec,1:ncol(x)]
coef<-regfun(x,y)$coef
vec<-rep(1,length(y))
residuals<-y-cbind(vec,x)%*%coef
stre=NULL
yhat<-y-residuals
e.pow<-varfun(yhat)/varfun(y)
if(!is.na(e.pow)){
if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2
stre=sqrt(e.pow)
}
list(coef=coef,residuals=residuals,Strength.Assoc=stre,Explanatory.Power=e.pow)
}
opregpbMC<-function(x,y,nboot=1000,alpha=.05,om=TRUE,ADJ=TRUE,cop=3,
nullvec=rep(0,ncol(x)+1),plotit=TRUE,opdis=2,gval=sqrt(qchisq(.95,ncol(x)+1))){
#
#  Same as opregpb, only this function takes advantage of a multi-core
#  processor, assuming one is available and that the R package
#  parallel is installed.
#
# generate bootstrap estimates
# use projection-type outlier detection method followed by
# TS regression.
#
# om=T and ncol(x)>1, means an omnibus test is performed,
# otherwise only individual tests of parameters are performed.
#
# opdis=2, means that Mahalanobis distance is used
# opdis=1, means projection-type distance is used
#
# gval is critical value for projection-type outlier detection
# method
#
# ADJ=T, Adjust p-values as described in Section 11.1.5 of the text.
#
library(parallel)
x<-as.matrix(x)
m<-cbind(x,y)
p1<-ncol(x)+1
m<-elimna(m) # eliminate any rows with missing data
x<-m[,1:ncol(x)]
x<-as.matrix(x)
y<-m[,p1]
if(nrow(x)!=length(y))stop("Sample size of x differs from sample size of y")
if(!is.matrix(x))stop("Data should be stored in a matrix")
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,regboot,x,y,regfun=opregMC)
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
# using Hochberg method
bvec<-t(bvec)
dvec<-alpha/(c(1:ncol(x)))
icl0<-round(alpha*nboot/2)
icl<-round(alpha*nboot/(2*ncol(x)))
icu0<-nboot-icl0
icu<-nboot-icl
output<-matrix(0,p1,6)
dimnames(output)<-list(NULL,c("Param.","sig.test","sig.crit",
"ci.lower","ci.upper","s.e."))
pval<-NA
for(i in 1:p1){
output[i,1]<-i-1
se.val<-var(bvec[,i])
temp<-sort(bvec[,i])
output[i,6]<-sqrt(se.val)
if(i==1){
output[i,4]<-temp[icl0+1]
output[i,5]<-temp[icu0]
}
if(i>1){
output[i,4]<-temp[icl+1]
output[i,5]<-temp[icu]
}
pval[i]<-sum((temp>nullvec[i]))/length(temp)
if(pval[i]>.5)pval[i]<-1-pval[i]
}
fac<-2
if(ADJ){
# Adjust p-value if n<60
nval<-length(y)
if(nval<20)nval<-20
if(nval>60)nval<-60
fac<-2-(60-nval)/40
}
pval[1]<-2*pval[1]
pval[2:p1]<-fac*pval[2:p1]
output[,2]<-pval
temp2<-order(0-pval[2:p1])
zvec<-dvec[1:ncol(x)]
output[temp2+1,3]<-zvec
output[1,3]<-NA
output[,2]<-pval
om.pval<-NA
temp<-opregMC(x,y)$coef
if(om && ncol(x)>1){
temp2<-rbind(bvec[,2:p1],nullvec[2:p1])
if(opdis==1)dis<-pdisMC(temp2,pr=FALSE,center=temp[2:p1])
if(opdis==2){
cmat<-var(bvec[,2:p1]-apply(bvec[,2:p1],2,mean)+temp[2:p1])
dis<-mahalanobis(temp2,temp[2:p1],cmat)
}
om.pval<-sum((dis[nboot+1]<=dis[1:nboot]))/nboot
}
# do adjusted p-value
nval<-length(y)
if(nval<20)nval<-20
if(nval>60)nval<-60
adj.pval<-om.pval/2+(om.pval-om.pval/2)*(nval-20)/40
if(ncol(x)==2 && plotit){
plot(bvec[,2],bvec[,3],xlab="Slope 1",ylab="Slope 2")
temp.dis<-order(dis[1:nboot])
ic<-round((1-alpha)*nboot)
xx<-bvec[temp.dis[1:ic],2:3]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}
list(output=output,om.pval=om.pval,adj.om.pval=adj.pval)
}


opregMC<-function(x,y,regfun=tsregMC,cop=3,fast=FALSE,pr=TRUE,prres=FALSE,STAND=FALSE){
#
# Do regression on points not labeled outliers
# using projection-type outlier detection method
#
library(parallel)
x<-as.matrix(x)
m<-cbind(x,y)
m<-elimna(m) # eliminate any rows with missing data
ivec<-outproMC(m,plotit=FALSE,cop=cop,fast=FALSE,pr=FALSE,STAND=STAND)$keep
np1<-ncol(x)+1
coef<-regfun(m[ivec,1:ncol(x)],m[ivec,np1])$coef
vec<-rep(1,length(y))
residuals<-y-cbind(vec,x)%*%coef
if(fast && pr){
print("Intercept, followed by slopes:")
print(coef)
if(prres){
print("Residuals:")
print(residuals)
}}
list(coef=coef,residuals=residuals)
}
twocor<-function(x1,y1,x2,y2,corfun=pbcor,nboot=599,alpha=.05,...){
#
#  Compute a 1-alpha (default .95) confidence interval for the
#  difference between two correlation coefficients
#  corresponding to two independent groups.
#
#   the function corfun is any R function that returns a
#   correlation coefficient in corfun$cor. The functions pbcor and
#   wincor follow this convention.
#
#   For Pearson's correlation, use
#   the function twopcor instead.
#
#   The default number of bootstrap samples is nboot=599
#
set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
bvec1<-apply(data1,1,corbsub,x1,y1,corfun,...) # A 1 by nboot matrix.
data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot)
bvec2<-apply(data2,1,corbsub,x2,y2,corfun,...) # A 1 by nboot matrix.
bvec<-bvec1-bvec2
bsort<-sort(bvec)
term<-alpha/2
ilow<-round((alpha/2) * nboot)
ihi<-nboot - ilow
ilow<-ilow+1
corci<-1
corci[1]<-bsort[ilow]
corci[2]<-bsort[ihi]
pv<-(sum(bvec<0)+.5*sum(bvec==0))/nboot
pv=2*min(c(pv,1-pv))
r1<-corfun(x1,y1)$cor
r2<-corfun(x2,y2)$cor
reject<-"NO"
if(corci[1]>0 || corci[2]<0)reject="YES"
list(r1=r1,r2=r2,ci.dif=corci,p.value=pv)
}

lplot2g<-function(x1,y1,x2,y2,xlab="X",ylab="Y",xout=F){
#
# Plot smooths for two groups, based on the smoother used by lplot
#
# Missing values are automatically removed.
#
m<-elimna(cbind(x1,y1))
x1<-m[,1]
y1<-m[,2]
m<-elimna(cbind(x2,y2))
x2<-m[,1]
y2<-m[,2]
flag=order(x1)
x1=x1[flag]
y1=y1[flag]
flag=order(x2)
x2=x2[flag]
y2=y2[flag]
temp1<-lplot(x1,y1,pyhat=TRUE,plotit=FALSE,xout=xout)$yhat.values
temp2<-lplot(x2,y2,pyhat=TRUE,plotit=FALSE,xout=xout)$yhat.values
plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab)
points(x1,y1)
points(x2,y2,pch="+")
lines(x1,temp1)
lines(x2,temp2,lty=2)
}
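
#  A minimal usage sketch for lplot2g with hypothetical simulated data;
#  wrapped in if(FALSE) so that sourcing this file does not run it.
if(FALSE){
set.seed(1)
x1=rnorm(50); y1=x1+rnorm(50)
x2=rnorm(50); y2=.5*x2+rnorm(50)
lplot2g(x1,y1,x2,y2)  # group 2 points plotted with "+" and a dashed smooth
}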

rm3mcp<-function(J,K,L, x,tr=.2,alpha=.05,dif=TRUE,op=FALSE,grp=NA){
#
# MULTIPLE COMPARISONS FOR A 3-WAY within-by-within-by-within ANOVA
# Do all multiple comparisons associated with
# main effects for Factor A and B and C and all interactions
# based on trimmed means
#
        #   The data are assumed to be stored in x in list mode or in a matrix.
        #  If grp is unspecified, it is assumed x[[1]] contains the data
        #  for the first level of all three factors: level 1,1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first two factors and level 2 of the third: level 1,1,2.
        #  x[[L+1]] is the data for level 1,2,1, etc.
        #  If the data are in the wrong order, grp can be used to rearrange the
        #  groups, where grp[i] indicates which group corresponds to the ith
        #  position in this ordering.
        #
        #   Missing values are automatically removed.
        #
if(is.data.frame(x))x=as.matrix(x)
        JKL <- J*K*L
        if(is.matrix(x))
                x <- listm(x)
        if(!is.na(grp[1])) {
                yy <- x
                x<-list()
                for(j in 1:length(grp))
                        x[[j]] <- yy[[grp[j]]]
        }
        if(!is.list(x))
                stop("Data must be stored in list mode or a matrix.")
        for(j in 1:JKL) {
                xx <- x[[j]]
                x[[j]] <- xx[!is.na(xx)] # Remove missing values
        }
        #

        if(JKL != length(x))
                warning("The number of groups does not match the number of contrast coefficients.")
for(j in 1:JKL){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
x[[j]]<-temp
}
        # Create the three contrast matrices
temp<-con3way(J,K,L)
conA<-temp$conA
conB<-temp$conB
conC<-temp$conC
conAB<-temp$conAB
conAC<-temp$conAC
conBC<-temp$conBC
conABC<-temp$conABC
Factor.A<-rmmcp(x,con=conA,tr=tr,alpha=alpha,dif=dif)
Factor.B<-rmmcp(x,con=conB,tr=tr,alpha=alpha,dif=dif)
Factor.C<-rmmcp(x,con=conC,tr=tr,alpha=alpha,dif=dif)
Factor.AB<-rmmcp(x,con=conAB,tr=tr,alpha=alpha,dif=dif)
Factor.AC<-rmmcp(x,con=conAC,tr=tr,alpha=alpha,dif=dif)
Factor.BC<-rmmcp(x,con=conBC,tr=tr,alpha=alpha,dif=dif)
Factor.ABC<-rmmcp(x,con=conABC,tr=tr,alpha=alpha,dif=dif)
list(Factor.A=Factor.A,Factor.B=Factor.B,Factor.C=Factor.C,
Factor.AB=Factor.AB,Factor.AC=Factor.AC,Factor.BC=Factor.BC,
Factor.ABC=Factor.ABC,conA=conA,conB=conB,conC=conC,
conAB=conAB,conAC=conAC,conBC=conBC,conABC=conABC)
}

tmcppb<-function(x,alpha=.05,nboot=NA,grp=NA,est=tmean,con=0,bhop=FALSE,SEED=TRUE,
...){
#
#   Multiple comparisons for  J independent groups using trimmed means
#
#   A percentile bootstrap method with Rom's method is used.
#
#   The data are assumed to be stored in x
#   which either has list mode or is a matrix.  In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, the columns of the matrix correspond
#   to groups.
#
#   est is the measure of location and defaults to the trimmed mean (tmean)
#   ... can be used to set optional arguments associated with est
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   Missing values are allowed.
#
con<-as.matrix(con)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
tempn<-0
mvec<-NA
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
mvec[j]<-est(temp,...)
}
Jm<-J-1
#
# Determine contrast matrix
#
if(sum(con^2)==0){
ncon<-(J^2-J)/2
con<-matrix(0,J,ncon)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
ncon<-ncol(con)
if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.")
#  Determine nboot if a value was not specified
if(is.na(nboot)){
nboot<-5000
if(J <= 8)nboot<-4000
if(J <= 3)nboot<-2000
}
# Determine critical values
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
}
}
if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon
bvec<-matrix(NA,nrow=J,ncol=nboot)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
#print(paste("Working on group ",j))
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group
}
test<-NA
bcon<-t(con)%*%bvec #ncon by nboot matrix
tvec<-t(con)%*%mvec
for (d in 1:ncon){
tv<-sum(bcon[d,]==0)/nboot
test[d]<-sum(bcon[d,]>0)/nboot+.5*tv
if(test[d]> .5)test[d]<-1-test[d]
}
test<-2*test
output<-matrix(0,ncon,6)
dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper"))
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
for (ic in 1:ncol(con)){
output[ic,2]<-tvec[ic,]
output[ic,1]<-ic
output[ic,3]<-test[ic]
temp<-sort(bcon[ic,])
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}
bbmcppb<-function(J, K, x, est=tmean,JK = J*K,
 alpha = 0.05, grp =c(1:JK), nboot = 500, bhop=FALSE,SEED = TRUE,...)
{
#
#  BETWEEN-BY-BETWEEN DESIGN
#
        # A percentile bootstrap for multiple comparisons
        #  for all main effects and interactions
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JK, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
con=con2way(J,K)
A=bbmcppb.sub(J=J, K=K, x, est=est,con=con$conA,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
B=bbmcppb.sub(J=J, K=K, x, est=est,con=con$conB,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
AB=bbmcppb.sub(J=J, K=K, x, est=est,con=con$conAB,
 alpha = alpha, nboot = nboot, bhop=bhop,SEED = SEED,grp=grp,...)
list(Fac.A=A,Fac.B=B,Fac.AB=AB)
}

bbmcppb.sub<-function(J, K, x, est=tmean, JK = J*K, con = 0,
 alpha = 0.05, grp =c(1:JK), nboot = 500, bhop=FALSE,SEED = TRUE, ...){
#
#   between-by-between design
#
        #
        # A percentile bootstrap for multiple comparisons
        # for all main effects and interactions
        # The analysis is done by generating bootstrap samples and
        # using an appropriate linear contrast.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
#
#          JK independent groups
#

       #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JK, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
x=y
}
ncon=ncol(con)
 p <- J*K
JK=p
if(p>length(x))stop("JK is less than the Number of groups")
JK=J*K
        xx=list()
        for(j in 1:p) {
xx[[j]]=x[[grp[j]]] # save input data
#                # Now have the groups in proper order.
        }
for(j in 1:p){
xx[[j]]=elimna(xx[[j]])
}
        if(SEED)
                set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        # Next determine the n_j values
        testA = NA
        bsam = list()
        bdat = list()
aboot=matrix(NA,nrow=nboot,ncol=ncol(con))
tvec=NA
tvec=linhat(xx,con,est=est,...)
        for(ib in 1:nboot) {
 for(j in 1:JK) {
nv=length(xx[[j]])
bdat[[j]] = sample(nv, size = nv, replace =T)
 bsam[[j]] = xx[[j]][bdat[[j]]] # each group gets its own bootstrap sample
}
aboot[ib,]=linhat(bsam,con=con,est=est,...)
}
pbA=NA
for(j in 1:ncol(aboot)){
pbA[j]=mean(aboot[,j]>0)
pbA[j]=2*min(c(pbA[j],1-pbA[j]))
}
# Determine critical values
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncol(con) > 10){
avec<-.05/c(11:(ncol(con)))
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncol(con) > 10){
avec<-.01/c(11:ncol(con))
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncol(con))
}
}
if(bhop)dvec<-(ncol(con)-c(1:ncol(con))+1)*alpha/ncol(con)
outputA<-matrix(0,ncol(con),6)
dimnames(outputA)<-list(NULL,c("con.num","psihat","p.value","p.crit",
"ci.lower","ci.upper"))
test=pbA
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
outputA[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
outputA[,2]<-tvec
for (ic in 1:ncol(con)){
outputA[ic,1]<-ic
outputA[ic,3]<-test[ic]
temp<-sort(aboot[,ic])
outputA[ic,5]<-temp[icl]
outputA[ic,6]<-temp[icu]
}
outputA
}

ols.plot.inter<-function(x,y, pyhat = FALSE, eout = FALSE, xout = FALSE, outfun = out,
    plotit = TRUE, expand = 0.5, scale = FALSE, xlab = "X",
    ylab = "Y", zlab = "", theta = 50, phi = 25, family = "gaussian",
    duplicate = "error",ticktype="simple",...){
#
# Plot regression surface based on the classic interaction model:
#  usual product term
#
#   x is assumed to be a matrix with two columns (two predictors)
library(akima)
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
if(ncol(x)!=2)stop("x should have two columns")
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:2]
y<-m[,3]
}
xx=cbind(x,x[,1]*x[,2])
temp=lsfit(xx,y)
fitr=y-temp$residuals
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate)
persp(fit,theta=theta,phi=phi,expand=expand,
scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype)
}

gamplotINT<-function(x,y,pyhat=FALSE,plotit=TRUE,theta=50,phi=25,expand=.5,xout=FALSE,
SCALE=FALSE,zscale=TRUE,eout=FALSE,outfun=out,ticktype="simple",xlab = "X", ylab = "Y", zlab = "",...){
#
# Plot regression surface, assuming two predictors in
# n by 2 matrix x using gam (generalized additive model)
# Same as gamplot, only a product term is included.
#
if(eout && xout)stop("Not allowed to have eout=xout=T")
x<-as.matrix(x)
if(ncol(x)!=2)stop("x must be an n by 2 matrix")
library(akima)
library(mgcv)
m<-elimna(cbind(x,y))
if(xout){
flag<-outfun(x,...)$keep
m<-m[flag,]
}
if(eout){
flag<-outfun(m,...)$keep
m<-m[flag,]
}
x1<-m[,1]
x2<-m[,2]
y<-m[,3]
xrem<-m[,1:2]
n<-nrow(x)
fitr<-fitted(gam(y~s(x1)+s(x2)+s(x1,x2)))
allfit<-fitr
if(plotit){
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(xrem[i,]==xrem[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1]
mkeep<-xrem[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr)
persp(fit,theta=theta,phi=phi,expand=expand,xlab=xlab,ylab=ylab,zlab=zlab,
scale=SCALE,ticktype=ticktype)
}
m<-"Done"
if(pyhat)m<-allfit
m
}



reg.plot.inter<-function(x,y, regfun=tsreg,
 pyhat = FALSE, eout = FALSE, xout = FALSE, outfun = out,
    plotit = TRUE, expand = 0.5, scale = FALSE, xlab = "X",
    ylab = "Y", zlab = "", theta = 50, phi = 25, family = "gaussian",
    duplicate = "error",ticktype="simple",...){
#
# Plot regression surface based on the classic interaction model:
#  usual product term
#
#   x is assumed to be a matrix with two columns (two predictors)
library(akima)
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
if(xout){
p=ncol(x)
p1=p+1
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}

if(!scale)print("scale=F. If there is an association, try scale=T")
if(ncol(x)!=2)stop("x should have two columns")
xx=cbind(x,x[,1]*x[,2])
temp=regfun(xx,y)
fitr=y-temp$residuals
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate)
persp(fit,theta=theta,phi=phi,expand=expand,
scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype)
}

bwrank<-function(J,K,x,grp=c(1:p),p=J*K){
#
# Between by within rank-based ANOVA
# That is, have a J by K design with J independent levels and K dependent
# measures
#
# x can be a matrix with columns corresponding to groups
# or it can have list mode.
#
#
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x))x<-listm(x)
x=x[grp]
xx<-list()
nvec<-NA
alldat<-NA
klow<-1-K
kup<-0
iall=0
for (j in 1:J){
klow<-klow+K
kup<-kup+K
mtemp=elimna(matl(x[klow:kup]))
for(k in 1:K){
iall=iall+1
xx[[iall]]=mtemp[,k]
}}
for(j in 1:p){
alldat<-c(alldat,xx[[j]])
nvec[j]<-length(xx[[j]])
}
#
#  Check sample sizes
#
nmat<-matrix(nvec,J,K,byrow=T)
for(j in 1:J){
if(var(nmat[j,]) !=0){
warning("Number of observations among dependent groups for level",paste(j)," of Factor A are unequal")
print("Matrix of sample sizes:")
print(nmat)
}}
if(sum(is.na(alldat[2:length(alldat)]))>0)stop("Missing values not allowed")
rval<-rank(alldat[2:length(alldat)])
rdd<-mean(rval) # R bar ...
xr<-list()
il<-1-nvec[1]
iu<-0
for(j in 1:p){
il<-il+nvec[j]
iu<-iu+nvec[j]
xr[[j]]<-rval[il:iu]
}
v<-matrix(0,p,p)
Ja<-matrix(1,J,J)
Ia<-diag(1,J)
Pa<-Ia-Ja/J
Jb<-matrix(1,K,K)
Ib<-diag(1,K)
Pb<-Ib-Jb/K
cona<-kron(Pa,Ib)
conb<-kron(Ia,Pb)
conab<-kron(Pa,Pb)
for(k in 1:K){
temp<-x[[k]]
bigm<-matrix(temp,ncol=1)
jk<-k
for (j in 2:J){
jk<-jk+K
tempc<-matrix(x[[jk]],ncol=1)
bigm<-rbind(bigm,tempc)
temp<-c(temp,x[[jk]])
}}
N<-length(temp)
rbbd<-NA
for(k in 1:K){
bigm<-xr[[k]]
jk<-k
for (j in 2:J){
jk<-jk+K
bigm<-c(bigm,xr[[jk]])
}}
rbjk<-matrix(NA,nrow=J,ncol=K) #R_.jk
ic<-0
for (j in 1:J){
for(k in 1:K){
ic<-ic+1
rbjk[j,k]<-mean(xr[[ic]]) # R bar_.jk
}}
for(k in 1:K)rbbd[k]<-mean(rbjk[,k])
rbj<-1 # R_.j.
sigv<-0
njsam<-0 # n_j
icc<-1-K
ivec<-c(1:K)-K
for (j in 1:J){
icc<-icc+K
ivec<-ivec+K
temp<-xr[ivec[1]:ivec[K]]
temp<-matl(temp)
tempv<-apply(temp,1,mean)
njsam[j]<-nvec[icc]
rbj[j]<-mean(rbjk[j,])
sigv[j]<-var(tempv) # var of R bar_ij.
}
nv<-sum(njsam)
phat<-(rbjk-.5)/(nv*K)
sv2<-sum(sigv/njsam)
uv<-sum((sigv/njsam)^2)
dv<-sum((sigv/njsam)^2/(njsam-1))
testA<-J*var(rbj)/sv2
klow<-1-K
kup<-0
sv<-matrix(0,nrow=K,ncol=K)
rvk<-NA
for(j in 1:J){
klow<-klow+K
kup<-kup+K
sel<-c(klow:kup)
m<-matl(xr[klow:kup])
m<-elimna(m)
xx<-listm(m)
vsub<-nv*var(m)/(nv*K*nv*K*njsam[j])
v[sel,sel]<-vsub
sv<-sv+vsub
}
sv<-sv/J^2
testB<-nv/(nv*K*nv*K*sum(diag(Pb%*%sv)))*sum((rbbd-mean(rbbd))^2)
testAB<-0
for (j in 1:J){
for (k in 1:K){
testAB<-testAB+(rbjk[j,k]-rbj[j]-rbbd[k]+rdd)^2
}}
testAB<-nv*testAB/(nv*K*nv*K*sum(diag(conab%*%v)))
nu1B<-(sum(diag(Pb%*%sv)))^2/sum((diag(Pb%*%sv%*%Pb%*%sv)))
nu1A<-(J-1)^2/(1+J*(J-2)*uv/sv2^2)
nu2A<-sv2^2/dv
nu1AB<-(sum(diag(conab%*%v)))^2/sum(diag(conab%*%v%*%conab%*%v))
sig.A<-1-pf(testA,nu1A,nu2A)
sig.B<-1-pf(testB,nu1B,1000000)
sig.AB<-1-pf(testAB,nu1AB,1000000)
list(test.A=testA,sig.A=sig.A,test.B=testB,sig.B=sig.B,test.AB=testAB,
sig.AB=sig.AB,avg.ranks=rbjk,rel.effects=phat)
}
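#
# Illustrative call to bwrank (a sketch; the data are simulated, not from any
# study): a 2-by-2 between-by-within design stored as a matrix whose first
# two columns hold the K=2 repeated measures for level 1 of Factor A and the
# last two columns hold level 2.
if(FALSE){
set.seed(1)
dat<-matrix(rnorm(80),ncol=4)
bwrank(2,2,dat)  # test statistics, p-values, average ranks, relative effects
}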



rqtest<-function(x,y,qval=.5,nboot=200,alpha=.05,SEED=TRUE,xout=FALSE,outfun=out,...){
#
#   Omnibus test when using  a quantile regression estimator
#
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
}
x<-as.matrix(x)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,rqtest.sub,x,y,qval=qval)
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
p<-ncol(x)
if(p==1)stop("Use qregci when p=1")
n<-length(y)
np<-p+1
bvec<-t(bvec)
semat<-var(bvec[,2:np])
temp<-rqfit(x,y,qval=qval)$coef[2:np]
temp<-as.matrix(temp)
test<-t(temp)%*%solve(semat)%*%temp
test<-test*(n-p)/((n-1)*p)
p.value<-1-pf(test,p,n-p)
# Determine adjusted critical level, if possible.
adjusted.alpha=NULL
b1=NULL
if(n<=60){
if(alpha==.1){
if(p==2){
b1<-0-0.001965
b0<-.2179
}
if(p==3){
b1<-0-.003
b0<-.2814
}
if(p==4){
b1<-0-.0058
b0<-.4478
}
if(p==5){
b1<-0-.00896
b0<-.6373
}
if(p>=6){
b1<-0-.0112
b0<-.7699
}}
if(alpha==.05){
if(p==2){
b1<-0-0.001173
b0<-.1203
}
if(p==3){
b1<-0-.00223
b0<-.184
}
if(p==4){
b1<-0-.00476
b0<-.3356
}
if(p==5){
b1<-0-.0063
b0<-.425
}
if(p==6){
b1<-0-.00858
b0<-.5648
}}
if(alpha==.025){
if(p==2){
b1<-0-0.00056
b0<-.05875
}
if(p==3){
b1<-0-.00149
b0<-.1143
}
if(p==4){
b1<-0-.00396
b0<-.2624
}
if(p==5){
b1<-0-.00474
b0<-.3097
}
if(p==6){
b1<-0-.0064
b0<-.4111
}}
if(alpha==.01){
if(p==2){
b1<-0-0.00055
b0<-.043
}
if(p==3){
b1<-0-.00044
b0<-.0364
}
if(p==4){
b1<-0-.0024
b0<-.1546
}
if(p==5){
b1<-0-.00248
b0<-.159
}
if(p==6){
b1<-0-.00439
b0<-.2734
}}
if(!is.null(b1))adjusted.alpha<-b1*n+b0
adjusted.alpha<-max(alpha,adjusted.alpha)
}
list(test.stat=test,p.value=p.value,adjusted.alpha=adjusted.alpha)
}
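#
# Usage sketch for rqtest (assumes rqfit and rqtest.sub, defined elsewhere in
# this file, are available; the simulated data are illustrative only):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(100),ncol=2)
y<-x[,1]+rnorm(50)
rqtest(x,y,qval=.5,nboot=200)  # omnibus test that both slopes are zero
}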


runpd<-function(x,y,pts=x,est=tmean,fr=.8,plotit=TRUE,pyhat=FALSE,nmin=0,SCALE=FALSE,
expand=.5,xout=FALSE,outfun=out,pr=TRUE,xlab="X1",ylab="X2",zlab="",
theta=50,phi=25,duplicate="error",MC=FALSE,ticktype="simple",...){
#
# running mean using interval method
# Distances from a point are determined using a projection method
# see function pdclose
#
# fr controls amount of smoothing
# tr is the amount of trimming
# x is an n by p matrix of predictors.
#
if(is.list(x))stop("Data should  not stored be stored in list mode")
if(xout){
keepit<-outfun(x,plotit=FALSE)$keep
x<-x[keepit,]
y<-y[keepit]
}
x<-as.matrix(x)
pval<-ncol(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:pval]
x<-as.matrix(x)
y<-xx[,pval+1]
plotit<-as.logical(plotit)
iout<-c(1:nrow(x))
rmd<-1 # Initialize rmd
nval<-1
nmat<-pdclose(x,pts,fr=fr,MC=MC)
for(i in 1:nrow(pts))rmd[i]<-est(y[nmat[i,]],...)
for(i in 1:nrow(pts))nval[i]<-sum(nmat[i,])
if(ncol(x)==2){
if(plotit){
library(akima)
if(pr){
if(!SCALE)print("With dependence, suggest using SCALE=T")
}
fitr<-rmd[nval>nmin]
y<-y[nval>nmin]
x<-x[nval>nmin,]
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate)
persp(fit,theta=theta,phi=phi,expand=expand,
scale=SCALE,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype)
}}
if(pyhat)last<-rmd
if(!pyhat)last <- "Done"
        last
}
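#
# Usage sketch for runpd (a sketch only; pdclose, defined elsewhere in this
# file, does the projection-distance computations; data simulated):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(100),ncol=2)
y<-x[,1]^2+rnorm(50)
runpd(x,y,est=tmean,fr=.8,plotit=FALSE,pyhat=TRUE)  # smoothed y-hat values
}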

sppbi<-function(J,K,x,est=onestep,JK=J*K,grp=c(1:JK),nboot=500,SEED=TRUE,...){
#
# A percentile bootstrap for interactions
# in a split-plot design.
# The analysis is done by taking difference scores
# among all pairs of dependent groups and seeing whether
# these differences differ across levels of Factor A.
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
library(MASS)
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                x <- y
}

JK<-J*K
MJ<-(J^2-J)/2
MK<-(K^2-K)/2
JMK<-J*MK
Jm<-J-1
data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data
jp<-1-K
kv<-0
kv2<-0
for(j in 1:J){
jp<-jp+K
xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]]))
for(k in 1:K){
kv<-kv+1
xmat[,k]<-x[[kv]]
}
xmat<-elimna(xmat)
for(k in 1:K){
kv2<-kv2+1
x[[kv2]]<-xmat[,k]
}}
xx<-x
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
# Next determine the n_j values
nvec<-NA
jp<-1-K
for(j in 1:J){
jp<-jp+K
nvec[j]<-length(x[[jp]])
}
#
# Now take bootstrap samples from jth level
# of Factor A and average K  corresponding estimates
# of location.
#
bloc<-matrix(NA,ncol=J,nrow=nboot)
print("Taking bootstrap samples. Please wait.")
mvec<-NA
it<-0
for(j in 1:J){
paste("Working on level ",j," of Factor A")
x<-matrix(NA,nrow=nvec[j],ncol=MK)
#
im<-0
for(k in 1:K){
for(kk in 1:K){
if(k<kk){
im<-im+1
kp<-j*K+k-K
kpp<-j*K+kk-K
x[,im]<-xx[[kp]]-xx[[kpp]]
it<-it+1
mvec[it]<-est(x[,im],...)
}}}
data<-matrix(sample(nvec[j],size=nvec[j]*nboot,replace=TRUE),nrow=nboot)
bvec<-matrix(NA,ncol=MK,nrow=nboot)
mat<-listm(x)
for(k in 1:MK){
temp<-x[,k]
bvec[,k]<-apply(data,1,rmanogsub,temp,est,...) # An nboot by MK matrix
}
if(j==1)bloc<-bvec
if(j>1)bloc<-cbind(bloc,bvec)
}
#
MJMK<-MJ*MK
con<-matrix(0,nrow=JMK,ncol=MJMK)
cont<-matrix(0,nrow=J,ncol=MJ)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j<jj){
ic<-ic+1
cont[j,ic]<-1
cont[jj,ic]<-0-1
}}}
tempv<-matrix(0,nrow=MK-1,ncol=MJ)
con1<-rbind(cont[1,],tempv)
for(j in 2:J){
con2<-rbind(cont[j,],tempv)
con1<-rbind(con1,con2)
}
con<-con1
if(MK>1){
for(k in 2:MK){
con1<-push(con1)
con<-cbind(con,con1)
}}
bcon<-t(con)%*%t(bloc) #C by nboot matrix
tvec<-t(con)%*%mvec
tvec<-tvec[,1]
tempcen<-apply(bcon,1,mean)
vecz<-rep(0,ncol(con))
bcon<-t(bcon)
temp=bcon
for(ib in 1:nrow(temp))temp[ib,]=temp[ib,]-tempcen+tvec
smat<-var(temp)
#smat<-var(bcon-tempcen+tvec)
chkrank<-qr(smat)$rank
bcon<-rbind(bcon,vecz)
if(chkrank==ncol(smat))dv<-mahalanobis(bcon,tvec,smat)
if(chkrank<ncol(smat)){
smat<-ginv(smat)
dv<-mahalanobis(bcon,tvec,smat,inverted=T)
}
bplus<-nboot+1
sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot
list(p.value=sig.level,psihat=tvec,con=con)
}
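#
# Usage sketch for sppbi: J=2 independent groups, K=3 dependent measures,
# stored as a matrix with columns 1-3 for group 1 and columns 4-6 for
# group 2 (simulated data; illustration only):
if(FALSE){
set.seed(1)
dat<-matrix(rnorm(120),ncol=6)
sppbi(2,3,dat,nboot=500)  # p-value for the interaction contrasts
}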
sppba<-function(J,K,x,est=onestep,JK=J*K,grp=c(1:JK),avg=TRUE,nboot=500,SEED=TRUE,
MC=FALSE,MDIS=FALSE,...){
#
# A percentile bootstrap for main effects
# among independent groups in a split-plot design
#
# avg=T: The analysis is done by averaging K measures of
# location for  each level of Factor A,
# and then comparing averages by testing the hypothesis
# that all pairwise differences are equal to zero.
#
# avg=F:   The analysis is done by testing whether $K$ equalities are
# simultaneously true. For kth level of Factor B, the kth equality is
# theta_{1k}= ... theta_{Jk}, k=1,...,K.
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
library(MASS)
       if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
                x <- y
}

JK<-J*K
data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data
jp<-1-K
kv<-0
kv2<-0
for(j in 1:J){
jp<-jp+K
xmat<-matrix(NA,ncol=K,nrow=length(x[[jp]]))
for(k in 1:K){
kv<-kv+1
xmat[,k]<-x[[kv]]
}
xmat<-elimna(xmat)
for(k in 1:K){
kv2<-kv2+1
x[[kv2]]<-xmat[,k]
}
}
xx<-x
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
# Next determine the n_j values
nvec<-NA
jp<-1-K
for(j in 1:J){
jp<-jp+K
nvec[j]<-length(x[[jp]])
}
#
# Now take bootstrap samples from jth level
# of Factor A.
#
bloc<-matrix(NA,nrow=J,ncol=nboot)
print("Taking bootstrap samples. Please wait.")
mvec<-NA
ik<-0
for(j in 1:J){
paste("Working on level ",j," of Factor A")
x<-matrix(NA,nrow=nvec[j],ncol=K)
#
for(k in 1:K){
ik<-ik+1
x[,k]<-xx[[ik]]
if(!avg)mvec[ik]<-est(xx[[ik]],...)
}
tempv<-apply(x,2,est,...)
data<-matrix(sample(nvec[j],size=nvec[j]*nboot,replace=TRUE),nrow=nboot)
bvec<-matrix(NA,ncol=K,nrow=nboot)
for(k in 1:K){
temp<-x[,k]
bvec[,k]<-apply(data,1,rmanogsub,temp,est,...) # An nboot by K matrix
}
if(avg){
mvec[j]<-mean(tempv)
bloc[j,]<-apply(bvec,1,mean)
}
if(!avg){
if(j==1)bloc<-bvec
if(j>1)bloc<-cbind(bloc,bvec)
}
}
if(avg){
d<-(J^2-J)/2
con<-matrix(0,J,d)
id<-0
Jm<-J-1
for (j in 1:Jm){
jp<-j+1
for(k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
if(!avg){
MJK<-K*(J^2-J)/2 # NUMBER OF COMPARISONS
JK<-J*K
MJ<-(J^2-J)/2
cont<-matrix(0,nrow=J,ncol=MJ)
ic<-0
for(j in 1:J){
for(jj in 1:J){
if(j<jj){
ic<-ic+1
cont[j,ic]<-1
cont[jj,ic]<-0-1
}}}
tempv<-matrix(0,nrow=K-1,ncol=MJ)
con1<-rbind(cont[1,],tempv)
for(j in 2:J){
con2<-rbind(cont[j,],tempv)
con1<-rbind(con1,con2)
}
con<-con1
if(K>1){
for(k in 2:K){
con1<-push(con1)
con<-cbind(con,con1)
}}}
if(!avg)bcon<-t(con)%*%t(bloc) #C by nboot matrix
if(avg)bcon<-t(con)%*%(bloc)
tvec<-t(con)%*%mvec
tvec<-tvec[,1]
tempcen<-apply(bcon,1,mean)
vecz<-rep(0,ncol(con))
bcon<-t(bcon)
temp=bcon
for(ib in 1:nrow(temp))temp[ib,]=temp[ib,]-tempcen+tvec
bcon<-rbind(bcon,vecz)
if(!MDIS){
if(!MC)dv=pdis(bcon,center=tvec)
if(MC)dv=pdisMC(bcon,center=tvec)
}
if(MDIS){
smat<-var(temp)
chkrank<-qr(smat)$rank
if(chkrank==ncol(smat))dv<-mahalanobis(bcon,tvec,smat)
if(chkrank<ncol(smat)){
smat<-ginv(smat)
dv<-mahalanobis(bcon,tvec,smat,inverted=T)
}}
bplus<-nboot+1
sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot
list(p.value=sig.level,psihat=tvec,con=con)
}
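#
# Companion sketch for sppba, using the same layout as the sppbi example
# (assumes the helper functions in this file, e.g. pdis, are available):
if(FALSE){
set.seed(1)
dat<-matrix(rnorm(120),ncol=6)
sppba(2,3,dat,avg=TRUE,nboot=500)  # main effect of Factor A
}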

outpro<-function(m,gval=NA,center=NA,plotit=TRUE,op=TRUE,MM=FALSE,cop=3,
xlab="VAR 1",ylab="VAR 2",STAND=FALSE,tr=.2,q=.5,pr=TRUE,...){
#
# Detect outliers using a modification of the
# Stahel-Donoho  projection method.
#
# Determine center of data cloud, for each point,
# connect it with center, project points onto this line
# and use distances between projected points to detect
# outliers. A boxplot method is used on the
# projected distances.
#
# plotit=TRUE creates a scatterplot when working with
# bivariate data.
#
# op=T
# means the .5 depth contour is plotted
# based on data with outliers removed.
#
# op=F
# means .5 depth contour is plotted without removing outliers.
#
#  MM=F  Use the interquartile range when checking for outliers
#  MM=T  uses MAD.
#
#  If value for center is not specified,
#  there are four options for computing the center of the
#  cloud of points when computing projections:
#
#  cop=2 uses MCD center
#  cop=3 uses median of the marginal distributions.
#  cop=4 uses MVE center
#  cop=5 uses TBS
#  cop=6 uses rmba (Olive's median ball algorithm)
#  cop=7 uses the spatial (L1) median
#
#  args q and tr having are not used by this function. They are included to deal
#  with situations where smoothers have optional arguments for q and tr
#
#  When using cop=2, 3 or 4, default critical value for outliers
#  is square root of the .975 quantile of a
#  chi-squared distribution with p degrees
#  of freedom.
#
#  STAND=T means that marginal distributions are standardized before
#  checking for outliers.
#
#  Donoho-Gasko (Tukey) median is marked with a cross, +.
#
m<-as.matrix(m)
if(pr){
if(!STAND){
if(ncol(m)>1)print("STAND=FALSE. If measures are on different scales, might want to use STAND=TRUE")
}}
library(MASS)
m=elimna(m)
m<-as.matrix(m)
nv=nrow(m)
if(ncol(m)==1){
dis<-(m-median(m,na.rm=TRUE))^2/mad(m,na.rm=TRUE)^2
dis<-sqrt(dis)
dis[is.na(dis)]=0
crit<-sqrt(qchisq(.975,1))
chk<-ifelse(dis>crit,1,0)
vec<-c(1:nrow(m))
outid<-vec[chk==1]
keep<-vec[chk==0]
}
if(ncol(m)>1){
if(STAND)m=standm(m,est=median,scat=mad)
if(is.na(gval) && cop==1)gval<-sqrt(qchisq(.95,ncol(m)))
if(is.na(gval) && cop!=1)gval<-sqrt(qchisq(.975,ncol(m)))
if(cop==1 && is.na(center[1])){
if(ncol(m)>2)center<-dmean(m,tr=.5,cop=1)
if(ncol(m)==2){
tempd<-NA
for(i in 1:nrow(m))
tempd[i]<-depth(m[i,1],m[i,2],m)
mdep<-max(tempd)
flag<-(tempd==mdep)
if(sum(flag)==1)center<-m[flag,]
if(sum(flag)>1)center<-apply(m[flag,],2,mean)
}}
if(cop==2 && is.na(center[1])){
center<-cov.mcd(m)$center
}
if(cop==4 && is.na(center[1])){
center<-cov.mve(m)$center
}
if(cop==3 && is.na(center[1])){
center<-apply(m,2,median)
}
if(cop==5 && is.na(center[1])){
center<-tbs(m)$center
}
if(cop==6 && is.na(center[1])){
center<-rmba(m)$center
}
if(cop==7 && is.na(center[1])){
center<-spat(m)
}
flag<-rep(0, nrow(m))
outid <- NA
vec <- c(1:nrow(m))
for (i in 1:nrow(m)){
B<-m[i,]-center
dis<-NA
BB<-B^2
bot<-sum(BB)
if(bot!=0){
for (j in 1:nrow(m)){
A<-m[j,]-center
temp<-sum(A*B)*B/bot
dis[j]<-sqrt(sum(temp^2))
}
temp<-idealf(dis)
if(!MM)cu<-median(dis)+gval*(temp$qu-temp$ql)
if(MM)cu<-median(dis)+gval*mad(dis)
outid<-NA
temp2<-(dis> cu)
flag[temp2]<-1
}}
flag<-(flag==1)
if(sum(flag) == 0) outid <- NA
if(sum(flag) > 0) outid <- vec[flag]
idv<-c(1:nrow(m))
keep<-idv[!flag]
if(ncol(m)==2){
if(plotit){
plot(m[,1],m[,2],type="n",xlab=xlab,ylab=ylab)
points(m[keep,1],m[keep,2],pch="*")
if(length(outid)>0)points(m[outid,1],m[outid,2],pch="o")
if(op){
tempd<-NA
keep<-keep[!is.na(keep)]
mm<-m[keep,]
for(i in 1:nrow(mm))tempd[i]<-depth(mm[i,1],mm[i,2],mm)
mdep<-max(tempd)
flag<-(tempd==mdep)
if(sum(flag)==1)center<-mm[flag,]
if(sum(flag)>1)center<-apply(mm[flag,],2,mean)
m<-mm
}
points(center[1],center[2],pch="+")
x<-m
temp<-fdepth(m,plotit=FALSE)
flag<-(temp>=median(temp))
xx<-x[flag,]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
#xord<-order(xx[,1])
#xx<-xx[xord,]
#temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}}}
list(n=nv,n.out=length(outid),out.id=outid,keep=keep)
}
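#
# Usage sketch for outpro (simulated bivariate data with one planted outlier;
# illustration only):
if(FALSE){
set.seed(1)
m<-matrix(rnorm(100),ncol=2)
m[1,]<-c(6,6)  # plant an outlier
outpro(m,plotit=FALSE)$out.id  # should flag row 1
}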

skerd<-function(x,op=TRUE,kernel="gaussian"){
#
# Compute kernel density estimate
# for univariate data using the R function density
#
# kernel="epanechnikov" will use the Epanechnikov kernel.
#
if(!op)temp<-density(x,na.rm=TRUE,bw=bw.SJ(elimna(x),method="dpi"),n=256,kernel=kernel)
if(op)temp<-density(x,na.rm=TRUE,kernel=kernel)
plot(temp$x,temp$y,type="n",ylab="",xlab="x")
lines(temp$x,temp$y)
}


bkreg<-function(x,y,kerfun=akerd,pyhat=FALSE,plotit=TRUE,xlab="X",ylab="Y",
zlab="Z",xout=FALSE,outfun=outpro,pr=TRUE,theta=50,phi=25,duplicate="error",
expand=.5,SCALE=FALSE,ticktype="simple",...){
#
# Kernel estimator for binary regression.
# (See Signorini and Jones, JASA, 2004, 119-)
#
x=as.matrix(x)
p=ncol(x)
p1=p+1
xx<-elimna(cbind(x,y))
x<-xx[,1:p]
y<-xx[,p1]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
x=as.matrix(x)
flag<-(y==1)
mv=sum(flag)
nv=sum(!flag)
phat<-NA
fhat<-kerfun(x[flag,],pyhat=TRUE,plotit=FALSE,pts=x)
ghat<-kerfun(x[!flag,],pyhat=TRUE,plotit=FALSE,pts=x)
phat<-mv*fhat/(mv*fhat+nv*ghat)
if(p==1){
if(plotit){
plot(x,y,xlab=xlab,ylab=ylab)
flag2<-order(x)
lines(x[flag2],phat[flag2])
}}
if(p==2){
if(plotit){
library(akima)
if(pr){
if(!SCALE)print("With dependence, suggest using SCALE=T")
}
fitr<-phat
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate)
persp(fit,theta=theta,phi=phi,expand=expand,
scale=SCALE,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype)
}}
if(!pyhat)phat<-"Done"
phat
}

logSM<-function(x,y,pyhat=FALSE,plotit=TRUE,xlab="X",ylab="Y",
zlab="Z",xout=FALSE,outfun=outpro,pr=TRUE,theta=50,phi=25,duplicate="error",
expand=.5,scale=FALSE,fr=2,ticktype="simple",...){
#
# A smoother designed specifically for binary outcomes
#
x=as.matrix(x)
p=ncol(x)
p1=p+1
xx<-elimna(cbind(x,y))
x<-xx[,1:p]
y<-xx[,p1]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
x=as.matrix(x)
library(MASS)
m=cov.mve(x)
flag<-(y==1)
phat<-NA
m1=matrix(NA,nrow=length(y),ncol=length(y))
for(i in 1:nrow(x))m1[,i]<-mahalanobis(x,x[i,],m$cov)
m2<-exp(-1*m1)*(sqrt(m1)<=fr)
m3<-matrix(y,length(y),length(y))*m2
phat=apply(m3,2,sum)/apply(m2,2,sum)
if(p==1){
if(plotit){
plot(x,y,xlab=xlab,ylab=ylab)
flag2<-order(x)
lines(x[flag2],phat[flag2])
}}
if(p==2){
if(plotit){
library(akima)
if(pr){
if(!scale)print("With dependence, suggest using scale=T")
}
fitr<-phat
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate)
persp(fit,theta=theta,phi=phi,expand=expand,
scale=scale,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype)
}}
if(!pyhat)phat<-"Done"
phat
}
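#
# Usage sketch for logSM with a single predictor and a binary outcome
# (simulated data; illustration only):
if(FALSE){
set.seed(1)
x<-rnorm(100)
y<-rbinom(100,1,1/(1+exp(-x)))
logSM(x,y,pyhat=TRUE,plotit=FALSE)  # smoothed estimates of P(Y=1|X)
}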

YYmanova<-function(x1,x2,tr=.2){
#
# Do MANOVA using generalization of
# Yanagihara, H. & Yuan, K. H. (2005).
# Three approximate solutions to the
#  multivariate Behrens-Fisher problem.  Communications in Statistics--
# Simulation and Computation, 34, 975--988; see their eq. (2.7).
#
#  x1 and x2 are assumed to be matrices
#
x1=elimna(x1)
x2=elimna(x2)
s1=winall(x1,tr=tr)$cov
s2=winall(x2,tr=tr)$cov
n1=nrow(x1)
n2=nrow(x2)
n=n1+n2
g1=floor(n1*tr)
g2=floor(n2*tr)
h1=n1-2*g1
h2=n2-2*g2
h=h1+h2
sbar=n2*s1/n+n1*s2/n
sbarinv=solve(sbar)
psi1=n2^2*(n-2)*(sum(diag(s1%*%sbarinv)))^2/(n^2*(n1-1))+
n1^2*(n-2)*(sum(diag(s2%*%sbarinv)))^2/(n^2*(n2-1))
psi2=n2^2*(n-2)*(sum(diag(s1%*%sbarinv%*%s1%*%sbarinv)))/(n^2*(n1-1))+
n1^2*(n-2)*(sum(diag(s2%*%sbarinv%*%s2%*%sbarinv)))/(n^2*(n2-1))
p=ncol(x1)
theta1=(p*psi1+(p-2)*psi2)/(p*(p+2))
theta2=(psi1+2*psi2)/(p*(p+2))
nuhat=(h-2-theta1)^2/((h-2)*theta2-theta1)
xb1=apply(x1,2,mean,tr=tr)
xb2=apply(x2,2,mean,tr=tr)
dif=xb1-xb2
dif=as.matrix(dif)
Ttest=t(dif)%*%solve((n1-1)*s1/(h1*(h1-1))+(n2-1)*s2/(h2*(h2-1)))%*%dif
TF=(n-2-theta1)*Ttest/((n-2)*p)
pv=1-pf(TF,p,nuhat)
list(test.stat=TF,p.value=pv)
}
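#
# Usage sketch for YYmanova: two independent trivariate samples
# (simulated data; illustration only):
if(FALSE){
set.seed(1)
x1<-matrix(rnorm(60),ncol=3)
x2<-matrix(rnorm(60,.5),ncol=3)
YYmanova(x1,x2,tr=.2)  # compares the 20% trimmed mean vectors
}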


logreg<-function(x,y,xout=FALSE,outfun=outpro,plotit=FALSE,POLY=FALSE,
xlab="X",ylab="Y",zlab="",SCALE=FALSE,expand=.5,theta=50,phi=25,
duplicate="error",ticktype="simple",...){
#
# Perform  logistic regression.
# The predictors are assumed to be stored in the n by p matrix x.
# The y values should be 1 or 0.
#
# xout=T will remove outliers from among the x values and then fit
# the regression line.
#  Default:
# One predictor, a mad-median rule is used.
# With more than one, projection method is used.
#
# outfun=out will use MVE method
#
#  plotit=TRUE will plot regression line
#  POLY=T,  will plot regression line assuming predictor
#  is in  col 1 of x and other columns are x (in col 1) raised to some power
#   or some other function of x
#
x<-as.matrix(x)
p=ncol(x)
xy=elimna(cbind(x,y))
x=xy[,1:ncol(x)]
y=xy[,ncol(xy)]
x<-as.matrix(x)
if(xout){
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
}
x<-as.matrix(x)
if(p==1 || POLY){
xord=order(x[,1])
x=x[xord,]
y=y[xord]
}
fitit=glm(formula=y~x,family=binomial)
init<-summary(fitit)
if(plotit){
vals=fitted.values(fitit)
if(p==1){
plot(x,y,xlab=xlab,ylab=ylab)
lines(x,vals)
}
if(p==2){
library(akima)
if(!SCALE)print("With dependence, suggest using SCALE=T")
fitr=vals
iout<-c(1:length(fitr))
nm1<-length(fitr)-1
for(i in 1:nm1){
ip1<-i+1
for(k in ip1:length(fitr))if(sum(x[i,]==x[k,])==2)iout[k]<-0
}
fitr<-fitr[iout>=1] # Eliminate duplicate points in the x-y plane
#                 This is necessary when doing three dimensional plots
#                 with the R function interp
mkeep<-x[iout>=1,]
fit<-interp(mkeep[,1],mkeep[,2],fitr,duplicate=duplicate)
persp(fit,theta=theta,phi=phi,expand=expand,
scale=SCALE,xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype)
}
}
init$coef
}
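#
# Usage sketch for logreg (simulated logistic data; illustration only):
if(FALSE){
set.seed(1)
x<-rnorm(100)
y<-rbinom(100,1,1/(1+exp(-x)))
logreg(x,y)  # ML estimates, standard errors, z values from glm
}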


rplot.bin<-function
(x,y,est=mean,scat=TRUE,fr=NULL,plotit=TRUE,pyhat=FALSE,efr=.5,
theta=50,phi=25,SCALE=FALSE,expand=.5,SEED=TRUE,
nmin=0,xout=FALSE,outfun=out,eout=FALSE,xlab="X",ylab="Y",
zlab="",pr=TRUE,duplicate="error",zscale=TRUE,...){
#
#  This function applies the running interval smoother, but is designed
#  specifically for situations where y is  binary.
#
# duplicate="error"
# In some situations where duplicate values occur, when plotting with
# two predictors, it is necessary to set duplicate="strip"
#
yy=elimna(y)
nchk=length(yy)
chky=sum((yy==1))+sum((yy==0))
if(nchk != chky)print("Warning: some y values are not equal to 0 or 1")
x<-as.matrix(x)
if(ncol(x)==1){
if(is.null(fr))fr=.8
val<-rungen(x,y,est=est,scat=scat,fr=fr,plotit=plotit,pyhat=TRUE,
eout=eout,xout=xout,outfun=outfun,xlab=xlab,ylab=ylab,...)
val2<-rungen(x,y,est=est,fr=efr,plotit=FALSE,pyhat=TRUE,
eout=eout,xout=xout,outfun=outfun,...)$output
val<-val$output
}
if(ncol(x)>1){
if(is.null(fr))fr=1.2
if(ncol(x)==2 && !SCALE){
if(pr){print("SCALE=F is specified.")
print("If there is dependence, might want to use SCALE=T")
}}
val<-rung3dv2(x,y,est=est,fr=fr,plotit=plotit,pyhat=TRUE,SEED=SEED,nmin=nmin,
xout=xout,outfun=outfun,scale=SCALE,phi=phi,theta=theta,expand=expand,
duplicate="error",zscale=zscale,xlab=xlab,ylab=ylab,zlab=zlab,...)
}
if(!pyhat)val <- NULL
list(yhat = val)
}



wlogreg<-function(x0,y,initwml=FALSE,const=0.5,kmax=1e3,maxhalf=10)
{
#  Computation of the estimator of Bianco and Yohai (1996) in logistic regression
#  -------------
# This is a slightly modified version of code due to
#  Christophe Croux, Gentiane Haesbroeck, and Kristel Joossens
# (Here initwml defaults to F
#
#  This program computes the estimator of Bianco and Yohai in
#  logistic regression. By default, an intercept term is included
#  and p parameters are estimated.
#
#  For more details we refer to
#     Croux, C., and Haesbroeck, G. (2003), ``Implementing the Bianco and Yohai
#     estimator for Logistic Regression'',
#     Computational Statistics and Data Analysis, 44, 273-295
#
#Input:
#-------
# x0= n x (p-1) matrix containing the explanatory variables;
# y= n-vector containing binomial response (0 or 1);
#
# initwml= logical value for selecting one of the two possible methods for computing
#          the initial value of the optimization process. If initwml=T (default), a
#          weighted ML estimator is computed with weights derived from the MCD estimator
#          computed on the explanatory variables. If initwml=F, a classical ML fit is perfomed.
#          When the explanatory variables contain binary observations, it is recommended
#          to set initwml to F or to modify the code of the algorithm to compute the weights
#          only on the continuous variables.
# const= tuning constant used in the computation of the estimator (default=0.5);
# kmax= maximum number of iterations before convergence (default=1000);
# maxhalf= max number of step-halving (default=10).
#
# Example:
# x0=matrix(rnorm(100,1))
# y=as.numeric(runif(100)>0.5)
# wlogreg(x0,y)
#
#Output:
#--------
# list with
# 1st component: T or F if convergence achieved or not
# 2nd component: value of the objective function at the minimum
# p next components: estimates for the parameters.
# p last components: standard errors of the parameters (if first component is T)

library(MASS)
x0=as.matrix(x0)
#  n=nrow(x0)
  p=ncol(x0)+1
p0=p-1
  #Smallest value of the scale parameter before implosion
  sigmamin=1e-4

# eliminate any rows with missing values
zz=elimna(cbind(x0,y))
x=as.matrix(zz[,1:p0])
y=zz[,p]
n=nrow(x)
#  x=as.matrix(cbind(rep(1,n),x0))
  x=as.matrix(cbind(rep(1,n),x))
  y=as.numeric(y)

  # Computation of the initial value of the optimization process
  if (initwml==T)
  {
    hp=floor(n*(1-0.25))+1
    mcdx=cov.mcd(x0, quantile.used =hp,method="mcd")
    rdx=sqrt(mahalanobis(x0,center=mcdx$center,cov=mcdx$cov))
    vc=sqrt(qchisq(0.975,p-1))
    wrd=(rdx<=vc)
    gstart=glm(y~x0,family=binomial,subset=wrd)$coef
  }

else {gstart=glm(y~x0,family=binomial)$coef}

  sigmastart=1/sqrt(sum(gstart^2))
  xistart=gstart*sigmastart
  stscores=x %*% xistart
sigma1=sigmastart

  #Initial value for the objective function
  oldobj=mean(phiBY3(stscores/sigmastart,y,const))
  kstep=jhalf=1

  while ((kstep < kmax) & (jhalf<maxhalf)){


unisig <- function(sigma)

{ mean(phiBY3(stscores/sigma,y,const))}

optimsig=nlminb(sigma1,unisig,lower=0)
sigma1=optimsig$par



    if (sigma1<sigmamin) {print("Explosion");kstep=kmax
      } else {
      gamma1=xistart/sigma1
      scores=stscores/sigma1
      newobj=mean(phiBY3(scores,y,const))
      oldobj=newobj
      gradBY3=colMeans((derphiBY3(scores,y,const)%*%matrix(1,ncol=p))*x)
      h=-gradBY3+((gradBY3 %*% xistart) *xistart)
      finalstep=h/sqrt(sum(h^2))
      xi1=xistart+finalstep
      xi1=xi1/sqrt(sum(xi1^2))
      scores1=(x%*%xi1)/sigma1
      newobj=mean(phiBY3(scores1,y,const))

      ####stephalving
      hstep=jhalf=1
      while  ((jhalf <=maxhalf) & (newobj>oldobj)){
        hstep=hstep/2
        xi1=xistart+finalstep*hstep
        xi1=xi1/sqrt(sum(xi1^2))
        scores1=x%*%xi1/sigma1
        newobj=mean(phiBY3(scores1,y,const))
        jhalf=jhalf+1
      }
    CONV=F
      if ((jhalf==maxhalf+1) & (newobj>oldobj)) {CONV=T
        } else {
        jhalf=1
        xistart=xi1
        oldobj=newobj
        stscores=x%*% xi1
        kstep=kstep+1
      }
    }
  }

  if (kstep == kmax) {
CONV=F #    print("No convergence")
    result=list(convergence=FALSE,objective=0,coef=t(rep(NA,p)))
    } else {
    gammaest=xistart/sigma1
    stander=sterby3(x0,y,const,gammaest)
    result=list(convergence=CONV,coef=t(gammaest),sterror=stander)
  }
  return(result)
}




###############################################################
###############################################################
#Functions needed for the computation of estimator of Bianco and Yohai

phiBY3 <- function(s,y,c3)
{
  s=as.double(s)
  dev=log(1+exp(-abs(s)))+abs(s)*((y-0.5)*s<0)
  return(rhoBY3(dev,c3)+GBY3Fs(s,c3)+GBY3Fsm(s,c3))
}


rhoBY3 <- function(t,c3)
{
  (t*exp(-sqrt(c3))*as.numeric(t <= c3))+
    (((exp(-sqrt(c3))*(2+(2*sqrt(c3))+c3))-(2*exp(-sqrt(t))*(1+sqrt(t))))*as.numeric(t >c3))
}

psiBY3 <- function(t,c3)
{(exp(-sqrt(c3))*as.numeric(t <= c3))+(exp(-sqrt(t))*as.numeric(t >c3))}

derpsiBY3 <- function(t,c3)
{
  res=NULL
  for (i in 1:length(t)){
    if (t[i] <= c3){ res=rbind(res,0) }
    else {res=rbind(res,-exp(-sqrt(t[i]))/(2*sqrt(t[i]))) }
  }
  res
}


sigmaBY3<-function(sigma,s,y,c3) {mean(phiBY3(s/sigma,y,c3))}

derphiBY3=function(s,y,c3)
{
  Fs= exp(-(log(1+exp(-abs(s)))+abs(s)*(s<0)))
  ds=Fs*(1-Fs)
  dev=log(1+exp(-abs(s)))+abs(s)*((y-0.5)*s<0)
  Gprim1=log(1+exp(-abs(s)))+abs(s)*(s<0)
  Gprim2=log(1+exp(-abs(s)))+abs(s)*(s>0)
  return(-psiBY3(dev,c3)*(y-Fs)+((psiBY3(Gprim1,c3)-psiBY3(Gprim2,c3))*ds))
}

der2phiBY3=function(s,y,c3)
{
  s=as.double(s)
  Fs= exp(-(log(1+exp(-abs(s)))+abs(s)*(s<0)))
  ds=Fs*(1-Fs)
  dev=log(1+exp(-abs(s)))+abs(s)*((y-0.5)*s<0)
  Gprim1=log(1+exp(-abs(s)))+abs(s)*(s<0)
  Gprim2=log(1+exp(-abs(s)))+abs(s)*(s>0)
  der2=(derpsiBY3(dev,c3)*(Fs-y)^2)+(ds*psiBY3(dev,c3))
  der2=der2+(ds*(1-2*Fs)*(psiBY3(Gprim1,c3)-psiBY3(Gprim2,c3)))
  der2=der2-(ds*((derpsiBY3(Gprim1,c3)*(1-Fs))+(derpsiBY3(Gprim2,c3)*Fs)))
  der2
}


GBY3Fs <- function(s,c3)
{
  Fs= exp(-(log(1+exp(-abs(s)))+abs(s)*(s<0)))
  resGinf=exp(0.25)*sqrt(pi)*(pnorm(sqrt(2)*(0.5+sqrt(-log(Fs))))-1)
  resGinf=(resGinf+(Fs*exp(-sqrt(-log(Fs)))))*as.numeric(s <= -log(exp(c3)-1))
  resGsup=((Fs*exp(-sqrt(c3)))+(exp(0.25)*sqrt(pi)*(pnorm(sqrt(2)*(0.5+sqrt(c3)))-1)))*as.numeric(s > -log(exp(c3)-1))
  return(resGinf+resGsup)
}


GBY3Fsm <- function(s,c3)
{
  Fsm=exp(-(log(1+exp(-abs(s)))+abs(s)*(s>0)))
  resGinf=exp(0.25)*sqrt(pi)*(pnorm(sqrt(2)*(0.5+sqrt(-log(Fsm))))-1)
  resGinf=(resGinf+(Fsm*exp(-sqrt(-log(Fsm)))))*as.numeric(s >= log(exp(c3)-1))
  resGsup=((Fsm*exp(-sqrt(c3)))+(exp(0.25)*sqrt(pi)*(pnorm(sqrt(2)*(0.5+sqrt(c3)))-1)))*as.numeric(s < log(exp(c3)-1))
  return(resGinf+resGsup)
}

sterby3 <- function(x0,y,const,estim)
{
  n=nrow(x0)
  p=ncol(x0)+1

  z=cbind(matrix(1,nrow=n),x0)
  argum=z %*% estim

  matM=matrix(data=0,nrow=p,ncol=p)
  IFsquar=matrix(data=0,nrow=p,ncol=p)
  for (i in 1:n)
{
myscalar=as.numeric(der2phiBY3(argum[i],y[i],const))
matM=matM+myscalar * (z[i,] %*% t(z[i,]))
IFsquar=IFsquar+myscalar^2 * (z[i,] %*% t(z[i,]))
}
  matM=matM/n
  matMinv=solve(matM)
  IFsquar=IFsquar/n
  asvBY=matMinv %*% IFsquar %*% t(matMinv)
  sqrt(diag(asvBY))/sqrt(n)
}
long2mat<-function(x,Sid.col,dep.col){
#
# Have data in a matrix or data frame, x
# Sid.col indicates Subject's id
# Here, each subject has one or more rows of data
#
# Goal: store the data in a data frame where
# each row contains all of the data for an individual
# subject.
#
# dep.col indicates column of the outcome (dependent) variable
# This version assumes a single column of outcome values is to be
# rearranged.
#
if(length(dep.col)!=1)stop("Argument dep.col must have a single value")
if(is.null(dim(x)))stop("x must be a matrix or data frame")
Sid=unique(x[,Sid.col])
n=nrow(x)
nid=length(Sid)
flag=(x[,Sid.col]==Sid[1])
num.out=sum(flag)
res=matrix(NA,nrow=nid,ncol=num.out)
for(i in 1:nid){
flag=(x[,Sid.col]==Sid[i])
res[i,]=x[flag,dep.col]
}
res
}
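#
# Usage sketch for long2mat: three subjects with two rows each, subject id
# in column 1 and the outcome in column 3 (toy data; illustration only):
if(FALSE){
dat<-data.frame(id=rep(1:3,each=2),time=rep(1:2,3),y=rnorm(6))
long2mat(dat,Sid.col=1,dep.col=3)  # 3 by 2 matrix, one row per subject
}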


longcov2mat<-function(x,Sid.col,dep.col){
#
# Have data in a matrix or data frame, x
# Sid.col indicates Subject's id
# Here, each subject has one or more rows of data
#
# In a regression setting, each subject has
# one or more covariates corresponding to columns.
# For example, two covariates might be stored in columns
# 3 and 6.
#
# Goal: For ith subject, store the covariate data in
# list mode, which is a matrix.
# So for ith subject, store covariate data in z[[i]], say, which
# contains a matrix of dimension  m by p,
# m is the number of observations for ith subject and p
# the number of covariates.
#
# dep.col, having length p, indicates the columns containing the covariates
# Column Sid.col indicates the column containing subject's id
#
if(is.null(dim(x)))stop("x must be a matrix or data frame")
Sid=unique(x[,Sid.col])
res=list()
nid=length(Sid)
p=length(dep.col)# Number of covariates for each subject
n=nrow(x)
flag=(x[,Sid.col]==Sid[1])
n.each.s=sum(flag) # the number of rows for each subject
ns=n/n.each.s # the number of  subjects
if(!is.wholenumber(ns))stop("Not all S's have same number of rows of data")
for(i in 1:ns){
#res[[i]]=matrix(NA,nrow=n.each.s,ncol=p)
flag=(x[,Sid.col]==Sid[i])
res[[i]]=as.matrix(x[flag,dep.col])
}
res
}
is.wholenumber <-
    function(x, tol = .Machine$double.eps^0.5)  abs(x - round(x)) < tol

long2g<-function(x,x.col,y.col,s.id,grp.id,regfun=tsreg,MAR=TRUE,tr=.2){
#
# x is a matrix or data frame.
#
# Longitudinal data, compare two groups, where the groups correspond to two
# values in column
# grp.id.
# The outcome (dependent) variable is assumed to be stored in
# the column indicated by the argument  y.col.
# Example, y.col=3 means the outcome variable of interest is in col. 3
# Predictors are stored in columns  indicated by
# x.col.
# s.id indicates column where subject's id is stored.
#
# Assuming data are stored as for example in the R variable
# Orthodont,
# which can be accessed via the command  library(nlme)
#
m=matsplit(x,grp.id)
g1=longreg(m$m1,x.col,y.col,s.id,regfun)$est.S
g2=longreg(m$m2,x.col,y.col,s.id,regfun)$est.S
res=list()
if(MAR){
for(iv in 1:ncol(g1))res[[iv]]=yuen(g1[,iv],g2[,iv],tr=tr)
}
if(!MAR)res=smean2(g1,g2)
res
}

longreg.plot<-function(x,x.col,y.col,s.id,regfun=tsreg,scat=TRUE,xlab="X",
ylab="Y"){
#
# x is a data frame or matrix
#
# Longitudinal data: plot regression lines
#
# For each subject, fit a regression line
# using outcome data in col y.col and predictors, usually times
# when measures were taken, in columns indicated by x.col.
# s.id indicates column where subject's id is stored.
#
# Assuming data are stored as for example in the R variable
# Orthodont,
# which can be accessed via the command  library(nlme)
# For this data set, x.col=2 would indicate that the
# participant's age at the time of measurement is used
# to predict the outcome variable.
#
ymat=long2mat(x,s.id,y.col) # matrix, ith row contains outcome y
#                           for the ith subject.
#
xvals=longcov2mat(x,s.id,x.col)# list mode
n=nrow(ymat)
p=length(x.col)+1
if(p!=2)stop("Plot allows a single covariate only")
outmat=matrix(NA,nrow=n,ncol=p)
datx=NULL
daty=NULL
for(i in 1:n){
outmat[i,]=regfun(as.matrix(xvals[[i]]),ymat[i,])$coef
temp=as.matrix(xvals[[i]])
datx=c(datx,temp)
daty=c(daty,ymat[i,])
}
if(!scat)plot(datx,daty,type="n",xlab=xlab,ylab=ylab)
if(scat)plot(datx,daty,xlab=xlab,ylab=ylab)
for(i in 1:n)abline(outmat[i,1],outmat[i,2])
}

hotel1.tr<-function(x,null.value=0,tr=.2) {
#
# Perform a trimmed analog of Hotelling's (one-sample) T^2 test
# That is, for p-variate data, test the hypothesis that the p marginal
# trimmed means are equal to the value specified by
# the argument null.value
#
if (is.data.frame(x))
        x <- as.matrix(x)
x=elimna(x)
    if(!is.matrix(x))
        stop("'x' must be a numeric matrix or a data frame")
    n <- nrow(x)
    p <- ncol(x)
    mu=null.value
xbar=apply(x,2,mean,tr=tr)
    if(!is.numeric(mu) || ((lmu <- length(mu)) > 1 & lmu != p))
        stop("'null.value' must be a numeric vector of length ", p)
if(lmu == 1) mu <- rep(mu, p)
    xbar.mu <- xbar - mu
    V <- winall(x,tr=tr)$cov
h=n-2*floor(n*tr)
        k <- h / (n - 1) * (h - p) / p
        stat <- k * crossprod(xbar.mu, solve(V, xbar.mu))[1, ]
        pvalue <- 1 - pf(stat, p, h - p)
list(test.statistic = stat, degrees_of_freedom = c(p, h - p), p.value =
pvalue, estimate = xbar,
                null.value = mu)
}
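#
# Usage sketch for hotel1.tr: test that all three marginal 20% trimmed
# means are zero (simulated data; illustration only):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(60),ncol=3)
hotel1.tr(x,null.value=0,tr=.2)
}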

hotel1<-function(x,null.value=0,tr=0) {
#
# Perform Hotelling's (one-sample) T^2 test; with the default tr=0 this is
# the classic test. That is, for p-variate data, test the hypothesis that
# the p marginal (trimmed) means are equal to the value specified by
# the argument null.value
#
if (is.data.frame(x))
        x <- as.matrix(x)
x=elimna(x)
    if(!is.matrix(x))
        stop("'x' must be a numeric matrix or a data frame")
    n <- nrow(x)
    p <- ncol(x)
    mu=null.value
xbar=apply(x,2,mean,tr=tr)
    if(!is.numeric(mu) || ((lmu <- length(mu)) > 1 & lmu != p))
        stop("'null.value' must be a numeric vector of length ", p)
if(lmu == 1) mu <- rep(mu, p)
    xbar.mu <- xbar - mu
    V <- winall(x,tr=tr)$cov
h=n-2*floor(n*tr)
        k <- h / (n - 1) * (h - p) / p
        stat <- k * crossprod(xbar.mu, solve(V, xbar.mu))[1, ]
        pvalue <- 1 - pf(stat, p, h - p)
list(test.statistic = stat, degrees_of_freedom = c(p, h - p), p.value =
pvalue, estimate = xbar,
                null.value = mu)
}

wwmcp<-function(J,K,x,tr=.2,alpha=.05,dif=TRUE){
#
# Do all multiple comparisons for a within-by-within design
# using trimmed means
#
conM=con2way(J,K)
A=rmmcp(x,con=conM$conA,tr=tr,alpha=alpha,dif=dif)
B=rmmcp(x,con=conM$conB,tr=tr,alpha=alpha,dif=dif)
AB=rmmcp(x,con=conM$conAB,tr=tr,alpha=alpha,dif=dif)
list(Factor_A=A,Factor_B=B,Factor_AB=AB)
}
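#
# Usage sketch for wwmcp: a 2-by-2 within-by-within design stored as a
# four-column matrix (simulated data; illustration only; assumes con2way
# and rmmcp from this file are available):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(80),ncol=4)
wwmcp(2,2,x,tr=.2)  # contrasts for Factor A, Factor B and the interaction
}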

wwmcpbt<-function(J,K,x, tr=.2, alpha = 0.05, nboot = 599){
#
# Do multiple comparisons for a within-by-within design.
# using a bootstrap-t method and trimmed means.
# All linear contrasts relevant to main effects and interactions
# are tested.
#
conM=con2way(J,K)
A=lindepbt(x,con=conM$conA,alpha=alpha,tr=tr,nboot=nboot)
B=lindepbt(x,con=conM$conB,alpha=alpha,tr=tr,nboot=nboot)
AB=lindepbt(x,con=conM$conAB,alpha=alpha,tr=tr,nboot=nboot)
list(Factor_A=A,Factor_B=B,Factor_AB=AB)
}


wwmcppb<-function(J,K,x, alpha = 0.05, con = 0,est=tmean, plotit = FALSE,
    dif = TRUE, grp = NA, nboot = NA, BA = TRUE, hoch = T, xlab = "Group 1",
    ylab = "Group 2", pr = TRUE, SEED = TRUE, ...){
#
# Do all multiple comparisons for a within-by-within design.
# using a percentile bootstrap method and trimmed means
#
conM=con2way(J,K)
A=rmmcppb(x,con=conM$conA,alpha=alpha,dif=dif,plotit=plotit,est=est,
nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...)
B=rmmcppb(x,con=conM$conB,alpha=alpha,dif=dif,
plotit=plotit,est=est,nboot=nboot,BA=BA,hoch=hoch,
SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...)
AB=rmmcppb(x,con=conM$conAB,alpha=alpha,dif=dif,plotit=plotit,est=est,
nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...)
list(Factor_A=A,Factor_B=B,Factor_AB=AB)
}

wmcppb<-function(x, alpha = 0.05, con = 0,est=tmean, plotit = FALSE,
    dif = TRUE, grp = NA, nboot = NA, BA = TRUE, hoch = TRUE, xlab = "Group 1",
    ylab = "Group 2", pr = TRUE, SEED = TRUE, ...){
#
# Do all multiple comparisons for a repeated measures design.
# using a percentile bootstrap method and trimmed means
#
A=rmmcppb(x,con=con,alpha=alpha,dif=dif,plotit=plotit,est=est,
nboot=nboot,BA=BA,hoch=hoch,SEED=SEED,xlab=xlab,ylab=ylab,pr=pr,...)
A
}
lindepbt<-function(x, con = NULL, tr = 0.2, alpha = 0.05,nboot=599,dif=TRUE,
SEED=TRUE){
#
# MCP on trimmed means with FWE controlled with Rom's method
# Using a bootstrap-t method.
#
#  dif=T, difference scores are used. And for linear contrasts a simple
#  extension is used.
#
#  dif=F, hypotheses are tested based on the marginal trimmed means.
#
if(SEED)set.seed(2)
if(is.data.frame(x))x=as.matrix(x)
if(is.list(x))x=matl(x)
if(is.null(con))con=con2way(1,ncol(x))$conB # all pairwise
x=elimna(x)
n=nrow(x)
flagcon=F
if(!is.matrix(x))x<-matl(x)
if(!is.matrix(x))stop("Data must be stored in a matrix or in list mode.")
con<-as.matrix(con)
J<-ncol(x)
xbar<-vector("numeric",J)
nval<-nrow(x)
h1<-nrow(x)-2*floor(tr*nrow(x))
df<-h1-1
xbar=apply(x,2,mean,tr=tr)
if(sum(con^2)!=0)CC<-ncol(con)
ncon<-CC
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01)dvec<-alpha/c(1:ncon)
if(nrow(con)!=ncol(x))warning("The number of groups does not match the number
 of contrast coefficients.")
ncon<-ncol(con)
psihat<-matrix(0,ncol(con),4)
dimnames(psihat)<-list(NULL,c("con.num","psihat","ci.lower","ci.upper"))
test<-matrix(0,ncol(con),5)
dimnames(test)<-list(NULL,c("con.num","test","p.value","p.crit","se"))
temp1<-NA
for (d in 1:ncol(con)){
psihat[d,1]<-d
#
#  !dif  Use marginal trimmed means
#
if(!dif){
psihat[d,2]<-sum(con[,d]*xbar)
#
#
sejk<-0
for(j in 1:J){
for(k in 1:J){
djk<-(nval-1)*wincor(x[,j],x[,k], tr)$cov/(h1*(h1-1))
sejk<-sejk+con[j,d]*con[k,d]*djk
}}
sejk<-sqrt(sejk)
test[d,1]<-d
test[d,2]<-sum(con[,d]*xbar)/sejk
test[d,5]<-sejk
#
# now use a bootstrap-t to determine the p-value
#
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
xcen=x
for(j in 1:ncol(x))xcen[,j]=xcen[,j]-tmean(x[,j],tr=tr)
bvec=apply(data,1,lindep.sub,xcen,con[,d],tr)
bsort<-sort(abs(bvec))
ic<-round((1-alpha)*nboot)
ci<-0
psihat[d,3]<-psihat[d,2]-bsort[ic]*test[d,5]
psihat[d,4]<-psihat[d,2]+bsort[ic]*test[d,5]
p.value<-mean(abs(test[d,2])<=abs(bvec))
temp1[d]=p.value
}
if(dif){
for(j in 1:J){
if(j==1)dval<-con[j,d]*x[,j]
if(j>1)dval<-dval+con[j,d]*x[,j]
}
temp=trimcibt(dval,tr=tr,alpha=alpha,nboot=nboot,pr=FALSE)
temp1[d]<-temp$p.value #trimci(dval,tr=tr,pr=FALSE)$p.value
test[d,1]<-d
test[d,5]<-trimse(dval,tr=tr)
psihat[d,2]<-mean(dval,tr=tr)
psihat[d,3]<-temp$ci[1] #psihat[,2]-qt(1-test[,4]/2,df)*test[,5]
psihat[d,4]<-temp$ci[2] #psihat[,2]+qt(1-test[,4]/2,df)*test[,5]
}}
#
#   d ends here
#
test[,3]<-temp1
temp2<-order(0-temp1)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2,3]>=zvec)
test[temp2,4]<-zvec
if(flagcon)num.sig<-sum(test[,4]<=test[,5])
if(!flagcon)num.sig<-sum(test[,3]<=test[,4])
list(test=test,psihat=psihat,con=con,num.sig=num.sig)
}
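#
# Usage sketch for lindepbt: all pairwise comparisons among three dependent
# groups via difference scores (simulated data; illustration only):
if(FALSE){
set.seed(1)
x<-matrix(rnorm(60),ncol=3)
lindepbt(x,tr=.2,nboot=599)
}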

lindep.sub<-function(data,x,con=con,tr=tr){
con=as.matrix(con)
res=rmmcp(x[data,],con=con,tr=tr,dif=F)$test[,2]
res
}

mcp.nestAP<-function(x,tr=.2){
#
# Nested ANOVA
#
# Strategy: for each level of factor A, pool the data
# and then perform the analysis
#
# x is assumed to have list mode with length J,
# the number of independent groups.
#
# x[[1]] contains an n by K matrix, the nested data
# for the first level of the first factor.
# x[[2]] contains an n by K matrix, the nested data
# for the second level of the first factor, etc.
#
 xx=list()
for(j in 1: length(x))xx[[j]]=as.vector(x[[j]])
results=lincon(xx,tr=tr)
results
}

outmgvad<-function(m,center=NA,plotit=TRUE,op=1,
xlab="VAR 1",ylab="VAR 2",rate=.05,iter=100,ip=6,pr=T){
#
# Adjusts the critical value, gval used by outmgv,
# so that the outside rate per observation, under normality
# is approximately equal to the value given by the argument
# rate, which defaults to .05.
# That is, expected proportion of points declared outliers under normality
# is intended to be rate=.05
#
# When dealing with p-variate data, p>9, this adjustment can be crucial
#
m=elimna(m)
n=nrow(m)
newgval=sqrt(qchisq(.975,ncol(m)))
z=array(rmul(n*iter*ncol(m)),c(iter,n,ncol(m)))
newq=0
gtry=NA
for(itry in 1:ip){
newq=newq+9/10^itry
gtry[itry]=newq
}
gtry=c(.95,.975,gtry[-1])
if(pr)print("Computing adjustment")
for(itry in 1:ip){
for(i in 1:iter){
temp=outmgv.v2(z[i,,],gval=gval,op=op)$out.id
val[i]=length(temp)
}
erate=mean(val)/n
if(erate<rate){
newgval=sqrt(qchisq(gtry[itry],ncol(m)))
break
}}
res=outmgv(m,gval=newgval,plotit=TRUE,op=op, xlab = xlab, ylab = ylab)
list(results=res,used.gval=newgval)
}


outmgv.v2<-function(x,outfun=outbox,se=TRUE,op=1,
gval=sqrt(qchisq(.975,ncol(x))),
cov.fun=rmba,xlab="X",ylab="Y",SEED=TRUE,...){
#
# Check for outliers using mgv method
#
m<-x
m=elimna(m)
temp<-mgvar(m,se=se,op=op,cov.fun=cov.fun,SEED=SEED)
temp[is.na(temp)]<-0
temp2<-outbox(temp,mbox=TRUE,gval=gval)$out.id
vec<-c(1:nrow(m))
flag<-rep(T,nrow(m))
flag[temp2]<-F
vec<-vec[flag]
vals<-c(1:nrow(m))
keep<-vals[flag]
list(out.id=temp2,keep=keep)
}

out3d<-function(x,outfun=outpro,xlab="Var 1",ylab="Var 2",zlab="Var 3",
reg.plane=FALSE,regfun=tsreg,COLOR=FALSE,tick.marks=TRUE,...){
#
# Create a 3D plot of points and indicate outliers with red dots.
#
#  Assumes that the package scatterplot3d has been installed.
#  If not, use the command install.packages("scatterplot3d")
#  assuming you are connected to the web.
#
# To add a regression plane, set
#  reg.plane=T. The regression method used is specified with the argument
#  regfun.
# First two columns are taken to be predictors and third column is the outcome
#
#
if(!is.matrix(x) && !is.data.frame(x))stop("Data must be stored in a matrix
or data frame with 3 columns.")
if(ncol(x)!=3)stop("Data must be stored in a matrix with 3 columns.")
x=as.matrix(x)
x<-elimna(x)
library(scatterplot3d)
temp<-scatterplot3d(x,xlab=xlab,ylab=ylab,zlab=zlab,tick.marks=tick.marks)
outid<-outfun(x)$out.id
if(!is.na(outid[1])){
if(COLOR){
if(length(outid)==1)temp$points(t(as.matrix(x[outid,])),col="red")
if(length(outid)>1)temp$points(x[outid,],col="red")
}
if(!COLOR){
if(length(outid)==1)temp$points(t(as.matrix(x[outid,])),pch="*")
if(length(outid)>1)temp$points(x[outid,],pch="*")
}
}
if(reg.plane){
vals<-regfun(x[,1:2],x[,3],...)$coef
if(COLOR)temp$plane(vals,col="blue")
if(!COLOR)temp$plane(vals)
}
}

ees.ci<-function(x,y,SEED=TRUE,nboot=400,tr=.2,alpha=.05,pr=T){
#
# Compute a 1-alpha  confidence interval
# for a robust, heteroscedastic  measure of effect size
#
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
x=elimna(x)
y=elimna(y)
bvec=0
datax<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
datay<-matrix(sample(y,size=length(y)*nboot,replace=TRUE),nrow=nboot)
for(i in 1:nboot){
bvec[i]=yuenv2(datax[i,],datay[i,],tr=tr,SEED=F)$Var.Explained
}
bvec<-sort(bvec)
crit<-alpha/2
icl<-round(crit*nboot)+1
icu<-nboot-icl
ci<-NA
ci[1]<-bvec[icl]
pchk=yuen(x,y,tr=tr)$p.value
if(pchk>alpha)ci[1]=0
ci[2]<-bvec[icu]
if(ci[1]<0)ci[1]=0
ci=sqrt(ci)
ci
}
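#
# Usage sketch for ees.ci (assumes yuenv2 and yuen, defined elsewhere in
# this file, are available; simulated data, illustration only):
if(FALSE){
set.seed(1)
x<-rnorm(30)
y<-rnorm(30,.5)
ees.ci(x,y,nboot=400)  # confidence interval for the effect size
}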
wwwtrimbt<-function(J, K,L, x, tr = 0.2, JKL = J * K*L, con = 0,
 alpha = 0.05, grp =c(1:JKL), nboot = 599,SEED = TRUE, ...){
        #
        # A bootstrap-t for a within-by-within-by-within omnibus tests
        #  for all main effects and interactions
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
#
#  within-by-within-by-within design
#
#  JKL dependent groups
#

       #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JK, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
if(is.data.frame(x))x=as.matrix(x)
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
x=y
}
 p <- J*K*L
JKL=p
if(p>length(x))stop("JKL is less than the Number of groups")
JK=J*K
KL=K*L
#        v <- matrix(0, p, p)
        data <- list()
xx=list()
        for(j in 1:length(x)) {
xx[[j]]=x[[grp[j]]] # save input data
data[[j]] = xx[[j]] - mean(xx[[j]], tr = tr)
#                # Now have the groups in proper order.
        }
        if(SEED)set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        # Next determine the n_j values
        bsam = list()
        bdat = list()
aboot=NA
bboot=NA
cboot=NA
abboot=NA
acboot=NA
bcboot=NA
abcboot=NA
test.stat=wwwtrim(J,K,L,xx,tr=tr)
nv=length(x[[1]])
        for(ib in 1:nboot) {
bdat = sample(nv, size = nv, replace =T) # one resample of subjects per iteration
for(k in 1:JKL) bsam[[k]] = data[[k]][bdat]
temp=wwwtrim(J,K,L,bsam,tr=tr)
aboot[ib]=temp$Qa
bboot[ib]=temp$Qb
cboot[ib]=temp$Qc
acboot[ib]=temp$Qac
bcboot[ib]=temp$Qbc
abboot[ib]=temp$Qab
abcboot[ib]=temp$Qabc
}
pbA=NA
pbB=NA
pbC=NA
pbAB=NA
pbAC=NA
pbBC=NA
pbABC=NA
pbA=mean(test.stat$Qa[1,1]<aboot)
pbB=mean(test.stat$Qb[1,1]<bboot)
pbC=mean(test.stat$Qc[1,1]<cboot)
pbAB=mean(test.stat$Qab[1,1]<abboot)
pbAC=mean(test.stat$Qac[1,1]<acboot)
pbBC=mean(test.stat$Qbc[1,1]<bcboot)
pbABC=mean(test.stat$Qabc[1,1]<abcboot)
list(p.value.A=pbA,p.value.B=pbB,p.value.C=pbC,p.value.AB=pbAB,
p.value.AC=pbAC,p.value.BC=pbBC,p.value.ABC=pbABC)
}



bwwtrimbt<-function(J, K, L, x, tr = 0.2, JKL = J * K*L, con = 0,
 alpha = 0.05, grp =c(1:JKL), nboot = 599, SEED = TRUE, ...)
{
        #
        # A bootstrap-t for omnibus tests associated with
        # all main effects and interactions.
        # in a between-by-within-within design.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JK, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
if(is.data.frame(x))x=as.matrix(x)
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x)) y[[j]] <- x[, j]
                x <- y
}

        conM = con3way(J,K,L)
 p <- J*K*L
if(p>length(x))stop("JKL is less than the Number of groups")
JK=J*K
KL=K*L
        v <- matrix(0, p, p)
        data <- list()
xx=list()
        for(j in 1:length(x)) {
                data[[j]] <- x[[grp[j]]]
xx[[j]]=x[[grp[j]]] # save input data
                # Now have the groups in proper order.
                data[[j]] = data[[j]] - mean(data[[j]], tr = tr)
        }
        x <- data  # centered data xx has original data
test=bwwtrim(J,K,L,xx,tr=tr)
        if(SEED)
                set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        bsam = list()
        bdat = list()
aboot=NA
bboot=NA
cboot=NA
abboot=NA
acboot=NA
bcboot=NA
abcboot=NA
        for(ib in 1:nboot) {
                ilow <- 1 - KL
                iup = 0
 for(j in 1:J) {
 ilow <- ilow + KL
 iup = iup + KL
nv=length(x[[ilow]])
 bdat[[j]] = sample(nv, size = nv, replace =T)
for(k in ilow:iup){
 bsam[[k]] = x[[k]][bdat[[j]]]
}
}
temp=bwwtrim(J,K,L,bsam,tr=tr)
aboot[ib]=temp$Qa
bboot[ib]=temp$Qb
cboot[ib]=temp$Qc
acboot[ib]=temp$Qac
bcboot[ib]=temp$Qbc
abboot[ib]=temp$Qab
abcboot[ib]=temp$Qabc
        }
pbA=NA
pbB=NA
pbC=NA
pbAB=NA
pbAC=NA
pbBC=NA
pbABC=NA
pbA=mean(test$Qa[1,1]<aboot)
pbB=mean(test$Qb[1,1]<bboot)
pbC=mean(test$Qc[1,1]<cboot)
pbAB=mean(test$Qab[1,1]<abboot)
pbAC=mean(test$Qac[1,1]<acboot)
pbBC=mean(test$Qbc[1,1]<bcboot)
pbABC=mean(test$Qabc[1,1]<abcboot)
list(p.value.A=pbA,p.value.B=pbB,p.value.C=pbC,p.value.AB=pbAB,
p.value.AC=pbAC,p.value.BC=pbBC,p.value.ABC=pbABC)

}
bbwtrimbt<-function(J, K, L, x, tr = 0.2, JKL = J * K*L, con = 0,
 alpha = 0.05, grp =c(1:JKL), nboot = 599, SEED = TRUE, ...)
{
        #
        # Bootstrap-t for omnibus tests associated with
        # all main effects and interactions
        # for a between-by-between-within design.
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level for all three factors: levels 1,1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and second factor and level 2 of the third: level 1,1,2
        #  x[[K]] is the data for level 1,1,L
        #  x[[L+1]] is the data for level 1,2,1, x[[2K]] is level 1,2,2, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JK, the total number
        #  groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
if(is.data.frame(x))x=as.matrix(x)
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x)) y[[j]] <- x[, j]
                x <- y
}

#        conM = con3way(J,K,L)
 p <- J*K*L
if(p>length(x))stop("JKL is less than the Number of groups")
JK=J*K
        v <- matrix(0, p, p)
        data <- list()
xx=list()
        for(j in 1:length(x)) {
                data[[j]] <- x[[grp[j]]]
xx[[j]]=x[[grp[j]]] # save input data
                # Now have the groups in proper order.
                data[[j]] = data[[j]] - mean(data[[j]], tr = tr)
        }
#ilow=0-L
#iup=0
#for(j in 1:JK){
#ilow <- ilow + L
# iup = iup + L
#sel <- c(ilow:iup)
#xx[sel]=listm(elimna(matl(xx[sel])))
# v[sel, sel] <- covmtrim(xx[sel], tr)
#                }
test.stat=bbwtrim(J,K,L,xx,tr=tr)
        x <- data  # Centered data
#        jp <- 1 - K
#        kv <- 0
        if(SEED)
                set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        testA = NA
        testB = NA
        testC=NA
        testAB = NA
        testAC = NA
        testBC = NA
        testABC = NA
        bsam = list()
        bdat = list()
aboot=NA
bboot=NA
cboot=NA
abboot=NA
acboot=NA
bcboot=NA
abcboot=NA
nvec=NA
        for(j in 1:JK){
                nvec[j] = length(x[[j]])
        }
        for(ib in 1:nboot) {
                ilow <- 1 - L
                iup = 0
 for(j in 1:JK) {
 ilow <- ilow + L
 iup = iup + L
nv=length(x[[ilow]])
 bdat[[j]] = sample(nv, size = nv, replace =T)
for(k in ilow:iup){
 bsam[[k]] = x[[k]][bdat[[j]]]
}
}
temp=bbwtrim(J,K,L,bsam,tr=tr)
aboot[ib]=temp$Qa
bboot[ib]=temp$Qb
cboot[ib]=temp$Qc
acboot[ib]=temp$Qac
bcboot[ib]=temp$Qbc
abboot[ib]=temp$Qab
abcboot[ib]=temp$Qabc
}
pbA=NA
pbB=NA
pbC=NA
pbAB=NA
pbAC=NA
pbBC=NA
pbABC=NA
pbA=mean(test.stat$Qa[1,1]<aboot)
pbB=mean(test.stat$Qb[1,1]<bboot)
pbC=mean(test.stat$Qc[1,1]<cboot)
pbAB=mean(test.stat$Qab[1,1]<abboot)
pbAC=mean(test.stat$Qac[1,1]<acboot)
pbBC=mean(test.stat$Qbc[1,1]<bcboot)
pbABC=mean(test.stat$Qabc[1,1]<abcboot)
list(p.value.A=pbA,p.value.B=pbB,p.value.C=pbC,p.value.AB=pbAB,
p.value.AC=pbAC,p.value.BC=pbBC,p.value.ABC=pbABC)
}
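#
# A minimal usage sketch (simulated data; nboot reduced here for speed):
#  set.seed(44)
#  x=list()
#  for(i in 1:8)x[[i]]=rnorm(20)  # a 2-by-2-by-2 design, n=20 per group
#  bbwtrimbt(2,2,2,x,nboot=100)   # p-values for A, B, C and all interactions
#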
bwtrimbt<-function(J,K,x,tr=.2,alpha=.05,JK=J*K,grp=c(1:JK),nboot=599,
SEED=TRUE,monitor=F){
#
# A bootstrap-t for performing a split-plot design
# with trimmed means.
# By default, 20% trimming is used with B=599 bootstrap samples.
# This version returns bootstrap critical values; when this file is sourced
# it is superseded by the redefinition of bwtrimbt below, which returns
# p-values instead.
#
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix.
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
if(SEED)set.seed(2)
if(is.matrix(x)) {
y <- list()
ik=0
il=c(1:K)-K
for(j in 1:J){
il=il+K
zz=x[,il]
zz=elimna(zz)
for(k in 1:K){
ik=ik+1
y[[ik]]=zz[,k]
}}
                x <- y
}
JK<-J*K
data<-list()
xcen<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
xcen[[j]]<-data[[j]]-mean(data[[j]],tr) # Centered data
}
x<-data
#
# Next determine the n_j values
nvec<-NA
jp<-1-K
for(j in 1:J){
jp<-jp+K
nvec[j]<-length(x[[j]])
}
blist<-list()
print("Taking bootstrap samples. Please wait.")
testmat<-matrix(NA,ncol=3,nrow=nboot)
for(iboot in 1:nboot){
iv<-0
for(j in 1:J){
temp<-sample(nvec[j],replace = T)
for(k in 1:K){
iv<-iv+1
tempx<-xcen[[iv]]
blist[[iv]]<-tempx[temp]
}}
if(monitor)print(paste("Bootstrap iteration" ,iboot, "is complete"))
btest<-tsplit(J,K,blist,tr)
testmat[iboot,1]<-btest$Qa
testmat[iboot,2]<-btest$Qb
testmat[iboot,3]<-btest$Qab
}
lcrit<-round((1-alpha)*nboot)
temp3<-sort(testmat[,1])
crit.Qa<-temp3[lcrit]
temp3<-sort(testmat[,2])
crit.Qb<-temp3[lcrit]
temp3<-sort(testmat[,3])
crit.Qab<-temp3[lcrit]
temp4<-tsplit(J,K,x,tr=tr)
list(Qa=temp4$Qa,Qb=temp4$Qb,Qab=temp4$Qab,crit.Qa=crit.Qa,crit.Qb=crit.Qb,crit.Qab=crit.Qab)
}
bwtrimbt<-function(J,K,x,tr=.2,JK=J*K,grp=c(1:JK),nboot=599,
SEED=TRUE){
#
# A bootstrap-t for performing a split-plot design
# with trimmed means.
# By default, 20% trimming is used with B=599 bootstrap samples.
#
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix or a data frame
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
if(SEED)set.seed(2)
if(is.data.frame(x) || is.matrix(x)) {
y <- list()
ik=0
il=c(1:K)-K
for(j in 1:J){
il=il+K
zz=x[,il]
zz=elimna(zz)
for(k in 1:K){
ik=ik+1
y[[ik]]=zz[,k]
}}
                x <- y
}
JK<-J*K
data<-list()
xcen<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
xcen[[j]]<-data[[j]]-mean(data[[j]],tr) # Centered data
}
x<-data
#
# Next determine the n_j values
nvec<-NA
jp<-1-K
for(j in 1:J){
jp<-jp+K
nvec[j]<-length(x[[j]])
}
blist<-list()
print("Taking bootstrap samples. Please wait.")
testmat<-matrix(NA,ncol=3,nrow=nboot)
for(iboot in 1:nboot){
iv<-0
for(j in 1:J){
temp<-sample(nvec[j],replace = T)
for(k in 1:K){
iv<-iv+1
tempx<-xcen[[iv]]
blist[[iv]]<-tempx[temp]
}}
btest<-tsplit(J,K,blist,tr)
testmat[iboot,1]<-btest$Qa
testmat[iboot,2]<-btest$Qb
testmat[iboot,3]<-btest$Qab
}
test=tsplit(J,K,x,tr=tr)
pbA=mean(test$Qa[1]<testmat[,1])
pbB=mean(test$Qb[1]<testmat[,2])
pbAB=mean(test$Qab[1]<testmat[,3])
list(p.value.A=pbA,p.value.B=pbB,p.value.AB=pbAB)
}
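#
# A minimal usage sketch for the version above (simulated data;
# columns 1-2 hold the K=2 within levels for group 1, and so on):
#  set.seed(45)
#  x=matrix(rnorm(20*6),ncol=6)  # a 3-by-2 split-plot design
#  bwtrimbt(3,2,x,nboot=100)     # p-values for A, B and the interaction
#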

dtrimpb<-function(x,y=NULL,alpha=.05,con=0,est=tmean,plotit=TRUE,dif=TRUE,grp=NA,
hoch=TRUE,nboot=NA,xlab="Group 1",ylab="Group 2",pr=TRUE,SEED=TRUE,BA=FALSE,...){
#
#   Use a percentile bootstrap method to compare
#   trimmed means of dependent groups.
#
#   This is essentially the function rmmcppb, but set to compare
#   20% trimmed means by default.
#   And it is adjusted to handle tied values.
#
#   By default,
#   compute a .95 confidence interval for all linear contrasts
#   specified by con, a J by C matrix, where  C is the number of
#   contrasts to be tested, and the columns of con are the
#   contrast coefficients.
#   If con is not specified, all pairwise comparisons are done.
#
#   A sequentially rejective method
#   is used to control the probability of at least one Type I error.
#
#   dif=T indicates that difference scores are to be used
#   dif=F indicates that measures of location associated with the
#   marginal distributions are used instead.
#
#   nboot is the bootstrap sample size. If not specified, a value will
#   be chosen depending on the number of contrasts there are.
#
#   x can be an n by J matrix or it can have list mode
#   for two groups, data for second group can be put in y
#   otherwise, assume x is a matrix (n by J) or has list mode.
#
#
if(dif){
if(pr)print("dif=T, so analysis is done on difference scores")
temp<-rmmcppbd(x,y=y,alpha=alpha,con=con,est=est,plotit=plotit,grp=grp,
nboot=nboot,hoch=hoch,...)
output<-temp$output
con<-temp$con
}
if(!dif){
if(pr)print("dif=F, so analysis is done on marginal distributions")
if(!is.null(y[1]))x<-cbind(x,y)
if(is.data.frame(x))x=as.matrix(x)

if(!is.list(x) && !is.matrix(x))
stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
if(is.matrix(con)){
if(length(x)!=nrow(con))
stop("The number of rows in con is not equal to the number of groups.")
}}
if(is.list(x)){
# put the data in an n by J matrix
mat<-matl(x)
}
if(is.matrix(x) && is.matrix(con)){
if(ncol(x)!=nrow(con))
stop("The number of rows in con is not equal to the number of groups.")
mat<-x
}
if(is.matrix(x))mat<-x
if(!is.na(sum(grp)))mat<-mat[,grp]
mat<-elimna(mat) # Remove rows with missing values.
x<-mat
J<-ncol(mat)
xcen<-x
for(j in 1:J)xcen[,j]<-x[,j]-est(x[,j])
Jm<-J-1
if(sum(con^2)==0){
d<-(J^2-J)/2
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
d<-ncol(con)
if(is.na(nboot)){
if(d<=4)nboot<-1000
if(d>4)nboot<-5000
}
n<-nrow(mat)
crit.vec<-alpha/c(1:d)
connum<-ncol(con)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
xbars<-apply(mat,2,est)
psidat<-NA
for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars)
psihat<-matrix(0,connum,nboot)
psihatcen<-matrix(0,connum,nboot)
bvec<-matrix(NA,ncol=J,nrow=nboot)
bveccen<-matrix(NA,ncol=J,nrow=nboot)
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot){
bvec[ib,]<-apply(x[data[ib,],],2,est,...)
bveccen[ib,]<-apply(xcen[data[ib,],],2,est,...)
}
#
# Now have an nboot by J matrix of bootstrap values.
#
test<-1
bias<-NA
tval<-NA
tvalcen<-NA
for (ic in 1:connum){
psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic])
psihatcen[ic,]<-apply(bveccen,1,bptdpsi,con[,ic])
tvalcen[ic]<-sum((psihatcen[ic,]==0))/nboot
bias[ic]<-sum((psihatcen[ic,]>0))/nboot+sum((psihatcen[ic,]==0))/nboot-.5
tval[ic]<-sum((psihat[ic,]==0))/nboot
if(BA){
test[ic]<-sum((psihat[ic,]>0))/nboot+tval[ic]-.1*bias[ic]
if(test[ic]<0)test[ic]<-0
}
if(!BA)test[ic]<-sum((psihat[ic,]>0))/nboot+.5*tval[ic] # ties at zero get weight .5, as in dmedpb
test[ic]<-min(test[ic],1-test[ic])
}
test<-2*test
ncon<-ncol(con)
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(hoch)dvec<-alpha/(2* c(1:ncon))
dvec<-2*dvec
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
dvecba<-dvec
dvec[1]<-alpha/2
}
if(plotit && ncol(bvec)==2){
z<-c(0,0)
one<-c(1,1)
plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n")
points(bvec)
totv<-apply(x,2,est,...)
cmat<-var(bvec)
dis<-mahalanobis(bvec,totv,cmat)
temp.dis<-order(dis)
ic<-round((1-alpha)*nboot)
xx<-bvec[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
abline(0,1)
}
temp2<-order(0-test)
ncon<-ncol(con)
zvec<-dvec[1:ncon]
if(BA)zvec<-dvecba[1:ncon]
sigvec<-(test[temp2]>=zvec)
output<-matrix(0,connum,6)
dimnames(output)<-list(NULL,c("con.num","psihat","p-value","p.crit",
"ci.lower","ci.upper"))
tmeans<-apply(mat,2,est,...)
psi<-1
for (ic in 1:ncol(con)){
output[ic,2]<-sum(con[,ic]*tmeans)
output[ic,1]<-ic
output[ic,3]<-test[ic]
output[temp2,4]<-zvec
temp<-sort(psihat[ic,])
icl<-round(output[ic,4]*nboot/2)+1
icu<-nboot-(icl-1)
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
}
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}
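#
# A minimal usage sketch (simulated dependent groups; the added subject
# effect induces correlation across columns):
#  set.seed(46)
#  x=matrix(rnorm(25*3),ncol=3)+rnorm(25)
#  dtrimpb(x,plotit=FALSE)  # all pairwise comparisons via difference scores
#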
wwtrimbt<-function(J, K, x, tr = 0.2, JK = J*K, con = 0,
 alpha = 0.05, grp =c(1:JK), nboot = 599,SEED = TRUE, ...){
        #
        # A bootstrap-t for within-by-within omnibus tests
        # for all main effects and interactions
        #
        #  The R variable x is assumed to contain the raw
        #  data stored in list mode or in a matrix.
        #  If in list mode, x[[1]] contains the data
        #  for the first level of both factors: level 1,1.
        #  x[[2]] is assumed to contain the data for level 1 of the
        #  first factor and level 2 of the second: level 1,2
        #  x[[K]] is the data for level 1,K
        #  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
        #
        #  If the data are in a matrix, column 1 is assumed to
        #  correspond to x[[1]], column 2 to x[[2]], etc.
        #
        #  When in list mode x is assumed to have length JK, the total number
        #  of groups being tested, but a subset of the data can be analyzed
        #  using grp
        #
if(is.data.frame(x))x=as.matrix(x)
        if(is.matrix(x)) {
                y <- list()
                for(j in 1:ncol(x))
                        y[[j]] <- x[, j]
x=y
}
p <- J*K
JK <- p
if(p>length(x))stop("JK exceeds the number of groups in x")
        data <- list()
xx=list()
        for(j in 1:length(x)) {
xx[[j]]=x[[grp[j]]] # save input data
data[[j]] = xx[[j]] - mean(xx[[j]], tr = tr)
#                # Now have the groups in proper order.
        }
        if(SEED)set.seed(2)
        # set seed of random number generator so that
        #             results can be duplicated.
        bsam = list()
aboot=NA
bboot=NA
cboot=NA
abboot=NA
test.stat=wwtrim(J,K,xx,tr=tr)
nv=length(x[[1]])
        for(ib in 1:nboot) {
                bdat <- sample(nv, size = nv, replace = TRUE) # same cases for all JK dependent groups
                for(k in 1:JK) bsam[[k]] = data[[k]][bdat]
temp=wwtrim(J,K,bsam,tr=tr)
aboot[ib]=temp$Qa
bboot[ib]=temp$Qb
abboot[ib]=temp$Qab
}
pbA=NA
pbB=NA
pbAB=NA
pbA=mean(test.stat$Qa[1,1]<aboot)
pbB=mean(test.stat$Qb[1,1]<bboot)
pbAB=mean(test.stat$Qab[1,1]<abboot)
list(p.value.A=pbA,p.value.B=pbB,p.value.AB=pbAB)
}
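#
# A minimal usage sketch (simulated data), assuming the function wwtrim,
# defined elsewhere in this file, is available:
#  set.seed(47)
#  x=matrix(rnorm(20*4),ncol=4)+rnorm(20)  # a 2-by-2 within-by-within design
#  wwtrimbt(2,2,x,nboot=100)
#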



sband<-function(x,y,
crit=1.36*sqrt((length(x)+length(y))/(length(x)*length(y))),flag=FALSE,plotit=TRUE,
sm=TRUE,op=1,xlab="First Group",ylab="Delta",pr=TRUE){
#
#  Compute a confidence band for the shift function.
#  Assuming two independent groups are being compared
#
#  The default critical value is the approximate .05 critical value.
#
#  If flag is F (false), the exact significance level is not computed.
#
#  If plotit=TRUE, a plot of the shift function is created, assuming that
#  the graphics window has already been activated.
#
#  This function removes all missing observations.
#
#  When plotting, the median of x is marked with a + and the two
#  quartiles are marked with o.
#
#  sm=T, the shift function is smoothed using lowess when op=1;
#  otherwise a running interval smoother is used.
#
x<-x[!is.na(x)]  # Remove missing values from x.
y<-y[!is.na(y)]  # Remove missing values from y.
plotit<-as.logical(plotit)
flag<-as.logical(flag)
pc<-NA
if(flag){
if(pr)print("Computing the exact value of the probability coverage")
pc<-1-kssig(length(x),length(y),crit)
chk=sum(duplicated(c(x,y)))
if(chk>0)pc=1-kstiesig(x,y,crit)
}
xsort<-sort(x)
ysort<-c(NA,sort(y))
l<-0
u<-0
ysort[length(y)+1+1]<-NA
for(ivec in 1:length(x))
{
isub<-max(0,ceiling(length(y)*(ivec/length(x)-crit)))
l[ivec]<-ysort[isub+1]-xsort[ivec]
isub<-min(length(y)+1,floor(length(y)*(ivec/length(x)+crit))+1)
u[ivec]<-ysort[isub+1]-xsort[ivec]
}
num<-length(l[l>0 & !is.na(l)])+length(u[u<0 & !is.na(u)])
qhat<-c(1:length(x))/length(x)
m<-matrix(c(qhat,l,u),length(x),3)
dimnames(m)<-list(NULL,c("qhat","lower","upper"))
if(plotit){
xsort<-sort(x)
ysort<-sort(y)
del<-0
for (i in 1:length(x)){
ival<-round(length(y)*i/length(x))
if(ival<=0)ival<-1
if(ival>length(y))ival<-length(y)
del[i]<-ysort[ival]-xsort[i]
}
xaxis<-c(xsort,xsort)
yaxis<-c(m[,1],m[,2])
allx<-c(xsort,xsort,xsort)
ally<-c(del,m[,2],m[,3])
temp2<-m[,2]
temp2<-temp2[!is.na(temp2)]
plot(allx,ally,type="n",ylab=ylab,xlab=xlab)
ik<-rep(F,length(xsort))
if(sm){
if(op==1){
ik<-duplicated(xsort)
del<-lowess(xsort,del)$y
}
if(op!=1)del<-runmean(xsort,del,pyhat=TRUE)
}
lines(xsort[!ik],del[!ik])
lines(xsort,m[,2],lty=2)
lines(xsort,m[,3],lty=2)
temp<-summary(x)
text(temp[3],min(temp2),"+")
text(temp[2],min(temp2),"o")
text(temp[5],min(temp2),"o")
}
list(m=m,crit=crit,numsig=num,pc=pc)
}
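#
# A minimal usage sketch (simulated data):
#  set.seed(48)
#  x=rnorm(50)
#  y=rnorm(50,mean=.5)
#  sband(x,y)  # plots the shift function with an approximate .95 band
#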
yhbt<-function(x,y,tr=.2,alpha=.05,nboot=600,SEED=TRUE,PV=F){
#
#  Compute a 1-alpha confidence interval for the difference between
#  the trimmed means corresponding to two independent groups.
#  The bootstrap-t method with Hall's transformation is used.
#
if(SEED)set.seed(2) # set seed of random number generator so that
#                    results can be duplicated.
x<-x[!is.na(x)]  # Remove missing values in x
y<-y[!is.na(y)]  # Remove missing values in y
xcen<-x-mean(x,tr)
ycen<-y-mean(y,tr)
print("Taking bootstrap samples. Please wait.")
datax<-matrix(sample(xcen,size=length(x)*nboot,replace=TRUE),nrow=nboot)
datay<-matrix(sample(ycen,size=length(y)*nboot,replace=TRUE),nrow=nboot)
val<-NA
for(ib in 1:nboot)val[ib]<-yhall(datax[ib,],datay[ib,],tr=tr,alpha)$test.stat
temp<-yhall(x,y,tr=tr)
sigtil<-temp$sig.tilda
nhat<-temp$nu.tilda
val<-sort(val)
dif<-mean(x,tr=tr)-mean(y,tr=tr)
ilow<-round(alpha*nboot/2)
il<-ilow+1
uval<-nboot-ilow
b.low<-3*((1+nhat*val[il]-nhat/6)^{1/3})/nhat-3/nhat
b.hi<-3*((1+nhat*val[uval]-nhat/6)^{1/3})/nhat-3/nhat
ci.LOW<-dif-sigtil*b.hi
ci.UP<-dif-sigtil*b.low
pv=NULL
if(PV){
#  Determine p-value
pv=1
flag=F
if(dif !=0){
alpha=seq(1:100)/1000
for(i in 1:length(alpha)){
ilow<-round(alpha[i]*nboot/2)
il<-ilow+1
uval<-nboot-ilow
b.low<-3*((1+nhat*val[il]-nhat/6)^{1/3})/nhat-3/nhat
b.hi<-3*((1+nhat*val[uval]-nhat/6)^{1/3})/nhat-3/nhat
ci.low<-dif-sigtil*b.hi
ci.up<-dif-sigtil*b.low
if(ci.low>0 || ci.up<0){
pv=alpha[i]
flag=T
}
if(flag)break
}
if(!flag){
alpha=c(1:99)/100
for(i in 1:length(alpha)){
ilow<-round(alpha[i]*nboot/2)
il<-ilow+1
uval<-nboot-ilow
b.low<-3*((1+nhat*val[il]-nhat/6)^{1/3})/nhat-3/nhat
b.hi<-3*((1+nhat*val[uval]-nhat/6)^{1/3})/nhat-3/nhat
ci.low<-dif-sigtil*b.hi
ci.up<-dif-sigtil*b.low
if(ci.low>0 || ci.up<0){
pv=alpha[i]
flag=T
}
if(flag)break
}
}}}
list(est.dif=dif,conf.interval=c(ci.LOW,ci.UP),p.value=pv)
}
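#
# A minimal usage sketch (simulated data):
#  set.seed(49)
#  x=rnorm(30)
#  y=rexp(30)
#  yhbt(x,y,PV=TRUE)  # CI and p-value for the difference between 20% trimmed means
#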

mlrregCI<-function(x,y,nboot=300,MC=FALSE,SEED=TRUE,op.dis=TRUE){
#
#  Based on the Rousseeuw et al.
#  multivariate regression estimator,
#  compute a p-value for each of the parameters using a percentile
#  bootstrap method.
#
if(SEED)set.seed(2)
if(MC)library(parallel)
est=mlrreg(x,y)$coef
pval=est
n=nrow(x)
JK=(ncol(x)+1)*ncol(y)
vals=matrix(0,nrow=nboot,ncol=JK)
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
if(!MC)for(ib in 1:nboot){
vals[ib,]=mlrreg(x[data[ib,],],y[data[ib,],])$coef
}
if(MC){
data=listm(t(data))
vals=mclapply(data,mlrreg.est,x,y,mc.preschedule=TRUE)
vals=t(matl(vals))
}
pv=NULL
for(j in 1:JK){
pv[j]=mean(vals[,j]>0)+.5*mean(vals[,j]==0)
pv[j]=2*min(c(pv[j],1-pv[j]))
}
ic=0
il=1
iu=ncol(x)+1
for(iy in 1:ncol(y)){
pval[,iy]=pv[il:iu]
il=il+ncol(x)+1
iu=iu+ncol(x)+1
}
list(estimates=est,p.values=pval)
}
mlrreg.est<-function(data,x,y){
xv=x[data,]
yv=y[data,]
vals=as.vector(mlrreg(xv,yv)$coef)
vals
}
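#
# A minimal usage sketch for mlrregCI (simulated data; nboot reduced for speed):
#  set.seed(50)
#  x=matrix(rnorm(100*2),ncol=2)
#  y=cbind(x[,1]+rnorm(100),x[,2]+rnorm(100))
#  mlrregCI(x,y,nboot=100)  # p-values for each intercept and slope
#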
bmcppb<-function(x,alpha=.05,nboot=NA,grp=NA,est=tmean,con=0,bhop=FALSE,SEED=TRUE,
...){
#
#   Multiple comparisons for  J independent groups using trimmed means
#
#   A percentile bootstrap method with Rom's method is used.
#
#   The data are assumed to be stored in x
#   which either has list mode or is a matrix.  In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, the columns of the matrix correspond
#   to groups.
#
#   est is the measure of location and defaults to a 20% trimmed mean (tmean)
#   ... can be used to set optional arguments associated with est
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   Missing values are allowed.
#
con<-as.matrix(con)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
tempn<-0
mvec<-NA
for(j in 1:J){
temp<-x[[j]]
temp<-temp[!is.na(temp)] # Remove missing values.
tempn[j]<-length(temp)
x[[j]]<-temp
mvec[j]<-est(temp,...)
}
Jm<-J-1
#
# Determine contrast matrix
#
if(sum(con^2)==0){
ncon<-(J^2-J)/2
con<-matrix(0,J,ncon)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
ncon<-ncol(con)
if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.")
#  Determine nboot if a value was not specified
if(is.na(nboot)){
nboot<-5000
if(J <= 8)nboot<-4000
if(J <= 3)nboot<-2000
}
# Determine critical values
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
}
}
if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon
bvec<-matrix(NA,nrow=J,ncol=nboot)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
#print(paste("Working on group ",j))
data<-matrix(sample(x[[j]],size=length(x[[j]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,]<-apply(data,1,est,...) # Bootstrapped values for jth group
}
test<-NA
bcon<-t(con)%*%bvec #ncon by nboot matrix
tvec<-t(con)%*%mvec
for (d in 1:ncon){
tv<-sum(bcon[d,]==0)/nboot
test[d]<-sum(bcon[d,]>0)/nboot+.5*tv
if(test[d]> .5)test[d]<-1-test[d]
}
test<-2*test
output<-matrix(0,ncon,6)
dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper"))
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
for (ic in 1:ncol(con)){
output[ic,2]<-tvec[ic,]
output[ic,1]<-ic
output[ic,3]<-test[ic]
temp<-sort(bcon[ic,])
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}
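#
# A minimal usage sketch (simulated data):
#  set.seed(51)
#  x=list(rnorm(20),rnorm(25,mean=.3),rnorm(30))
#  bmcppb(x)  # all pairwise comparisons based on 20% trimmed means
#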
mlrregWtest<-function(x,y,nboot=300,MC=FALSE,SEED=TRUE){
#
#  Test hypothesis that all slopes=0  based on Rousseeuw et al.
#  multivariate regression estimator
#
#  Strategy: a variation of the wild bootstrap method, percentile version.
#
if(SEED)set.seed(2)
if(MC)library(parallel)
estit=mlrreg.subest(y,x)  # note: y before x; mlrreg.subest(data,x) computes mlrreg(x,data)
n=nrow(x)
JK=ncol(x)*ncol(y)
vals=matrix(0,nrow=nboot,ncol=JK)
data=list()
for(i in 1:nboot){
bsam=sample(n,replace=TRUE)
data[[i]]=y[bsam,]
}
if(!MC){
vals=lapply(data,mlrreg.subest,x)
}
if(MC){
vals=mclapply(data,mlrreg.subest,x,mc.preschedule=TRUE)
}
vals=t(matl(vals))
nullv=rep(0,JK)
vals=rbind(vals,estit)
cen=rep(0,ncol(vals))
if(MC)dv=pdisMC(vals,center=cen)
if(!MC)dv=pdis(vals,center=cen)
bplus=nboot+1
pv=1-sum(dv[bplus]>=dv[1:nboot])/nboot
list(p.value=pv)
}
mlrreg.subest<-function(data,x){
vals=as.vector(mlrreg(x,data)$coef[-1,])
vals
}
btrim<-function(x,tr=.2,grp=NA,g=NULL,dp=NULL,nboot=599,SEED=TRUE){
#
#   Test the hypothesis of equal trimmed means, corresponding to J independent
#   groups, using a bootstrap-t method.
#
#   The data are assumed to be stored in x in list mode
#   or in a matrix. In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, columns correspond to groups.
#
#   grp is used to specify some subset of the groups, if desired.
#   By default, all J groups are used.
#   g=NULL, x is assumed to be a matrix or have list mode
#
#   if g is specified, it is assumed that column g of x is
#   a factor variable and that the dependent variable of interest is in column
#   dp of x, which can be a matrix or data frame.
#
#   The default number of bootstrap samples is nboot=599
#
if(!is.null(g)){
if(is.null(dp))stop("Specify a value for dp, the column containing the data")
x=fac2list(x[,dp],x[,g])
}
if(is.data.frame(x))x=as.matrix(x)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in a matrix or in list mode.")
if(is.na(grp[1]))grp<-c(1:length(x))
J<-length(grp)
nval=NA
x=lapply(x,elimna)
nval=lapply(x,length)
xbar=lapply(x,mean,tr=tr)
bvec<-array(0,c(J,2,nboot))
hval<-vector("numeric",J)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
for(j in 1:J){
hval[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]]))
   # hval is the number of observations in the jth group after trimming.
print(paste("Working on group ",grp[j]))
xcen<-x[[grp[j]]]-mean(x[[grp[j]]],tr)
data<-matrix(sample(xcen,size=length(x[[grp[j]]])*nboot,replace=TRUE),nrow=nboot)
bvec[j,,]<-apply(data,1,trimparts,tr) # A 2 by nboot matrix. The first row
#                     contains the bootstrap trimmed means, the second row
#                     contains the bootstrap squared standard errors.
}
m1<-bvec[,1,]  # J by nboot matrix containing the bootstrap trimmed means
m2<-bvec[,2,]  # J by nboot matrix containing the bootstrap sq standard errors
wvec<-1/m2  # J by nboot matrix of w values
uval<-apply(wvec,2,sum)  # Vector having length nboot
blob<-wvec*m1
xtil<-apply(blob,2,sum)/uval # nboot vector of xtil values
blob1<-matrix(0,J,nboot)
for (j in 1:J)blob1[j,]<-wvec[j,]*(m1[j,]-xtil)^2
avec<-apply(blob1,2,sum)/(length(x)-1)
blob2<-(1-wvec/uval)^2/(hval-1)
cvec<-apply(blob2,2,sum)
cvec<-2*(length(x)-2)*cvec/(length(x)^2-1)
testb<-avec/(cvec+1)
#            A vector of length nboot containing bootstrap test values
ct<-sum(is.na(testb))
if(ct>0)print("Some bootstrap estimates of the test statistic could not be computed")
test<-t1way(x,tr=tr,grp=grp)
pval<-sum(test$TEST<=testb)/nboot
#
# Determine explanatory effect size
#
e.pow=t1wayv2(x,tr=tr)$Explanatory.Power
list(test=test$TEST,p.value=pval,Explanatory.Power=e.pow,
Effect.Size=sqrt(e.pow))
}
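#
# A minimal usage sketch (simulated data; nboot reduced for speed):
#  set.seed(52)
#  x=list(rnorm(15),rnorm(20),rnorm(25,mean=.5))
#  btrim(x,nboot=200)
#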


mcdcen<-function(x){
#
# Compute MCD measure of location only.
#
res=covmcd(x)$center
res
}
mvecen<-function(x){
#
# Compute MVE measure of location only.
#
res=covmve(x)$center
res
}


fac2Mlist<-function(x,grp.col,lev.col,pr=T){
#
#  sort and store data in a matrix or data frame into
#  groups, where the jth group
#  has p-variate data
#
#  grp.col is column indicating levels of between factor.
#  lev.col indicates the columns where repeated measures are contained
#
#  Example: column 2 contains information on the levels of the between
#  factor; say we have a 3-by-2 design, where column 3 contains time 1 data
#  and column 7 contains time 2 data.
#  fac2Mlist(x,2,c(3,7)) will store data in list mode, having length
#  2 (the number of levels), with each level containing a
#  matrix having two columns. The first column is based on values
#  in column 3 of the matrix x, and the second column is based on
#  data in column 7 of x.
#
res=selbybw(x,grp.col,lev.col)
if(pr){
print("Levels for between factor:")
print(sort(unique(x[,grp.col])))
}
res=res$x
p=length(lev.col)
J=length(unique(x[,grp.col]))
y=list()
ic=1-p
iu=0
for(j in 1:J){
ic=ic+p
iu=iu+p
y[[j]]=matl(res[ic:iu])
}
y
}
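#
# A minimal usage sketch (simulated data frame; column 1 holds the between
# factor and columns 2-3 the repeated measures), assuming selbybw,
# defined elsewhere in this file, is available:
#  dat=data.frame(g=rep(1:2,each=10),t1=rnorm(20),t2=rnorm(20))
#  fac2Mlist(dat,1,c(2,3))  # a list of two matrices, one per level of g
#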



fac2BBMlist<-function(x,grp.col,lev.col,pr=T){
#
#  This function is useful when dealing with a two-way MANOVA
#  It takes data stored in x, a matrix or data frame,
#  and creates groups based on the data in the two columns
#  indicated by the argument
#
#  grp.col
#  lev.col indicates the columns where the p-variate data are contained.
#
#  Example:
#   z=fac2BBMlist(plasma,c(2,3),c(7,8))
#   creates groups based on values in columns 2 (Factor A) and 3 (Factor B).
#  z[[1]] contains a matrix having two columns; the data are taken
# from columns 7 and 8 of plasma
#
res=selbybbw(x,grp.col,lev.col,pr=pr)
p=length(lev.col)
J=length(unique(x[,grp.col[1]]))
K=length(unique(x[,grp.col[2]]))
y=list()
ic=1-p
iu=0
jk=0
for(j in 1:J){
for(k in 1:K){
ic=ic+p
iu=iu+p
jk=jk+1
y[[jk]]=matl(res[ic:iu])
}}
y
}


regmediate<-function(x,y,regfun=tsreg,nboot=400,alpha=.05,xout=FALSE,outfun=out,MC=FALSE,SEED=TRUE,...){
#
#   In a mediation analysis, two of the linear equations that play a role are
#   y=b_{01} + b_{11}x + e_1
#   y=b_{03} + b_{13}x + b_{23} x_m + e_3
#   where x_m is the mediator variable.
#   An additional assumption is
#   x_m=b_{02} + b_{12}x + \epsilon_2.
#   Goal: Compute a confidence interval for b_{11}-b_{13}
#
#   The default regression method is the Theil-Sen estimator.
#
#   The predictor values are assumed to be in the n-by-2 matrix x, with the
#   mediator variable in column 2.
#   MC=T. A multicore processor will be used.
#   xout=T will remove leverage points using the function indicated by the argument outfun.
#
if(MC)library(parallel)
x<-as.matrix(x)
p1<-ncol(x)+1
p<-ncol(x)
if(p!=2)stop("Argument x should have two columns")
xy<-cbind(x,y)
xy<-elimna(xy)
x<-xy[,1:p]
y<-xy[,p1]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),ncol=nboot)
data=listm(data)
if(MC){
bvec1<-mclapply(data,regbootMC,as.matrix(x[,1]),y,regfun,mc.preschedule=TRUE)
bvec2<-mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE)
}
if(!MC){
bvec1<-lapply(data,regboot,as.matrix(x[,1]),y,regfun)
bvec2<-lapply(data,regboot,x,y,regfun)
}
bvec1=matl(bvec1)
bvec2=matl(bvec2)
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
dif=bvec1[2,]-bvec2[2,]
ilow<-round((alpha/2) * nboot)
ihi<-nboot - ilow
ilow<-ilow+1
sig.level<-NA
temp<-mean(dif<0)
sig.level<-2*(min(temp,1-temp))
bsort<-sort(dif)
regci<-bsort[ilow]
regci[2]<-bsort[ihi]
list(conf.interval=regci,p.value=sig.level)
}
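#
# A minimal usage sketch (simulated data; nboot reduced for speed), using the
# default Theil-Sen estimator:
#  set.seed(53)
#  x=matrix(rnorm(60*2),ncol=2)  # column 2 plays the role of the mediator
#  y=x[,1]+x[,2]+rnorm(60)
#  regmediate(x,y,nboot=100)  # CI and p-value for b_{11}-b_{13}
#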



regmed2<-function(x,y,regfun=tsreg,nboot=400,alpha=.05,xout=FALSE,outfun=out,MC=FALSE,
SEED=TRUE,pr=TRUE,...){
#
#   In a mediation analysis, two of the linear equations that play a role are
#   y=b_{01} + b_{11}x + e_1
#   y=b_{03} + b_{13}x + b_{23} x_m + e_3
#   where x_m is the mediator variable.
#   An additional assumption is
#   x_m=b_{02} + b_{12}x + \epsilon_2.
#   Goal: Test hypotheses  b_{12}=0 and b_{23}=0
#
#   The default regression method is the Theil-Sen estimator.
#
#   The predictor values are assumed to be in the n-by-2 matrix x, with the
#   mediator variable in column 2.
#   MC=T. A multicore processor will be used.
#   xout=T will remove leverage points using the function indicated by the argument outfun.
#
if(MC)library(parallel)
x<-as.matrix(x)
p1<-ncol(x)+1
p<-ncol(x)
if(p!=2)stop("Argument x should have two columns")
xy<-cbind(x,y)
xy<-elimna(xy)
x<-xy[,1:p]
y<-xy[,p1]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
if(MC){
temp1=regciMC(x[,1],x[,2],regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=FALSE)
temp2=regciMC(x,y,regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=FALSE)
}
if(!MC){
temp1=regci(x[,1],x[,2],regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=FALSE)
temp2=regci(x,y,regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=FALSE)
}
if(pr){
print("Output returned in res1 is for the slope of the regression line")
print("where the goal is to predict the mediator variable given the other")
print("predictor variable stored in column 1 of x.")
print("Output in res2 is for slope of the mediator when both predictors are used.")
}
res1=c(temp1$regci[2,],temp1$p.value[2])
z1=t(as.matrix(res1))
dimnames(z1)=list(NULL,c("ci.low","ci.up","p.value"))
res2=c(temp2$regci[3,],temp2$p.value[3])
z2=t(as.matrix(res2))
dimnames(z2)=list(NULL,c("ci.low","ci.up","p.value"))
list(res1=z1,res2=z2)
}


ogk.center<-function(x,beta=.9,...){
#
# Compute OGK multivariate measure of location
#
center=ogk(x,beta=beta,...)$center
list(center=center)
}
sdwe<-function(m,K=3){
#
# Stahel-Donoho W-estimator implemented as suggested by
# Zuo, Cui and He 2004, Annals of Statistics, 32, 167--188
#
m=elimna(m)
pd=1/(1+zdepth(m)) # projection depth
MPD=median(pd) # C in Zuo et al. notation
flag=(pd<MPD)
W=rep(1,length(pd))
W[flag]=(exp(0-K*(1-pd[flag]/MPD)^2)-exp(0-K))/(1-exp(0-K))
bot=sum(W)
temp=m
for(i in 1:length(pd))temp[i,]=W[i]*m[i,]
val=apply(temp,2,sum)/bot
dif=m
for(i in 1:length(pd))dif[i,]=m[i,]-val
V=matrix(0,ncol(m),ncol(m))
for(i in 1:length(pd))V=V+W[i]*(as.matrix(dif[i,])%*%t(as.matrix(dif[i,])))
V=V/bot
list(center=val,cov=V)
}
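#
# A minimal usage sketch (simulated bivariate data):
#  set.seed(54)
#  m=matrix(rnorm(50*2),ncol=2)
#  sdwe(m)  # returns $center and $cov
#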
MARest<-function(x,kappa=.1){
#
# Compute Maronna multivariate measure of location and scatter
#
# kappa: the proportion of cases to be controlled when the robust method is used
#
ep=0.00000001  # convergence criterion
Z=x
p=ncol(x)
HT=HuberTun(kappa,p)
r=HT$r
tau=HT$tau
H=MARONNA.sub(Z,r,tau,ep,p)
list(center=t(H$center),cov=H$cov)
}
MARONNA.sub<-function(Z,r,tau,ep,p){
      n=nrow(Z) # sample size
 # Starting values
      mu0=MeanCov(Z)$zbar
      Sigma0=MeanCov(Z)$S
      Sigin=solve(Sigma0)

      diverg=0 # convergence flag

      for (k in 1:200) {
                sumu1=0
                mu=matrix(0,p,1)
                Sigma=matrix(0,p,p)
                d=rep(NA,n)
                u1=rep(NA,n)
                u2=rep(NA,n)

                for (i in 1:n) {
                        zi=Z[i,]
                        zi0=zi-mu0
                        di2=t(zi0)%*%Sigin%*%zi0
                        di=as.numeric(sqrt(di2))
                        d[i]=di

                #get u1i,u2i
                        if (di<=r) {
                           u1i=1.0
                           u2i=1.0/tau
                        }else {
                           u1i=r/di
                           u2i=u1i^2/tau
                   }
                        u1[i]=u1i
                        u2[i]=u2i

                        sumu1=sumu1+u1i
                        mu=mu+u1i*zi
                        Sigma=Sigma+u2i*zi0%*%t(zi0)

                } # end of loop i

                mu1=mu/sumu1
                Sigma1=Sigma/n
                Sigdif=Sigma1-Sigma0
                dt=sum(Sigdif^2)

                mu0=mu1
                Sigma0=Sigma1
                Sigin=solve(Sigma0)
  if (dt<ep) {break}

          } # end of loop k


       if (k==200) { # no convergence after 200 iterations
                        diverg=1
                        mu0=rep(0,p)
                        Sigma0=matrix(NA,p,p)
          }
list(center=mu0,cov=Sigma0)
}
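#
# A minimal usage sketch for MARest (simulated data), assuming its helper
# functions HuberTun and MeanCov, defined elsewhere in this file, are available:
#  set.seed(55)
#  x=matrix(rnorm(60*3),ncol=3)
#  MARest(x)  # returns $center and $cov
#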
mcpOV<-function(x,alpha=.05,nboot=500,grp=NA,est=smean,con=0,bhop=FALSE,SEED=TRUE,
...){
#
#   Multiple comparisons for J dependent groups using
#   a multivariate O-type measure of location
#   (rows of the data matrix are resampled jointly).
#
#   A percentile bootstrap method with Rom's method is used.
#
#   The data are assumed to be stored in x
#   which either has list mode or is a matrix.  In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, the columns of the matrix correspond
#   to groups.
#
#   est is the measure of location and defaults to smean, a skipped estimator
#   ... can be used to set optional arguments associated with est
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   Rows with missing values are removed.
#
if(is.data.frame(x))x=as.matrix(x)
con<-as.matrix(con)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
tempn<-0
mvec<-NA
Jm<-J-1
x=elimna(matl(x))
n=nrow(x)
mvec=est(x)
#
# Determine contrast matrix
#
if(sum(con^2)==0){
ncon<-(J^2-J)/2
con<-matrix(0,J,ncon)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
ncon<-ncol(con)
if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.")
# Determine critical values
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
}
}
if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon
bvec<-matrix(NA,nrow=J,ncol=nboot)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(i in 1:nboot)bvec[,i]=est(x[data[i,],])
test<-NA
bcon<-t(con)%*%bvec #ncon by nboot matrix
tvec<-t(con)%*%mvec
for (d in 1:ncon){
tv<-sum(bcon[d,]==0)/nboot
test[d]<-sum(bcon[d,]>0)/nboot+.5*tv
if(test[d]> .5)test[d]<-1-test[d]
}
test<-2*test
output<-matrix(0,ncon,6)
dimnames(output)<-list(NULL,c("con.num","psihat","p.value","p.crit","ci.lower","ci.upper"))
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output[temp2,4]<-zvec
icl<-round(dvec[ncon]*nboot/2)+1
icu<-nboot-icl-1
for (ic in 1:ncol(con)){
output[ic,2]<-tvec[ic,]
output[ic,1]<-ic
output[ic,3]<-test[ic]
temp<-sort(bcon[ic,])
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}

COVreg<-function(x,y,cov.fun=MARest,loc.fun=MARest,xout=FALSE,outfun=out,...){
#
# Regression estimation can be done via the usual maximum likelihood
# covariance matrix. This function uses the same approach
# using a robust covariance matrix instead.
#
# The predictors are assumed to be stored in the n-by-p matrix x.
#
xy=elimna(cbind(x,y))
p1=ncol(xy)
p=p1-1
x=xy[,1:p]
y=xy[,p1]
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
AC=cov.fun(cbind(x,y),...)$cov
ma<-AC[1:p,p1]
m<-AC[1:p,1:p]
slope<-solve(m,ma)
mvals<-loc.fun(cbind(x,y))$center
b0<-mvals[p1]-sum(slope%*%mvals[1:p])
res<-y-x%*%slope-b0
list(coef=c(b0,slope),residuals=res)
}
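#
# A minimal usage sketch (simulated data). The default cov.fun=MARest relies on
# helper functions defined elsewhere in this file:
#  set.seed(56)
#  x=matrix(rnorm(80*2),ncol=2)
#  y=x[,1]+rnorm(80)
#  COVreg(x,y)$coef  # intercept followed by the two slopes
#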




dmedpb<-function(x,y=NULL,alpha=.05,con=0,est=median,plotit=TRUE,dif=TRUE,grp=NA,
hoch=TRUE,nboot=NA,xlab="Group 1",ylab="Group 2",pr=TRUE,SEED=TRUE,BA=FALSE,...){
#
#   Use a percentile bootstrap method to  compare
#   medians of dependent groups.
#
#   This is essentially the function rmmcppb, but set to compare medians
#   by default.
#   And it is adjusted to handle tied values.
#
#   By default,
#   compute a .95 confidence interval for all linear contrasts
#   specified by con, a J by C matrix, where  C is the number of
#   contrasts to be tested, and the columns of con are the
#   contrast coefficients.
#   If con is not specified, all pairwise comparisons are done.
#
#   A sequentially rejective method
#   is used to control the probability of at least one Type I error.
#
#   dif=T indicates that difference scores are to be used
#   dif=F indicates that measures of location associated with the
#   marginal distributions are used instead.
#
#   nboot is the bootstrap sample size. If not specified, a value will
#   be chosen depending on the number of contrasts there are.
#
#   x can be an n by J matrix or it can have list mode
#   for two groups, data for second group can be put in y
#   otherwise, assume x is a matrix (n by J) or has list mode.
#
#
if(dif){
if(pr)print("dif=T, so analysis is done on difference scores")
temp<-rmmcppbd(x,y=y,alpha=alpha,con=con,est=est,plotit=plotit,grp=grp,
nboot=nboot,hoch=hoch,...)
output<-temp$output
con<-temp$con
}
if(!dif){
if(pr)print("dif=F, so analysis is done on marginal distributions")
if(!is.null(y[1]))x<-cbind(x,y)
if(is.data.frame(x))x=as.matrix(x)
if(!is.list(x) && !is.matrix(x))
stop("Data must be stored in a matrix or in list mode.")
if(is.list(x)){
if(is.matrix(con)){
if(length(x)!=nrow(con))
stop("The number of rows in con is not equal to the number of groups.")
}}
if(is.list(x)){
# put the data in an n by J matrix
mat<-matl(x)
}
if(is.matrix(x) && is.matrix(con)){
if(ncol(x)!=nrow(con))
stop("The number of rows in con is not equal to the number of groups.")
mat<-x
}
if(is.matrix(x))mat<-x
if(!is.na(sum(grp)))mat<-mat[,grp]
mat<-elimna(mat) # Remove rows with missing values.
x<-mat
J<-ncol(mat)
xcen<-x
for(j in 1:J)xcen[,j]<-x[,j]-est(x[,j])
Jm<-J-1
if(sum(con^2)==0){
d<-(J^2-J)/2
con<-matrix(0,J,d)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
d<-ncol(con)
if(is.na(nboot)){
if(d<=4)nboot<-1000
if(d>4)nboot<-5000
}
n<-nrow(mat)
crit.vec<-alpha/c(1:d)
connum<-ncol(con)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
xbars<-apply(mat,2,est)
psidat<-NA
for (ic in 1:connum)psidat[ic]<-sum(con[,ic]*xbars)
psihat<-matrix(0,connum,nboot)
psihatcen<-matrix(0,connum,nboot)
bvec<-matrix(NA,ncol=J,nrow=nboot)
bveccen<-matrix(NA,ncol=J,nrow=nboot)
print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot){
bvec[ib,]<-apply(x[data[ib,],],2,est,...)
bveccen[ib,]<-apply(xcen[data[ib,],],2,est,...)
}
#
# Now have an nboot by J matrix of bootstrap values.
#
test<-1
bias<-NA
tval<-NA
tvalcen<-NA
for (ic in 1:connum){
psihat[ic,]<-apply(bvec,1,bptdpsi,con[,ic])
psihatcen[ic,]<-apply(bveccen,1,bptdpsi,con[,ic])
tvalcen[ic]<-sum((psihatcen[ic,]==0))/nboot
bias[ic]<-sum((psihatcen[ic,]>0))/nboot+sum((psihatcen[ic,]==0))/nboot-.5
tval[ic]<-sum((psihat[ic,]==0))/nboot
if(BA){
test[ic]<-sum((psihat[ic,]>0))/nboot+tval[ic]-.1*bias[ic]
if(test[ic]<0)test[ic]<-0
}
if(!BA)test[ic]<-sum((psihat[ic,]>0))/nboot+.5*tval[ic]
test[ic]<-min(test[ic],1-test[ic])
}
test<-2*test
ncon<-ncol(con)
dvec<-alpha/c(1:ncon)
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
dvecba<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
dvecba<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(hoch)dvec<-alpha/(2* c(1:ncon))
dvec<-2*dvec
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
dvecba<-dvec
#dvec[1]<-alpha/2
}
if(plotit && ncol(bvec)==2){
z<-c(0,0)
one<-c(1,1)
plot(rbind(bvec,z,one),xlab=xlab,ylab=ylab,type="n")
points(bvec)
totv<-apply(x,2,est,...)
cmat<-var(bvec)
dis<-mahalanobis(bvec,totv,cmat)
temp.dis<-order(dis)
ic<-round((1-alpha)*nboot)
xx<-bvec[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
abline(0,1)
}
temp2<-order(0-test)
ncon<-ncol(con)
zvec<-dvec[1:ncon]
if(BA)zvec<-dvecba[1:ncon]
sigvec<-(test[temp2]>=zvec)
output<-matrix(0,connum,6)
dimnames(output)<-list(NULL,c("con.num","psihat","p-value","p.crit",
"ci.lower","ci.upper"))
tmeans<-apply(mat,2,est,...)
psi<-1
for (ic in 1:ncol(con)){
output[ic,2]<-sum(con[,ic]*tmeans)
output[ic,1]<-ic
output[ic,3]<-test[ic]
output[temp2,4]<-zvec
temp<-sort(psihat[ic,])
icl<-round(output[ic,4]*nboot/2)+1
icu<-nboot-(icl-1)
output[ic,5]<-temp[icl]
output[ic,6]<-temp[icu]
}
}
num.sig<-sum(output[,3]<=output[,4])
list(output=output,con=con,num.sig=num.sig)
}
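#
# A minimal usage sketch (simulated data with tied values):
#  set.seed(57)
#  x=matrix(sample(1:8,25*2,replace=TRUE),ncol=2)  # two dependent groups
#  dmedpb(x,plotit=FALSE)  # compare medians via difference scores
#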
MAT2list<-function(x,J=NULL,p=NULL){
#
# Store the data in a matrix or data frame in a new
# R variable having list mode.
# The results are stored in y, having list mode
# Col 1 to p of x will be stored as a matrix in  y[[1]],
# Col p+1 to 2p are stored  in y[[2]], and so on.
#
# The function assumes ncol(x)=J*p
# either J, the number of groups, or p, the number of variables,
# must be specified.
#
#  This function is used by the R function linconMpb when testing
#  hypotheses about linear contrasts based on multivariate data.
#
if(is.null(dim(x)))stop("The argument x must be a matrix or data frame")
y<-list()
if(is.null(J) && is.null(p))stop("Specify J or P")
if(is.null(J))J=ncol(x)/p
if(is.null(p))p=ncol(x)/J
Jp=floor(J)*floor(p)
if(Jp != ncol(x))stop("Jp is not equal to the number of columns")
lp=1-p
up=0
for(j in 1:J){
lp=lp+p
up=up+p
y[[j]]<-as.matrix(x[,lp:up])
}
y
}
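#
# A minimal usage sketch:
#  x=matrix(1:24,ncol=6)
#  MAT2list(x,J=3)  # y[[1]]=cols 1-2, y[[2]]=cols 3-4, y[[3]]=cols 5-6
#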
linconMpb<-function(x,alpha=.05,nboot=1000,grp=NA,est=tmean,con=0,bhop=FALSE,
SEED=TRUE,PDIS=FALSE,J=NULL,p=NULL,...){
#
#   Multiple comparisons for  J independent groups using trimmed means
#   with multivariate data for each group.
#
#   A percentile bootstrap method with Rom's method is used.
#
#   The data are assumed to be stored in x
#   which  has list mode,
#   x[[1]] contains the data for the first group in the form of a
#   matrix, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#
#   est is the measure of location and defaults to a 20% trimmed mean (tmean)
#   ... can be used to set optional arguments associated with est
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   Missing values are automatically removed.
#
con<-as.matrix(con)
if(is.matrix(x) || is.data.frame(x)){
if(is.null(J) && is.null(p))stop("Specify J or p")
x=MAT2list(x,p=p,J=J)
}
if(!is.list(x))stop("Data must be stored in list mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
nullvec=rep(0,ncol(x[[1]]))
bplus=nboot+1
for(j in 1:J){
x[[j]]<-elimna(x[[j]])
}
Jm<-J-1
#
# Determine contrast matrix
#
if(sum(con^2)==0){
ncon<-(J^2-J)/2
con<-matrix(0,J,ncon)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
ncon<-ncol(con)
if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.")
# Determine critical levels
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
}
}
if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon
bvec<-array(NA,c(J,nboot,ncol(x[[1]])))
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples. Please wait.")
nvec=lapply(x,nrow)
for(j in 1:J){
data<-matrix(sample(nvec[[j]],size=nvec[[j]]*nboot,replace=TRUE),nrow=nboot)
bvec[j,,]<-apply(data,1,linconMpb.sub,x[[j]],est,...) # Bootstrapped values for jth group
}
#print(bvec[1,,])
test<-NA
for (d in 1:ncon){
tv=matrix(0,nboot,ncol(x[[1]])) #nboot by p matrix reflecting Psi hat
estit=rep(0,ncol(x[[1]]))
for(j in 1:J){
tv=tv+con[j,d]*bvec[j,,]
estit=estit+con[j,d]*apply(x[[j]],2,est,...)
}
if(!PDIS)m1=cov(tv)
tv=rbind(tv,nullvec)
if(!PDIS)dv=mahalanobis(tv,center=estit,m1)
if(PDIS)dv=pdis(tv,center=estit) # projection distances
test[d]=1-sum(dv[bplus]>=dv[1:nboot])/nboot
}
output<-matrix(0,ncon,3)
dimnames(output)<-list(NULL,c("con.num","p.value","p.crit"))
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output[temp2,3]<-zvec
for (ic in 1:ncol(con)){
output[ic,1]<-ic
output[ic,2]<-test[ic]
}
num.sig<-sum(output[,2]<=output[,3])
list(output=output,con=con,num.sig=num.sig)
}
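#
# A minimal usage sketch (simulated bivariate data; nboot reduced for speed):
#  set.seed(58)
#  x=list(matrix(rnorm(40),ncol=2),matrix(rnorm(40),ncol=2),
#  matrix(rnorm(40,mean=.5),ncol=2))
#  linconMpb(x,nboot=500)  # pairwise comparisons of the marginal trimmed means
#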
linconMpb.sub<-function(data,x,est,...){
res=apply(x[data,],2,est,...)
res
}
linconSpb<-function(x,alpha=.05,nboot=1000,grp=NA,est=smean,con=0,bhop=FALSE,
SEED=TRUE,PDIS=FALSE,J=NULL,p=NULL,...){
#
#   Multiple comparisons for  J independent groups
#   with multivariate data for each group.
#   That is, linear contrasts relevant to MANOVA can be tested.
#   The method can handle
#   multivariate measures of location that take into account
#   the overall structure of the data, as opposed to using, for example
#   the marginal trimmed means, which is done by default when using
#   linconMpb.
#   The argument
#
#   est=smean,
#
#   means that by default the skipped measure of location, based on
#   the projection method for detecting outliers, is used.
#
#   Mahalanobis distance is used to compute a p-value, but projection
#   distances could be used by setting PDIS=T.
#
#   A percentile bootstrap method with Rom's method is used.
#
#   alpha=.05 means the probability of one or more type I errors is .05.
#
#   The data are assumed to be stored in x
#   which  has list mode,
#   x[[1]] contains the data for the first group in the form of a
#   matrix, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#
#   est is the measure of location and defaults to smean
#   ... can be used to set optional arguments associated with est
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   Missing values are automatically removed.
#
if(is.matrix(x) || is.data.frame(x)){
if(is.null(J) && is.null(p))stop("Specify J or p")
x=MAT2list(x,p=p,J=J)
}
con<-as.matrix(con)
if(!is.list(x))stop("Data must be stored in list mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
nullvec=rep(0,ncol(x[[1]]))
bplus=nboot+1
for(j in 1:J){
x[[j]]<-elimna(x[[j]])
}
Jm<-J-1
#
# Determine contrast matrix
#
if(sum(con^2)==0){
ncon<-(J^2-J)/2
con<-matrix(0,J,ncon)
id<-0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
ncon<-ncol(con)
if(nrow(con)!=J)stop("Something is wrong with con; the number of rows does not match the number of groups.")
# Determine critical levels
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
}
}
if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon
bvec<-array(NA,c(J,nboot,ncol(x[[1]])))
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples. Please wait.")
nvec=lapply(x,nrow)
for(j in 1:J){
data<-matrix(sample(nvec[[j]],size=nvec[[j]]*nboot,replace=TRUE),nrow=nboot)
bvec[j,,]<-apply(data,1,linconSpb.sub,x[[j]],est,...) # Bootstrapped values for jth group
}
test<-NA
for (d in 1:ncon){
tv=matrix(0,nboot,ncol(x[[1]])) #nboot by p matrix reflecting Psi hat
estit=rep(0,ncol(x[[1]]))
for(j in 1:J){
tv=tv+con[j,d]*bvec[j,,]
estit=estit+con[j,d]*est(x[[j]],...)
}
if(!PDIS)m1=cov(tv)
tv=rbind(tv,nullvec)
if(!PDIS)dv=mahalanobis(tv,center=estit,m1)
if(PDIS)dv=pdis(tv,center=estit) # projection distances
test[d]=1-sum(dv[bplus]>=dv[1:nboot])/nboot
}
output<-matrix(0,ncon,3)
dimnames(output)<-list(NULL,c("con.num","p.value","p.crit"))
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output[temp2,3]<-zvec
for (ic in 1:ncol(con)){
output[ic,1]<-ic
output[ic,2]<-test[ic]
}
num.sig<-sum(output[,2]<=output[,3])
list(output=output,con=con,num.sig=num.sig)
}
linconSpb.sub<-function(data,x,est,...){
res=est(x[data,],...)
res
}

MULtr.anova<-function(x,J=NULL,p=NULL,tr=.2,alpha=.05){
#
#  Do Multivariate ANOVA with trimmed means using
#  Johansen's method
#
#  x is assumed to have list mode with length(x)=J=number of groups and
#  x[[j]] is an n_j-by-p  matrix, p is the number of variables.
#
#  x can also be a matrix when J and p are specified. It is assumed the data are stored in
#   a matrix in the same manner expected by bwtrim.
#
#  To get a p-value, use the function MULAOVp
#
if(is.matrix(x) || is.data.frame(x)){
if(is.null(J) && is.null(p))stop("Specify J or p")
x=MAT2list(x,p=p,J=J)
}
x=lapply(x,as.matrix)
x=lapply(x,elimna)
p=ncol(x[[1]])
iden=diag(p)
J=length(x)
tvec=list()
nval=lapply(x,nrow)
Rtil=lapply(x,wincov,tr=tr)
tvec=lapply(x,mmean,tr=tr)
g=list()
gmean=rep(0,p) # grand mean eventually
groupm=list()
Wsum=matrix(0,ncol=p,nrow=p)
W=list()
f=0
Aw=0
for(j in 1:J){
tvec[[j]]=as.matrix(tvec[[j]])
g[[j]]=floor(nval[[j]]*tr)
Rtil[[j]]=Rtil[[j]]*(nval[[j]]-1)/((nval[[j]]-2*g[[j]])*(nval[[j]]-2*g[[j]]-1))
f[j]=nval[[j]]-2*g[[j]]-1
W[[j]]=solve(Rtil[[j]])
groupm[[j]]=apply(x[[j]],2,tmean,tr=tr)
Wsum=Wsum+W[[j]]
gmean=gmean+W[[j]]%*%tvec[[j]]
}
Wsuminv=solve(Wsum)
for(j in 1:J){
temp=iden-Wsuminv%*%W[[j]]
tempsq=temp%*%temp
Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/f[j]
}
Aw=Aw/2
gmean=as.matrix(gmean)
gmean=Wsuminv%*%gmean # Final weighted grand mean
df=p*(J-1)
crit<-qchisq(1-alpha,df)
crit<-crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
test=0
for(k in 1:p){
for(m in 1:p){
for(j in 1:J){
test=test+W[[j]][k,m]*(groupm[[j]][m]-gmean[m])*(groupm[[j]][k]-gmean[k])
}}}
list(test.stat=test,crit.value=crit)
}


MULAOVp<-function(x,J=NULL,p=NULL,tr=.2){
#
#  Do Multivariate ANOVA with trimmed means using
#  Johansen's method
#
#  x is assumed to have list mode with J=number of groups
#  x[[j]] is an n_j by p  matrix
#
alval<-c(1:999)/1000
for(i in 1:999){
irem<-i
Qa<-MULtr.anova(x,J=J,p=p,tr=tr,alpha=alval[i])
if(Qa$test.stat>Qa$crit.value)break
}
list(test.stat=Qa$test.stat,p.value=alval[i])
}
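#
# A minimal usage sketch for MULtr.anova and MULAOVp (simulated data; two
# groups with bivariate responses), assuming the helper wincov, defined
# elsewhere in this file, is available:
#  set.seed(59)
#  x=list(matrix(rnorm(60),ncol=2),matrix(rnorm(60,mean=.4),ncol=2))
#  MULtr.anova(x)  # test statistic and .05 critical value
#  MULAOVp(x)      # the corresponding p-value
#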

YYmcp<-function(x,alpha=.05,grp=NA,tr=.2,bhop=FALSE,J=NULL,p=NULL,...){
#
#   All pairwise comparisons among J independent groups using trimmed means
#   with multivariate data for each group.
#   The method applies the Yanagihara-Yuan MANOVA method to each pair of
#   groups and controls FWE via Rom's method if bhop=F.
#   bhop=T, use the Benjamini-Hochberg method
#
#   The data are assumed to be stored in x
#   which  has list mode,
#   x[[1]] contains the data for the first group in the form of a
#   matrix, x[[2]] the data
#   for the second group, etc., each matrix having the same
#   number of columns Length(x)=the number of groups = J.
#
#   The data can be stored in a single matrix having Jp columns
#   J = number of groups.
#   If this is the case, specify the argument J or p (the number of variables).
#
#   tr indicates the amount of trimming and defaults to .2
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   Missing values are automatically removed.
#
if(is.matrix(x) || is.data.frame(x)){
if(is.null(J) && is.null(p))stop("Specify J or p")
x=MAT2list(x,p=p,J=J)
}
if(!is.list(x))stop("Data must be stored in list mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
for(i in 1:length(grp))xx[[i]]<-x[[grp[i]]]
x<-xx
}
J<-length(x)
for(j in 1:J){
x[[j]]<-elimna(x[[j]])
}
Jm<-J-1
#
# Determine contrast matrix
#
ncon<-(J^2-J)/2
if(!bhop){
if(alpha==.05){
dvec<-c(.05,.025,.0169,.0127,.0102,.00851,.0073,.00639,.00568,.00511)
if(ncon > 10){
avec<-.05/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha==.01){
dvec<-c(.01,.005,.00334,.00251,.00201,.00167,.00143,.00126,.00112,.00101)
if(ncon > 10){
avec<-.01/c(11:ncon)
dvec<-c(dvec,avec)
}}
if(alpha != .05 && alpha != .01){
dvec<-alpha/c(1:ncon)
}
}
if(bhop)dvec<-(ncon-c(1:ncon)+1)*alpha/ncon
#
output<-matrix(0,ncon,4)
dimnames(output)<-list(NULL,c("Group","Group","p.value","p.crit"))
ic=0
for (j in 1:Jm){
jp<-j+1
for (k in jp:J){
ic=ic+1
output[ic,1]=j
output[ic,2]=k
output[ic,3]<-YYmanova(x[[j]],x[[k]],tr=tr)$p.value
}}
test=output[,3]
temp2<-order(0-test)
zvec<-dvec[1:ncon]
sigvec<-(test[temp2]>=zvec)
output[temp2,4]<-zvec
num.sig<-sum(output[,3]<=output[,4])
list(output=output,num.sig=num.sig)
}


loc2dif<-function(x,y=NULL,est=median,na.rm=TRUE,plotit=FALSE,xlab="",ylab="",...){
#
# Compute a measure of location associated with the
# distribution of x-y, using the measure of location given by
# the argument est.
# x and y are paired data or independent variables having the same length.
#  If x and y have different lengths, use the function wmwloc
#
# Advantage of this estimator: relatively high efficiency even under normality versus
# using sample means.
#
if(is.null(y)){
#if(!is.matrix(x))stop("x should be an n-by-2 matrix")
if(ncol(x)!=2)stop("x should be an n-by-2 matrix")
y=x[,2]
x=x[,1]
}
	if(na.rm){
m=elimna(cbind(x,y))
x=m[,1]
y=m[,2]
}
temp=as.vector(outer(x,y,FUN="-"))
val<-est(temp,na.rm=TRUE,...)
if(plotit)akerd(temp,xlab=xlab,ylab=ylab)
val
}
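#
# A minimal usage sketch (simulated data):
#  set.seed(60)
#  x=rnorm(30)
#  y=rnorm(30,mean=.5)
#  loc2dif(x,y)  # median of all pairwise differences x_i-y_j
#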

mlrreg<-function(x,y,cov.fun=cov.mcd,ols.op=TRUE,mcd.op=TRUE,
quantile.used=floor(.75*n),RES=FALSE,...){
#
# Do multivariate regression, using by default the method
# in Rousseeuw, Van Aelst, Van Driessen and Agullo,
# Technometrics, 46, 293-305
#
# Note, to use the method recommended by Rousseeuw et al., the argument
# quantile.used=.75*n is used when calling cov.mcd.
#
#  RES=T, the residuals will be returned.
#
# y is assumed to be  multivariate with data stored in a matrix.
#
# an initial fit is found using the measures of scatter and location
# corresponding to cov.fun and mcd.op. If
# mcd.op=T, cov.mcd is used with quantile.used=.75*n
# mcd.op=F, cov.fun is used and defaults to cov.mcd with the
# default value used by R for the argument quantile.used
# But any function that returns location and scatter in $center and $cov
# can be used.
#
#  if ols.op=T, OLS is applied after points are removed based on the initial fit
#  if ols.op=F, Theil-Sen is used by calling the function mopreg
#
#  Early versions of this function considered estimating
#  explanatory power in terms of the generalized variance
#  of the predicted y values and the observed y values
#  epow.cov determines which robust covariance matrix will be used.
#  This idea has not been explored enough
#  Some choices are:
# cov (the usual generalized variance)
# skipcov
# tbscov
# covout
# covogk
# mgvcov
# mvecov
# mcdcov
#
library(MASS)
if(!is.matrix(y))stop("y is not a matrix")
X<-cbind(x,y)
X<-elimna(X)
n<-nrow(X)
qy<-ncol(y)
qx<-ncol(x)
qxp1<-qx+1
tqyqx<-qy+qx
y<-X[,qxp1:tqyqx]
# compute initial estimate of slopes and intercept:
if(!mcd.op)locscat<-cov.fun(X,...)
if(mcd.op)locscat<-cov.mcd(X,quan=quantile.used)
sig<-locscat$cov
mu<-locscat$center
sigxx<-sig[1:qx,1:qx]
sigxy<-sig[1:qx,qxp1:tqyqx]
sigyy<-sig[qxp1:tqyqx,qxp1:tqyqx]
Bhat<-solve(sigxx)%*%sigxy
sige<-sigyy-t(Bhat)%*%sigxx%*%Bhat
sige.inv<-solve(sige)
Ahat<-t(mu[qxp1:tqyqx]-t(Bhat)%*%mu[1:qx])
resL<-matrix(nrow=nrow(X),ncol=qy)
for(i in 1:nrow(X))resL[i,]<-y[i,]-t(Bhat)%*%X[i,1:qx]
for(j in 1:qy)resL[,j]<-resL[,j]-Ahat[j]
drL<-NA
for(i in 1:nrow(X))drL[i]<-t(resL[i,])%*%sige.inv%*%resL[i,]
# In Rousseeuw notation, drL<- is d^2
w<-rep(0,nrow(X))
qdr<-qchisq(.99,qy)
iflag<-(drL<qdr)
w[iflag]<-1
term1<-0
vec<-c(1:nrow(X))
keep<-vec[iflag==1]
X<-X[keep,]
if(ols.op)output<-lsfit(X[,1:qx],X[,qxp1:tqyqx])
if(!ols.op)output<-mopreg(X[,1:qx],X[,qxp1:tqyqx],KEEP=T)
yhat=X[,qxp1:tqyqx]-output$residuals
res=NULL
if(RES)res=output$residuals
#epow=(gvarg(yhat,epow.cov)/gvarg(X[,qxp1:tqyqx],epow.cov))
#list(coef=output$coefficients,residuals=res,E.power=epow,Strength.Assoc=sqrt(epow))
list(coef=output$coefficients,residuals=res)
}
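
# EXAMPLE (an illustrative sketch, not from the original source; simulated
# data with two outcome variables; requires the MASS package for cov.mcd):
# set.seed(2)
# x=matrix(rnorm(100*3),ncol=3)
# y=cbind(x%*%c(1,2,0)+rnorm(100),x%*%c(0,1,1)+rnorm(100))
# mlrreg(x,y)$coef  # robust multivariate regression coefficients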
Mreglde<-function(x,y,xout=FALSE,eout=FALSE,outfun=outpro,epow.cov=mcdcov,RES=FALSE,...){
#
# Do multivariate regression where parameters are
# estimated via the least distance estimator.
#  See Jhun and Choi (2009). Comp Stat & Data Analysis, 53, 4221-4227
#
#  RES=T, the residuals will be returned.
#
if(eout){
flag=outfun(cbind(x,y),...)$keep
x=x[flag,]
y=y[flag,]
}
if(xout){
flag=outfun(x,...)$keep
x=x[flag,]
y=y[flag,]
}
npar=(ncol(x)+1)*ncol(y)
xy=elimna(cbind(x,y))
x=xy[,1:ncol(x)]
for(i in 1:ncol(x))x[,i]=(x[,i]-mean(x[,i]))/sqrt(var(x[,i]))
p1=ncol(x)+1
y=xy[,p1:ncol(xy)]
INIT=as.vector(lsfit(x,y)$coef)
xx=c(nrow(x),ncol(x),ncol(y),as.vector(x),as.vector(y))
Bs=nelderv2(xx,npar,Mreglde.sub,START=INIT)
Bs=matrix(Bs,ncol=ncol(y))
dimnames(Bs)<-list(c("INTER",rep("SLOPE",ncol(x))),rep("Y",ncol(Bs)))
yhat=matrix(NA,nrow=nrow(y),ncol=ncol(y))
for(i in 1:nrow(y)){
z=as.matrix(x[i,])
yhat[i,]=t(Bs[2:nrow(Bs),])%*%z
}
yhat=yhat+Bs[1,]
res=NULL
if(RES)res=y-yhat
#epow=gvarg(yhat,epow.cov)/gvarg(y,epow.cov)
list(coef=Bs,residuals=res)
}
winse<-function(x,tr=.2){
#
# Estimate the standard error of the Winsorized mean
#
x=elimna(x)
n=length(x)
h=n-2*floor(tr*n)
top=(n-1)*sqrt(winvar(x,tr=tr))
bot=(h-1)*sqrt(n)
se=top/bot
se
}
winci<-function(x,tr=.2,alpha=.05,null.value=0,pr=T){
#
#  Compute a 1-alpha confidence interval for the Winsorized mean
#
#  The default amount of  Winsorizing is tr=.2
#
if(pr){
print("The p-value returned by the this function is based on the")
print("null value specified by the argument null.value, which defaults to 0")
}
x<-elimna(x)
se<-winse(x,tr=tr)
df<-length(x)-2*floor(tr*length(x))-1
trimci<-winmean(x,tr)-qt(1-alpha/2,df)*se
trimci[2]<-winmean(x,tr)+qt(1-alpha/2,df)*se
test<-(winmean(x,tr)-null.value)/se
sig<-2*(1-pt(abs(test),df))
list(ci=trimci,test.stat=test,p.value=sig)
}
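
# EXAMPLE (an illustrative sketch, simulated data):
# set.seed(3)
# x=rexp(40)
# winci(x,tr=.2)  # .95 CI and p-value for the 20% Winsorized mean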

# Multivariate Regression
# Input:
#   x: data-matrix (n,p)
#   y: data-matrix (n,q)
#   gamma: proportion of trimming
#   arguments:
#     - ns : number of subsets; default=500
#     - nc : number of C-steps; default=10
#     - delta : critical value for the reweighted estimator; default=0.01
# Output:
#     beta : matrix (p,q) of MLTS-regression coefficients
#     sigma: matrix (q,q) containing MLTS-residual covariance
#     dres : residual distances (n,1) w.r.t. initial fit
#     betaR : matrix (p,q) of RMLTS-regression coefficients
#     sigmaR: matrix (q,q) containing RMLTS-residual covariance
#     dresR : residual distances (n,1) w.r.t. RMLTS
# Remark: if intercept needed, add a column of ones to the x-matrix
#
# Ref: Agullo,J., Croux, C., and Van Aelst, S. (2008)
#      The Multivariate Least Trimmed Squares Estimator,
#      Journal of Multivariate Analysis, 99, 311-338.
#
# Author: Kristel Joossens
#
#
###########
# EXAMPLE #
###########
# n=1000;
# p=5;
# q=2;
# x = cbind(rep(1,n),array(rnorm(n*(p-1)),dim=c(n,p-1)))
# beta = cbind(c(2,1,4,2,1),c(5,2,1,.5,2))
# y = x %*% beta +array(rnorm(n*q),dim=c(n,q))
# gamma = 0.25
# out = mlts(x,y,gamma)
################################################################################
mlts <- function(x,y,gamma,ns=500,nc=10,delta=0.01)
{
  d <- dim(x); n <- d[1]; p <- d[2]
  q <- ncol(y)
  h <- floor(n*(1-gamma))+1
  obj0 <- 1e10
  for (i in 1:ns)
  { sorted <- sort(runif(n),na.last = NA,index.return=TRUE)
    istart <- sorted$ix[1:(p+q)]
    xstart <- x[istart,]
    ystart <- y[istart,]
    bstart <- solve(t(xstart)%*%xstart,t(xstart)%*%ystart)
    sigmastart <- (t(ystart-xstart%*%bstart))%*%(ystart-xstart%*%bstart)/q
    for (j in 1:nc)
    { res  <-  y - x %*% bstart
      tres <- t(res)
      dist2 <- colMeans(solve(sigmastart,tres)*tres)
      sdist2 <- sort(dist2,na.last = NA,index.return = TRUE)
      idist2 <- sdist2$ix[1:h]
      xstart <- x[idist2,]
      ystart <- y[idist2,]
      bstart <- solve(t(xstart)%*%xstart,t(xstart)%*%ystart)
      sigmastart <- (t(ystart-xstart%*%bstart))%*%(ystart-xstart%*%bstart)/(h-p)
    }
    obj <- det(sigmastart)
    if (obj < obj0)
    { result.beta <- bstart
      result.sigma <- sigmastart
      obj0 <- obj
    }
  }
  cgamma <- (1-gamma)/pchisq(qchisq(1-gamma,q),q+2)
  result.sigma <- cgamma * result.sigma
  res <- y - x %*% result.beta
  tres<-t(res)
  result.dres <- colSums(solve(result.sigma,tres)*tres)
  result.dres <- sqrt(result.dres)

  qdelta <- sqrt(qchisq(1-delta,q))
  good  <- (result.dres <= qdelta)
  xgood <- x[good,]
  ygood <- y[good,]
  result.betaR <- solve(t(xgood)%*%xgood,t(xgood)%*%ygood)
  result.sigmaR <- (t(ygood-xgood%*%result.betaR)) %*%
    (ygood-xgood%*%result.betaR)/(sum(good)-p)
  cdelta <- (1-delta)/pchisq(qdelta^2,q+2)
  result.sigmaR<-cdelta*result.sigmaR
  resR<-y-x%*%result.betaR
  tresR<-t(resR)
  result.dresR <- colSums(solve(result.sigmaR,tresR)*tresR)
  result.dresR <- sqrt(result.dresR)
  list(beta=result.beta,sigma=result.sigma,dres=result.dres,
    betaR=result.betaR,sigmaR=result.sigmaR,dresR=result.dresR)
}
MULtsreg<-function(x,y,tr=.2,RMLTS=T){
# Multivariate Least Trimmed Squares Estimator
# Input:
#   x: data-matrix (n,p)
#   y: data-matrix (n,q)
#   tr: proportion of trimming
#   This function calls an R function written by Kristel Joossens
#
# Output:
#     If RMLTS=F, coef: matrix (p,q) of MLTS-regression coefficients
#     If RMLTS=T (the default), coef: matrix (p,q) of RMLTS-regression coefficients
#
# Ref: Agullo,J., Croux, C., and Van Aelst, S. (2008)
#      The Multivariate Least Trimmed Squares Estimator,
#      Journal of Multivariate Analysis, 99, 311-338.
#
x=as.matrix(x)
xy=elimna(cbind(x,y))
xx=as.matrix(cbind(rep(1,nrow(xy)),xy[,1:ncol(x)]))
p1=ncol(x)+1
y=as.matrix(xy[,p1:ncol(xy)])
outp=mlts(xx,y,tr)
if(!RMLTS)coef=outp$beta
if(RMLTS)coef=outp$betaR
list(coef=coef)
}
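
# EXAMPLE (an illustrative sketch, simulated data):
# set.seed(4)
# x=matrix(rnorm(60*2),ncol=2)
# y=cbind(x[,1]+rnorm(60),x[,1]-x[,2]+rnorm(60))
# MULtsreg(x,y)$coef          # RMLTS coefficients (the default)
# MULtsreg(x,y,RMLTS=F)$coef  # MLTS coefficients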

t1wayv2<-function(x,tr=.2,grp=NA,MAT=FALSE,lev.col=1,var.col=2,nboot=100,SEED=TRUE,pr=TRUE,IV=NULL,loc.fun=median){
#
# Same as t1way, but computes explanatory power and the related effect size
#
# For n1!=n2, this function calls t1way.effect.
#
#  A heteroscedastic one-way ANOVA for trimmed means
#  using a generalization of Welch's method.
#
#  The data are assumed to be stored in $x$ in a matrix or in list mode.
#
# MAT=F, if x is a matrix, columns correspond to groups.
# if MAT=T, assumes argument
# lev.col
# indicates which column of x denotes the groups. And
#  var.col indicates the column where the data are stored.
#
#  IV, if specified, taken to be the independent variable
#      That is, the group id values
#      and x is assumed to be a vector containing all of the data
#
# if x has list mode:
#  length(x) is assumed to correspond to the total number of groups.
#  By default, the null hypothesis is that all groups have a common mean.
#  To compare a subset of the groups, use grp to indicate which
#  groups are to be compared. For example, if you type the
#  command grp<-c(1,3,4), and then execute this function, groups
#  1, 3, and 4 will be compared with the remaining groups ignored.
#
#  Missing values are automatically removed.
#
library(MASS)
if(SEED)set.seed(2)
if(MAT){
if(!is.matrix(x))stop("With MAT=T, data must be stored in a matrix")
if(length(lev.col)!=1)stop("Argument lev.col should have 1 value")
temp=selby(x,lev.col,var.col)
x=temp$x
grp2=rank(temp$grpn)
x=x[grp2]
}
if(!is.null(IV[1])){
if(pr)print("Assuming x is a vector containing all of the data, the dependent variable")
xi=elimna(cbind(x,IV))
x=fac2list(xi[,1],xi[,2])
}
if(is.matrix(x))x<-listm(x)
if(is.na(sum(grp[1])))grp<-c(1:length(x))
if(!is.list(x))stop("Data are not stored in a matrix or in list mode.")
J<-length(grp)
h<-vector("numeric",J)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
pts=NULL
nval=0
for(j in 1:J)x[[j]]=elimna(x[[j]])
for(j in 1:J){
val<-x[[j]]
val<-elimna(val)
nval[j]=length(val)
pts=c(pts,val)
x[[j]]<-val # missing values have been removed
h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]]))
   # h is the number of observations in the jth group after trimming.
w[j]<-h[j]*(h[j]-1)/((length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr))
xbar[j]<-mean(x[[grp[j]]],tr)
}
u<-sum(w)
xtil<-sum(w*xbar)/u
A<-sum(w*(xbar-xtil)^2)/(J-1)
B<-2*(J-2)*sum((1-w/u)^2/(h-1))/(J^2-1)
TEST<-A/(B+1)
nu1<-J-1
nu2<-1./(3*sum((1-w/u)^2/(h-1))/(J^2-1))
sig<-1-pf(TEST,nu1,nu2)
nv=lapply(x,length)
#
# Determine explanatory effect size
#
chkn=var(nval)
if(chkn==0){
top=var(xbar)
bot=winvarN(pts,tr=tr)
e.pow=top/bot
}
if(chkn!=0){
vals=0
N=min(nval)
xdat=list()
for(i in 1:nboot){
# sample all J groups first, then compute explanatory power once per replication
for(j in 1:J)xdat[[j]]=sample(x[[j]],N)
vals[i]=t1way.effect(xdat,tr=tr)$Var.Explained
}
e.pow=loc.fun(vals,na.rm=TRUE)
}
list(TEST=TEST,nu1=nu1,nu2=nu2,n=nv,p.value=sig,Var.Explained=e.pow,
Effect.Size=sqrt(e.pow))
}
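
# EXAMPLE (an illustrative sketch, simulated data; with unequal sample
# sizes the bootstrap estimate of explanatory power is used, so
# t1way.effect, defined elsewhere in this file, must be available):
# set.seed(5)
# x=list(rnorm(20),rnorm(25,.5),rnorm(30))
# t1wayv2(x,tr=.2)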




pool.a.list<-function(x){
#
# x has list mode. Pool all of the data into a single R variable.
#
if(!is.list(x))stop("x should have list mode")
pts=NULL
for(j in 1:length(x))pts=c(pts,x[[j]])
pts
}
esfun<-function(x,tr=.2,nboot=100,SEED=TRUE){
#
#  Compute a heteroscedastic robust measure of effect size
#  based on a robust analog of explanatory power
#
#  A one-way design is assumed.
#
#  Can be used with independent or dependent groups.
#  The same effect size reported by t1wayv2 is computed.
#
if(is.data.frame(x))x=as.matrix(x)
if(SEED)set.seed(2)
if(is.matrix(x))x<-listm(x)
J<-length(x)
xbar=NA
nval=NA
pts=NULL
for(j in 1:J){
nval[j]=length(x[[j]])
xbar[j]<-mean(x[[j]],tr=tr)
}
pts=pool.a.list(x)
chkn=var(nval)
if(chkn==0){
top=var(xbar)
bot=winvarN(pts,tr=tr)
e.pow=top/bot
}
if(chkn!=0){
if(SEED)set.seed(2)
vals=0
N=min(nval)
xdat=list()
for(i in 1:nboot){
# sample all J groups first, then compute explanatory power once per replication
for(j in 1:J)xdat[[j]]=sample(x[[j]],N)
vals[i]=t1way.effect(xdat,tr=tr)$Var.Explained
}
e.pow=mean(vals)
}
list(Effect.Size=sqrt(e.pow))
}

esmcp<-function(x,tr=.2,nboot=100,SEED=TRUE){
#
#  Compute a heteroscedastic robust measure of effect size
#  based on a robust analog of explanatory power
#
#  A one-way design is assumed
#
#  Can be used with independent or dependent groups.
#  The same effect size reported by yuenv2 is computed.
#
#  OUTPUT: effect size for all pairs of groups
#
if(is.data.frame(x))x=as.matrix(x)
if(SEED)set.seed(2)
if(is.matrix(x))x<-listm(x)
J<-length(x)
JALL=(J^2-J)/2
est=matrix(NA,JALL,3)
dimnames(est)=list(NULL,c("Group","Group","Effect Size"))
ic=0
for(j in 1:J){
for(k in 1:J){
if(j<k){
ic=ic+1
est[ic,1]=j
est[ic,2]=k
est[ic,3]=yuenv2(x[[j]],x[[k]],SEED=SEED,tr=tr,nboot=nboot)$Effect.Size
}}}
list(Estimates=est)
}
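
# EXAMPLE (an illustrative sketch, simulated data; yuenv2, defined
# elsewhere in this file, must be available):
# set.seed(6)
# x=list(rnorm(30),rnorm(30,.8),rnorm(30))
# esmcp(x)$Estimates  # effect size for each pair of groups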

l2v<-function(x){
#
#  Combine data stored in list mode into a single
#  vector.
if(!is.list(x))stop("x does not have list mode")
y=NULL
for(i in 1:length(x))y=c(y,x[[i]])
y
}
RANGE<-function(x){
#
#  compute the range (max - min) using data in x
#
r=diff(range(x))
r
}

Quart<-function(x){
# compute the quartiles
 temp=summary(x)
 Qv=c(temp[2],temp[3],temp[5])
 Qv
 }
olsMUL<-function(x,y,xout=FALSE,outfun=out){
#
# y is assumed to be a matrix. For each
#  outcome variable (for each column of  y), this function returns
# confidence intervals for each slope parameter and intercept.
#
# Performs OLS regression calling built-in R or S+ functions.
#
# xout=T will eliminate any leverage points (outliers among x values)
#
m<-elimna(cbind(x,y))
x<-as.matrix(x)
p<-ncol(x)
pp<-ncol(m)
p1=p+1
x<-m[,1:p]
y<-m[,p1:pp]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1:pp]
}
x<-as.matrix(x)
y<-as.matrix(y)
py=ncol(y)
for(j in 1:py){
print(paste("Response", j))
temp<-summary(lm(y[,j]~x))
coef<-temp[4]$coefficients
Ftest<-temp[10]$fstatistic
Ftest.p.value<-1-pf(Ftest[1],Ftest[2],Ftest[3])
Rval=Rsq(x,y[,j])
print(list(coef=coef,Ftest.p.value=Ftest.p.value,R.squared=Rval))
}
}
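
# EXAMPLE (an illustrative sketch, simulated data; olsMUL prints results
# for each outcome variable rather than returning them, and it assumes
# the function Rsq, defined elsewhere in this file, is available):
# set.seed(7)
# x=matrix(rnorm(50*2),ncol=2)
# y=cbind(x[,1]+rnorm(50),rnorm(50))
# olsMUL(x,y)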

prplot<-function(x,y,pval=ncol(x),regfun=tsreg,fr=.8,est=onestep,op=1,
xlab="X",ylab="Residuals",xout=FALSE,outfun=out,...){
#
# Goal: check for curvature associated with the predictor
# indicated by pval.
# This is done by creating a partial residual plot.
# That is, subtract out the linear prediction based
# on the other predictors and then
# smooth the result versus the predictor in the column of x indicated by pval.
#
x=as.matrix(x)
p=ncol(x)
p1=p+1
temp=elimna(cbind(x,y))
x=temp[,1:p]
y=temp[,p1]
if(xout){
flag<-outfun(x,...)$keep
x<-as.matrix(x)
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(!is.matrix(x))stop("Should have two or more variables stored in a matrix")
flag<-rep(T,ncol(x))
flag[pval]<-F
temp<-regfun(x[,flag],y)$residual
if(op!=1)rungen(x[,!flag],temp,est=est,fr=fr,xlab=xlab,ylab=ylab,...)
if(op==1)lplot(x[,!flag],temp,xlab=xlab,ylab=ylab)
}
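
# EXAMPLE (an illustrative sketch; simulated data with curvature in the
# second predictor, which the partial residual plot should reveal):
# set.seed(8)
# x=matrix(rnorm(100*2),ncol=2)
# y=x[,1]+x[,2]^2+rnorm(100)
# prplot(x,y,pval=2)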
adpchk<-function(x,y,adfun=adrun,gfun=runm3d,xlab="Additive Fit",
ylab="Gen. Fit",plotfun=lplot,xout=FALSE,outfun=out,...){
#
# Compare adrun estimate to runm3d
#
x=as.matrix(x)
p=ncol(x)
p1=p+1
temp=elimna(cbind(x,y))
x=temp[,1:p]
y=temp[,p1]
if(xout){
flag<-outfun(x,...)$keep
x<-as.matrix(x)
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
fit1<-adfun(x,y,pyhat=TRUE,plotit=FALSE,...)
if(is.list(fit1))fit1=fit1[[length(fit1)]]
fit2<-gfun(x,y,pyhat=TRUE,plotit=FALSE,pr=FALSE,...)
if(is.list(fit2))fit2=fit2[[length(fit2)]]
plotfun(fit1,fit2,xlab=xlab,ylab=ylab)
abline(0,1)
}

qrchk<-function(x,y,qval=.5,nboot=1000,com.pval=FALSE,SEED=TRUE,alpha=.05,pr=TRUE,
xout=FALSE,outfun=out,chk.table=FALSE,...){
#
# Test of a linear fit based on quantile regression
# The method stems from He and Zhu 2003, JASA, 98, 1013-1022.
# Here, resampling is avoided using approximate critical values if
# com.pval=F
#
#  To get a p-value, via simulations, set  com.pval=T
#  nboot is number of simulations used to determine p-value.
#  Execution time can be quite high
#
#  This function quickly determines .1, .05, .025 and .01
#  critical values for n<=400 and p<=6 (p= number of predictors)
#  and when dealing with the .5 quantile.
#  Otherwise, critical values are determined via simulations, which
#  can have high execution time.
#
if(pr){
if(!com.pval)print("To get a p-value, set com.pval=T")
print("Reject if test statistic is >= critical value")
}
x<-as.matrix(x)
p<-ncol(x)
pp1<-p+1
yx<-elimna(cbind(y,x)) #Eliminate missing values.
y<-yx[,1]
x<-yx[,2:pp1]
store.it=F
x<-as.matrix(x)
p.val<-NULL
crit.val<-NULL
x<-as.matrix(x)
if(xout){
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
}
# shift the  marginal x values so that the test statistic is
# invariant under changes in location
n<-length(y)
x=standm(x)
if(p<=6){
if(qval==.5){
aval<-c(.1,.05,.025,.01)
aokay<-duplicated(c(alpha,aval))
aokay<-sum(aokay)
if(aokay>0){
crit10<-matrix(c(.0254773,.008372,.00463254,.0023586,.000959315,.00042248,
.00020069,
.039728,.012163,.0069332,.0036521,.001571,.0006882, .0003621,
.055215,.0173357,.009427,.004581,.0021378,.00093787,.00045287,
.075832,.0228556,.0118571,.005924,.00252957,.0011593,.00056706,
.103135,.0298896,.0151193,.0073057,.00305456,.0014430,.000690435,
.12977,.03891,.018989,.009053,.0036326,.001617,.000781457),ncol=6,nrow=7)
crit05<-matrix(c(.031494,.010257,.00626,.00303523,.0012993,.000562247,
.00025972,
.046296,.015066,.00885556,.0045485,.0110904,.00086946,.000452978,
.063368,.0207096546,.010699,.005341,.0025426,.0011305,.000539873,
.085461,.027256,.014067,.0071169,.002954,.0013671,.000660338,
.11055,.03523,.017511,.0084263,.0036533,.0016338,.00081289,
.13692,.043843,.0222425,.0102265,.004283,.0019,.000907241),ncol=6,nrow=7)
crit025<-matrix(c(.0361936,.012518,.007296,.0036084,.00172436,.000725365,
.000327776,
.05315,.017593,.0102389,.0055043,.00227459,.0010062,.000523526,
.07214,.023944,.013689,.0060686,.0028378,.00136379,.000635645,
.093578,.0293223,.0156754,.0086059,.0035195,.001694,.00074467,
.118414,.03885,.0201468,.0094298,.0040263,.00182437,.000916557,
.14271,.047745,.0253974,.011385,.004725,.00207588,.0010191),ncol=6,nrow=7)
crit01<-matrix(c(.0414762,.0146553,.0098428,.0045274,.00219345,.00096244,
.000443827,
.058666,.020007,.01129658,.0063092,.002796,.0011364,.000628054,
.079446,.0267958,.015428,.0071267,.0034163,.0015876,.000734865,
.102736,.0357572,.017786,.0093682,.0042367,.0019717,.000868506,
.125356,.041411,.0234916,.0106895,.0047028,.0020759,.00101052,
.14837,.053246,.027759,.012723,.00528,.002437,.00116065),ncol=6,nrow=7)
if(alpha==.1)critit<-crit10
if(alpha==.05)critit<-crit05
if(alpha==.025)critit<-crit025
if(alpha==.01)critit<-crit01
nvec<-c(10,20,30,50,100,200,400)
nval<-duplicated(c(n,nvec))
nval<-nval[2:7]
if(sum(nval)>0)crit.val<-critit[nval,p]
if(is.null(crit.val)){
# interpolate between stored critical values; for n>400 crit.val remains
# NULL and a p-value is computed via simulations instead
if(n<=400){
loc<-rank(c(n,nvec))
xx<-c(1/nvec[loc[1]-1]^1.5,1/nvec[loc[1]]^1.5)
yy<-c(critit[loc[1]-1,p],critit[loc[1],p])
icoef<-lsfit(xx,yy)$coef
crit.val<-icoef[1]+icoef[2]/n^1.5
}
}}}}
if(is.null(crit.val)){
# no critical value found
if(!com.pval){
print("Critical values not available, will set com.pval=T")
print("and compute a p-value")
com.pval<-T
}}
gdot<-cbind(rep(1,n),x)
gdot<-ortho(gdot)
x<-gdot[,2:pp1]
x<-as.matrix(x)
temp<-rqfit(x,y,qval=qval,res=TRUE)
coef<-temp$coef
psi<-NA
psi<-ifelse(temp$residuals>0,qval,qval-1)
rnmat<-matrix(0,nrow=n,ncol=pp1)
ran.mat<-apply(x,2,rank)
flagvec<-apply(ran.mat,1,max)
for(j in 1:n){
flag<-ifelse(flagvec<=flagvec[j],TRUE,FALSE)
flag<-as.numeric(flag)
rnmat[j,]<-apply(flag*psi*gdot,2,sum)
}
rnmat<-rnmat/sqrt(n)
temp<-matrix(0,pp1,pp1)
for(i in 1:n)temp<-temp+rnmat[i,]%*%t(rnmat[i,])
temp<-temp/n
test<-max(eigen(temp)$values)
if(com.pval){
if(SEED)set.seed(2)
p.val<-0
rem<-0
for(i in 1:nboot){
yboot<-rnorm(n)
if(p==1)xboot<-rnorm(n)
if(p>1)xboot<-rmul(n,p=p)
temp3<-qrchkv2(xboot,yboot,qval=qval)
if(test>=temp3)p.val<-p.val+1
rem[i]<-temp3
}
ic10<-round(.9*nboot)
ic05<-round(.95*nboot)
ic025<-round(.975*nboot)
ic01<-round(.99*nboot)
rem<-sort(rem)
p.val<-1-p.val/nboot
# report critical values based on the simulated null distribution
print("The .1, .05, .025 and .01 critical values are:")
print(c(rem[ic10],rem[ic05],rem[ic025],rem[ic01]))
crit.val<-rem[ic05]
}
de="Fail to reject"
if( test>=crit.val)de="Reject"
list(test.stat=test,crit.value=crit.val,p.value=p.val,Decision=de)
}
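
# EXAMPLE (an illustrative sketch, simulated data; assumes the functions
# rqfit and ortho in this file, and hence the quantreg package, are
# available; with n=100, one predictor and qval=.5, a stored critical
# value is used, so no simulations are required):
# set.seed(9)
# x=rnorm(100)
# y=x+rnorm(100)
# qrchk(x,y)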

qhomt<-function(x,y,nboot=100,alpha=.05,qval=c(.2,.8),plotit=TRUE,SEED=TRUE,
xlab="X",ylab="Y",xout=FALSE,outfun=outpro,pr=TRUE,...){
#
#   Test the hypothesis that the error term is homogeneous by
#   computing a confidence interval for beta_1-beta_2, the
#   difference between the qval[2] and qval[1] quantile
#   regression slopes,
#   estimated via the Koenker-Bassett method.
#   So by default, use the .8 quantile slope minus
#   the .2 quantile slope.
#
if(length(qval)!=2)stop("Argument qval should have 2 values exactly")
x<-as.matrix(x)
if(ncol(x)!=1)stop("Only one predictor is allowed; use qhomtv2")
xy<-elimna(cbind(x,y))
x<-xy[,1]
x<-as.matrix(x)
y<-xy[,2]
if(xout){
flag<-outfun(x,...)$keep
x<-as.matrix(x)
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,qhomtsub,x,y,qval) # An nboot vector.
se<-sqrt(var(bvec))
temp<-qplotreg(x,y,qval=qval,plotit=plotit,xlab=xlab,ylab=ylab)
crit<-qnorm(1-alpha/2)
crit.ad<-NA
dimnames(temp)=c(NULL,NULL)
dif<-temp[2,2]-temp[1,2]
regci<-NA
regci[1]<-dif-crit*se
regci[2]<-dif+crit*se
sig.level<-2*(1-pnorm(abs(dif)/se))
regci.ad<-NA
if(alpha==.05 && qval[1]==.2 && qval[2]==.8)crit.ad<-qnorm(0-.09/sqrt(length(y))+.975)
ci.ad<-c(dif-crit.ad*se,dif+crit.ad*se)
list(slope.bottom=temp[1,2],slope.top=temp[2,2],
dif.est=dif,dif.ci=regci,sig.level=sig.level,se=se,adjusted.ci=ci.ad)
}
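
# EXAMPLE (an illustrative sketch; simulated data with a heteroscedastic
# error term, so the .8 and .2 quantile regression slopes should differ;
# the quantile regression fits may rely on the quantreg package):
# set.seed(10)
# x=runif(100)
# y=x+(1+2*x)*rnorm(100)
# qhomt(x,y,plotit=FALSE)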



runmean2g<-function(x1,y1,x2,y2,fr=.8,est=tmean,xlab="X",ylab="Y",SCAT=TRUE,
sm=FALSE,nboot=40,SEED=TRUE,eout=FALSE,xout=FALSE,outfun=out,LP=TRUE,...){
#
# Plot of running interval smoother for two groups
#
# fr controls amount of smoothing
# tr is the amount of trimming
#
# Missing values are automatically removed.
#
# sm=T results in using bootstrap bagging when estimating the regression line
# nboot controls number of bootstrap samples
#
m<-elimna(cbind(x1,y1))
if(eout && xout)stop("Not allowed to have eout=xout=T")
if(eout){
flag<-outfun(m,plotit=FALSE)$keep
m<-m[flag,]
}
x1<-m[,1]
y1<-m[,2]
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
}
m<-elimna(cbind(x2,y2))
if(eout && xout)stop("Not allowed to have eout=xout=T")
if(eout){
flag<-outfun(m,plotit=FALSE)$keep
m<-m[flag,]
}
x2<-m[,1]
y2<-m[,2]
if(xout){
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
ord1=order(x1)
x1=x1[ord1]
y1=y1[ord1]
ord2=order(x2)
x2=x2[ord2]
y2=y2[ord2]
if(!sm){
temp<-rungen(x1,y1,est=est,fr=fr,pyhat=TRUE,plotit=FALSE,xout=FALSE,eout=FALSE,LP=LP,...)
rmd1<-temp[1]$output
}
if(sm){
temp<-runmbo(x1,y1,est=est,fr=fr,pyhat=TRUE,plotit=FALSE,SEED=SEED,
nboot=nboot,eout=FALSE,xout=FALSE,...)
rmd1<-temp
}
if(!sm){
temp<-rungen(x2,y2,fr=fr,est=est,pyhat=TRUE,plotit=FALSE,xout=FALSE,eout=FALSE,LP=LP,...)
rmd2<-temp[1]$output
}
if(sm){
temp<-runmbo(x2,y2,est=est,fr=fr,pyhat=TRUE,plotit=FALSE,SEED=SEED,
nboot=nboot,eout=FALSE,xout=FALSE,...)
rmd2<-temp
}
plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab)
sx1<-sort(x1)
sx2<-sort(x2)
xorder1<-order(x1)
xorder2<-order(x2)
sysm1<-rmd1[xorder1]
sysm2<-rmd2[xorder2]
if(LP){
sysm1=lplot(sx1,sysm1[xorder1],plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
#sysm1=lplot(x1[xorder1],sysm1,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
sysm2=lplot(sx2,sysm2,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
}
if(SCAT)points(x1,y1)
if(!SCAT)points(x1,y1,type="n")
if(SCAT)points(x2,y2,pch="+")
lines(sx1,sysm1)
lines(sx2,sysm2,lty=2)
}
ancovamp<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,pts=NA,SEED=T){
#
# Compare two independent  groups using the ancova method.
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
# Design points are chosen based on depth of points in x1 if pts=NA
#  Assume data are in x1 y1 x2 and y2
#
if(SEED)set.seed(2) # now cov.mve always returns same result
x1=as.matrix(x1)
p=ncol(x1)
p1=p+1
m1=elimna(cbind(x1,y1))
x1=m1[,1:p]
y1=m1[,p1]
x2=as.matrix(x2)
p=ncol(x2)
p1=p+1
m2=elimna(cbind(x2,y2))
x2=m2[,1:p]
y2=m2[,p1]
#
#
#
if(is.na(pts[1])){
x1<-as.matrix(x1)
pts<-ancdes(x1)
}
pts<-as.matrix(pts)
if(nrow(pts)>=29){
print("WARNING: More than 28 design points")
print("Only first 28 are used.")
pts<-pts[1:28,]
}
n1<-1
n2<-1
vecn<-1
mval1<-cov.mve(x1)
mval2<-cov.mve(x2)
for(i in 1:nrow(pts)){
n1[i]<-length(y1[near3d(x1,pts[i,],fr1,mval1)])
n2[i]<-length(y2[near3d(x2,pts[i,],fr2,mval2)])
}
flag<-rep(T,nrow(pts))
for(i in 1:nrow(pts))if(n1[i]<10 || n2[i]<10)flag[i]<-F
pts<-pts[flag,]
if(sum(flag)==1)pts<-t(as.matrix(pts))
if(sum(flag)==0)stop("No comparable design points found, might increase span.")
mat<-matrix(NA,nrow(pts),8)
dimnames(mat)<-list(NULL,c("n1","n2","DIF","TEST","se","ci.low","ci.hi","p.value"))
for (i in 1:nrow(pts)){
g1<-y1[near3d(x1,pts[i,],fr1,mval1)]
g2<-y2[near3d(x2,pts[i,],fr2,mval2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
test<-yuen(g1,g2,tr=tr)
mat[i,1]<-length(g1)
mat[i,2]<-length(g2)
if(length(g1)<=5)print(paste("Warning, there are",length(g1)," points corresponding to the design point X=",pts[i,]))
if(length(g2)<=5)print(paste("Warning, there are",length(g2)," points corresponding to the design point X=",pts[i,]))
mat[i,3]<-test$dif
mat[i,4]<-test$teststat
mat[i,5]<-test$se
mat[i,8]<-test$p.value
if(nrow(pts)>=2)critv<-smmcrit(test$df,nrow(pts))
if(nrow(pts)==1)critv<-qt(.975,test$df)
cilow<-test$dif-critv*test$se
cihi<-test$dif+critv*test$se
mat[i,6]<-cilow
mat[i,7]<-cihi
}
list(points=pts,output=mat,crit=critv)
}
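
# EXAMPLE (an illustrative sketch, simulated data with two covariates;
# assumes the MASS package is attached so that cov.mve is available):
# set.seed(11)
# x1=matrix(rnorm(100*2),ncol=2)
# y1=x1[,1]+rnorm(100)
# x2=matrix(rnorm(100*2),ncol=2)
# y2=x2[,1]+rnorm(100)
# ancovamp(x1,y1,x2,y2)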

rplot2g<-runmean2g

Qancsm<-function(x1,y1,x2,y2,crit.mat=NULL,nboot=200,SEED=TRUE,REP.CRIT=FALSE,
qval=.5,q=NULL,xlab="X",ylab="Y",plotit=TRUE,pr=TRUE,xout=FALSE,outfun=out,...){
#
# Compare two nonparametric
# regression lines corresponding to two independent groups
#  using the depths of smooths.
#
# Null hypothesis: the regression lines are identical in terms of the
# median of Y, given X, for all X.
# The method is based on comparing the depth of the fitted regression lines
# and is essentially a slight variation of the method in Wilcox
# (in press) Journal of Data Science.
#
# One covariate only is allowed.
#
if(ncol(as.matrix(x1))>1)stop("One covariate only is allowed")
if(!is.null(q))qval=q
if(xout){
flag1=outfun(x1)$keep
flag2=outfun(x2)$keep
x1=x1[flag1]
y1=y1[flag1]
x2=x2[flag2]
y2=y2[flag2]
}
if(SEED)set.seed(2)
xy=elimna(cbind(x1,y1))
x1=xy[,1]
xord=order(x1)
x1=x1[xord]
y1=xy[xord,2]
xy=elimna(cbind(x2,y2))
x2=xy[,1]
xord=order(x2)
x2=x2[xord]
y2=xy[xord,2]
n1=length(y1)
n2=length(y2)
if(is.null(crit.mat[1])){
if(pr)print("Determining critical value. This might take a while")
crit.val=NA
yall=c(y1,y2)
xall=c(x1,x2)
nn=n1+n2
il=n1+1
for(i in 1:nboot){
data=sample(nn,nn,T)
yy1=yall[data[1:n1]]
yy2=yall[data[il:nn]]
xx1=xall[data[1:n1]]
xx2=xall[data[il:nn]]
crit.mat[i]=Qdepthcom(xx1,yy1,xx2,yy2,qval=qval)
}}
dep=Qdepthcom(x1,y1,x2,y2,qval=qval)
pv=1-mean(crit.mat<dep)
if(!REP.CRIT)crit.mat=NULL
if(plotit){
plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab)
temp1=cobs(x1,y1,print.mesg=FALSE,print.warn=FALSE,tau=qval)
temp2=cobs(x2,y2,print.mesg=FALSE,print.warn=FALSE,tau=qval)
points(x1,y1)
points(x2,y2,pch="+")
lines(x1,temp1$fitted)
lines(x2,temp2$fitted,lty=2)
}
list(p.value=pv,crit.mat=crit.mat,test.depth=dep)
}


Kmeans<-function(x,k,xout=FALSE,outfun=out){
#
# Do K means cluster analysis, outliers removed
#  if xout=T
x=elimna(x)
x=as.matrix(x)
if(xout){
flag<-outfun(x,plotit=FALSE)$keep
x<-x[flag,]
}
x=as.matrix(x)
res=kmeans(x,k)
res
}

TKmeans<-function(x,k,trim=0.1,scaling=FALSE,runs=100,points=NULL,
countmode=runs+1,printcrit=FALSE,maxit = 2 * nrow(as.matrix(x))){
#
# Cluster analysis using trimmed k means method.
# This function merely accesses the R package trimcluster
# It uses the same default values for the arguments and it
# performs casewise deletion of missing values
#
x=elimna(x)
library(trimcluster)
res=trimkmeans(x,k=k,trim=trim,scaling=scaling,runs=runs,points=points,
countmode=countmode,printcrit=printcrit,maxit=maxit)
res
}


TKmeans.grp<-function(x,k,y){
#
# Create k groups based on data in x using trimmed k means method
#  Then sort the data in y into k groups based on the groups indicated
#
x=elimna(x)
x=as.matrix(x)
y=as.matrix(y)
res=TKmeans(x,k)
grpid=res$classification # trimmed points are coded k+1 and are dropped below
grpdat=list()
for(i in 1:k){
flag=(grpid==i)
grpdat[[i]]=y[flag,]
}
grpdat
}


Kmeans.grp<-function(x,k,y,xout=FALSE,outfun=out){
#
#
# x and y are assumed to be matrices, each having n rows.
# This function creates k groups based on the data in x.
# Then it sorts the data in y into k groups as indicated
# by the k-means cluster method applied to x.
#
x=elimna(x)
x=as.matrix(x)
y=as.matrix(y)
if(xout){
flag<-outfun(x,plotit=FALSE)$keep
x<-x[flag,]
y=y[flag,]
}
y=as.matrix(y)
res=kmeans(x,k)
grpid=res[[1]]
grpdat=list()
for(i in 1:k){
flag=(grpid==i)
grpdat[[i]]=y[flag,]
}
grpdat
}


lplot2g<-function(x1,y1,x2,y2,fr=.8,est=tmean,xlab="X",ylab="Y",xout=FALSE,eout=FALSE,
outfun=out,...){
#
# Plot smooths for two independent groups based on lplot (LOWESS)
#
# Missing values are automatically removed.
#
m1<-elimna(cbind(x1,y1))
if(eout && xout)stop("Not allowed to have eout=xout=T")
if(eout){
flag<-outfun(m1,plotit=FALSE,...)$keep
m1<-m1[flag,]
}
if(xout){
flag<-outfun(m1[,1],plotit=FALSE,...)$keep
m1<-m1[flag,]
}
x1<-m1[,1]
y1<-m1[,2]
m2<-elimna(cbind(x2,y2))
if(eout){
flag<-outfun(m2,plotit=FALSE,...)$keep
m2<-m2[flag,]
}
if(xout){
flag<-outfun(m2[,1],plotit=FALSE,...)$keep
m2<-m2[flag,]
}
x2<-m2[,1]
y2<-m2[,2]

flag=order(x1)
x1=x1[flag]
y1=y1[flag]
flag=order(x2)
x2=x2[flag]
y2=y2[flag]
temp1<-lplot(x1,y1,pyhat=TRUE,plotit=FALSE)$yhat.values
temp2<-lplot(x2,y2,pyhat=TRUE,plotit=FALSE)$yhat.values
plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab)
points(x1,y1)
points(x2,y2,pch="+")
lines(x1,temp1)
lines(x2,temp2,lty=2)
}
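
# EXAMPLE (an illustrative sketch, simulated data):
# set.seed(12)
# x1=rnorm(50); y1=x1+rnorm(50)
# x2=rnorm(50); y2=.5*x2+rnorm(50)
# lplot2g(x1,y1,x2,y2)  # solid line: group 1; dashed line: group 2
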
smstrcom<-function(x1,y1,x2,y2,nboot=200,pts=NA,plotit=TRUE,
SEED=TRUE,varfun=pbvar,fr=.8,xout=FALSE,outfun=out,...){
#
# Compare the association of the two variables x1 and y1 to the
# association between x2 and y2
# (two independent  groups) using a robust measure of the
# strength of the association associated with Cleveland's LOWESS smoother.
#
#  Assume data are in x1 y1 x2 and y2
#
# Reject at the .05 level if the reported p-value is less than or
# equal to p.crit, which is returned by the function.
#
m<-elimna(cbind(x1,y1))
x1<-m[,1]
y1<-m[,2]
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
}
m<-elimna(cbind(x2,y2))
x2<-m[,1]
y2<-m[,2]
if(xout){
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
if(SEED)set.seed(2)
estmat1=NA
estmat2=NA
data1<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot)
#
for(ib in 1:nboot){
estmat1[ib]=lplotv2(x1[data1[ib,]],y1[data1[ib,]],plotit=FALSE,
varfun=varfun)$Strength.Assoc
estmat2[ib]=lplotv2(x2[data2[ib,]],y2[data2[ib,]],
varfun=varfun,plotit=FALSE)$Strength.Assoc
}
dif<-(estmat1<estmat2)
dif0<-(estmat1==estmat2)
p.value=mean(dif)+.5*mean(dif0)
p.value=2*min(c(p.value,1-p.value))
n1=length(y1)
n2=length(y2)
p1=.05
p2=.05
temp1=tsreg(c(100,200),c(.08,.05),SEED=F)$coef
temp2=tsreg(c(50,100),c(.21,.08),SEED=F)$coef
temp3=tsreg(c(30,50),c(.3,.21),SEED=F)$coef
if(n1<200)p1=temp1[1]+temp1[2]*n1
if(n1<100)p1=temp2[1]+temp2[2]*n1
#if(n1<50)p1=temp3[1]+temp3[2]*n1
#if(n1<30)p1=.3
if(n2<200)p2=temp1[1]+temp1[2]*n2
if(n2<100)p2=temp2[1]+temp2[2]*n2
pcrit=(n2*p1+n1*p2)/(n1+n2)
names(pcrit)=NULL
est1=lplotv2(x1,y1,plotit=FALSE,varfun=varfun)$Strength.Assoc
est2=lplotv2(x2,y2,plotit=FALSE,varfun=varfun)$Strength.Assoc
if(plotit)runmean2g(x1,y1,x2,y2,fr=fr,est=tmean,sm=T)
list(p.value=p.value,pcrit.05=pcrit,cor1=est1,cor2=est2)
}






pcorbv4<-function(x,y,SEED=TRUE){
#   Compute a .95 confidence interval for Pearson's correlation coefficient.
#
#   This function uses an adjusted percentile bootstrap method that
#   gives good results when the error term is heteroscedastic.
#
#   WARNING: If the number of bootstrap samples is altered, it is
#   unknown how to adjust the confidence interval when n < 250.
#   (An obvious guess seems to work well, but no formal investigations
#    have been performed.)
#
nboot<-599  #Number of bootstrap samples
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
xy<-elimna(cbind(x,y))
x<-xy[,1]
y<-xy[,2]
#print("Taking bootstrap samples; please wait")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,pcorbsub,x,y) # A 1 by nboot matrix.
ilow<-13
ihi<-586
if(length(y) < 250){
ilow<-12
ihi<-587
}
if(length(y) < 180){
ilow<-11
ihi<-588
}
if(length(y) < 80){
ilow<-8
ihi<-592
}
if(length(y) < 40){
ilow<-7
ihi<-593
}
bsort<-sort(bvec)
r<-cor(x,y)
ci<-c(bsort[ilow],bsort[ihi])
list(r=r,ci=ci)
}
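
# EXAMPLE (an illustrative sketch, simulated data):
# set.seed(13)
# x=rnorm(60)
# y=x+rnorm(60)
# pcorbv4(x,y)  # r and the adjusted .95 bootstrap CI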


mlrreg.Stest<-function(x,y,nboot=100,SEED=TRUE){
#
#  Test hypothesis that all slopes=0  based on Rousseeuw et al.
#  multivariate regression estimator
#
#  Strategy: Use bootstrap estimate of standard errors followed by
#  Hotelling type test.
#
if(SEED)set.seed(2)
est=as.vector(mlrreg(x,y)$coef[-1,])
n=nrow(x)
JK=ncol(x)*ncol(y)
vals=matrix(0,nrow=nboot,ncol=JK)
for(i in 1:nboot){
bsam=sample(n,replace=TRUE)
vals[i,]=as.vector(mlrreg(x[bsam,],y[bsam,])$coef[-1,])
}
Sv=cov(vals)
est=as.matrix(est)
k=1/JK
test <- k * crossprod(est, solve(Sv, est))[1, ]
v1=JK-1
v2=n-JK
pval=1-pf(test,v1,v2)
list(test.stat=test,p.value=pval,est=est)
}
mlrGtest<-function(x,y,regfun=mlrreg,nboot=100,SEED=TRUE){
#
#  Test hypothesis that all slopes=0  based 
#  on some multivariate regression estimator.
#  By default the Rousseeuw et al.
#  multivariate regression estimator (RADA) is used.
#
#  Strategy: Use bootstrap estimate of standard errors followed by
#  Hotelling type test.
#
if(SEED)set.seed(2)
estall=regfun(x,y)$coef
est=as.vector(estall[-1,])
n=nrow(x)
JK=ncol(x)*ncol(y)
vals=matrix(0,nrow=nboot,ncol=JK)
for(i in 1:nboot){
bsam=sample(n,replace=TRUE)
vals[i,]=as.vector(regfun(x[bsam,],y[bsam,])$coef[-1,])
}
Sv=cov(vals)
est=as.matrix(est)
k=1/JK
test <- k * crossprod(est, solve(Sv, est))[1, ]
v1=JK-1
v2=n-JK
pval=1-pf(test,v1,v2)
list(test.stat=test,p.value=pval,est=estall)
}
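
# EXAMPLE (an illustrative sketch, simulated data; uses the default
# mlrreg estimator, so the MASS package is required):
# set.seed(14)
# x=matrix(rnorm(80*2),ncol=2)
# y=cbind(x[,1]+rnorm(80),rnorm(80))
# mlrGtest(x,y)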
power.chisq.test<-function(w = NULL, N = NULL, df = NULL,
 sig.level = 0.05, power = NULL){
library(pwr)
res=pwr.chisq.test(w=w,N=N,df=df,sig.level=sig.level, power = power)
res
}

bi2KMS<-function(r1=sum(x),n1=length(x),r2=sum(y),n2=length(y),
x=NA,y=NA,alpha=.05){
#
# Test the hypothesis that two independent binomials have equal
# probability of success
#
# r1=number of successes in group 1
# n1=number of observations in group 1
#
# Use Kulinskaya et al. method American Statistician, 2010, 64, 350-
#
N=n1+n2
u=.5
Dhat=(r1+.5)/(n1+1)-(r2+.5)/(n2+1)
psihat=((r1+.5)/(n1+1)+(r2+.5)/(n2+1))/2
nuhat=(1-2*psihat)*(.5-n2/N)
what=sqrt(2*u*psihat*(1-psihat)+nuhat^2)
se=qnorm(1-alpha/2)*sqrt(u/(2*n1*n2/N))
val1=max(c(-1,(u*Dhat+nuhat)/what-se))
ci=what*sin(asin(val1))/u-nuhat/u
val2=min(c(1,(u*Dhat+nuhat)/what+se))
ci[2]=what*sin(asin(val2))/u-nuhat/u
list(ci=ci,p1=r1/n1,p2=r2/n2)
}
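
# EXAMPLE (an illustrative sketch: 15 successes in 40 trials versus
# 10 successes in 50 trials):
# bi2KMS(r1=15,n1=40,r2=10,n2=50)  # CI for p1-p2 plus the two estimates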


binband<-function(x,y,KMS=FALSE,alpha=.05,ADJ.P=FALSE){
#
#  Comparing two independent variables in terms of their probability function.
#  For each value that occurs, say x, test P(X=x)=P(Y=x)
#  So this method is useful when dealing with highly discrete data.
#
#  If KMS=T, use Kulinskaya, Morgenthaler and Staudte (2010) method for comparing binomials
#  Otherwise use Storer and Kim.
#
#   ADJ.P=T means that critical p-value is adjusted to control FWE when the sample
#   size is small (<50).
#
#
#  Hochberg's method is used to determine critical p-values so that FWE=alpha
#
x=elimna(x)
y=elimna(y)
vals=sort(unique(c(x,y)))
ncon=length(vals)
n1=length(x)
n2=length(y)
p.values=NA
adj=1
cv=1
if(!KMS){
output=matrix(NA,ncol=6,nrow=length(vals))
dimnames(output)=list(NULL,c("Value","p1.est","p2.est","p1-p2","p.value","p.crit"))
}
if(KMS){
output=matrix(NA,ncol=8,nrow=length(vals))
dimnames(output)=list(NULL,c("Value","p1.est","p2.est","p1-p2","ci.low","ci.up","p.value",
"p.crit"))
}
for(i in 1:length(vals)){
x1=sum(x==vals[i])
y1=sum(y==vals[i])
if(!KMS){
output[i,5]=twobinom(x1,n1,y1,n2)$p.value
output[i,2]=x1/n1
output[i,3]=y1/n2
output[i,1]=vals[i]
output[i,4]=output[i,2]-output[i,3]
}
if(KMS){
temp=bi2KMSv2(x1,n1,y1,n2)
output[i,1]=vals[i]
output[i,5]=temp$ci[1]
output[i,6]=temp$ci[2]
output[i,2]=x1/n1
output[i,3]=y1/n2
output[i,4]=output[i,2]-output[i,3]
output[i,7]=temp$p.value
}}
# Determine adjusted  critical p-value using Hochberg method
ncon=length(vals)
dvec=alpha/c(1:ncon)
if(ADJ.P){
mn=max(c(n1,n2))
cv=1
if(ncon!=2){
if(mn>50){
cv=2-(mn-50)/50
if(cv<1)cv=1
}
if(mn<=50)cv=2
}
if(KMS){
flag=(output[,7]<=2*alpha)
output[flag,8]=output[flag,8]/cv
}
if(!KMS){
cv=1
flag=(output[,5]<=2*alpha)
if(min(c(n1,n2))<20 && n1!=n2 && ncon>=5)cv=2
output[flag,5]=output[flag,5]/cv
}}
if(KMS){
temp2=order(0-output[,7])
output[temp2,8]=dvec
}
if(!KMS){
temp2=order(0-output[,5])
output[temp2,6]=dvec
}
output
}
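
# EXAMPLE (an illustrative sketch; simulated, highly discrete data;
# twobinom, defined elsewhere in this file, must be available):
# set.seed(15)
# x=rbinom(50,4,.5)
# y=rbinom(60,4,.3)
# binband(x,y)  # one row per observed value, with Hochberg critical p-values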



tworegwb<-function(x1,y1,x2,y2,nboot=599,RAD=FALSE,alpha=.05,SEED=TRUE,xout=FALSE,
outfun=out){
#
# Simple regression (one predictor)
# Test H_0: two independent groups have equal slopes.
#
xy=elimna(cbind(x1,y1))
if(ncol(xy)>2)stop("This function only allows one covariate")
if(xout){
m<-cbind(x1,y1)
flag<-outfun(x1,plotit=FALSE)$keep
m<-m[flag,]
x1<-m[,1]
y1<-m[,2]
m<-cbind(x2,y2)
flag<-outfun(x2,plotit=FALSE)$keep
m<-m[flag,]
x2<-m[,1]
y2<-m[,2]
}
x=c(x1,x2)
y=c(y1,y2)
g=c(rep(0,length(x1)),rep(1,length(x2)))
xgy=elimna(cbind(x,g,x*g,y))
xg=xgy[,1:3]
y=xgy[,4]
res=olswbtest(xg,y,nboot=nboot,SEED=SEED,RAD=RAD,alpha=alpha)
res[3,6]
}

regpreCV<-function(x,y,regfun=tsreg,varfun=pbvar,adz=TRUE,model=NULL,locfun=mean,
xout=FALSE,outfun=out,
plotit=TRUE,xlab="Model Number",ylab="Prediction Error",...){
#
# Estimate the prediction error using the regression method
#   regfun in conjunction with leave-one-out cross-validation
#
#   The argument model should have list mode, model[[1]] indicates
#   which predictors are used in the first model. For example, storing
#   1,4 in model[[1]] means predictors 1 and 4 are being considered.
#   If model is not specified, and number of predictors is at most 5,
#   then all models are considered.
#
#   If adz=T, the model having all regression slopes equal to zero is added
#   to the models considered. That is, only the measure of location
#   corresponding to locfun is used to predict y.
#
x<-as.matrix(x)
d<-ncol(x)
p1<-d+1
temp<-elimna(cbind(x,y))
x<-temp[,1:d]
y<-temp[,d+1]
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(is.null(model)){
if(d<=5)model<-modgen(d,adz=adz)
if(d>5)model[[1]]<-c(1:ncol(x))
}
mout<-matrix(NA,length(model),3,dimnames=list(NULL,c("est.error",
"var.used","rank")))
for (imod in 1:length(model)){
nmod=length(model[[imod]])-1
temp=c(nmod:0)
mout[imod,2]=sum(model[[imod]]*10^temp)
#
if(sum(model[[imod]]==0)!=1){
xx<-x[,model[[imod]]]
xx<-as.matrix(xx)
mout[imod,1]<-regpecv(xx,y,regfun=regfun,varfun=varfun,...)
}
#
if(sum(model[[imod]]==0)==1){
mout[imod,1]<-locCV(y,varfun=varfun,locfun=locfun)
}}
mout[,3]=rank(mout[,1])
if(plotit)plot(c(1:nrow(mout)),mout[,1],xlab=xlab,ylab=ylab)
mout
}

locCV=function(y,varfun=pbvar,locfun=median){
vals=NA
n=length(y)
est=locfun(y)
for(i in 1:n)vals[i]=y[i]-locfun(y[-i])
res=varfun(vals)
res
}


esI<-function(x,tr=.2,nboot=100,SEED=TRUE){
#
# Explanatory measure of effect size for an interaction in
# a 2-by-2 ANOVA
#
#  Assume x is a matrix with 4 columns or has list mode with length 4.
#  Also assume the interaction is for x_1-x_2 versus x_3-x_4.
#
if(is.matrix(x)|| is.data.frame(x))x=listm(x)
es=yuenv2(outer(x[[1]],x[[2]],"-"),outer(x[[3]],x[[4]],"-"),
tr=tr,nboot=nboot,SEED=SEED)$Effect.Size
list(Effect.Size=es)
}
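
# EXAMPLE (an illustrative sketch; a 2-by-2 design with 20 observations
# per cell and no true interaction):
# set.seed(16)
# x=matrix(rnorm(80),ncol=4)
# esI(x)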


esImcp<-function(J,K,x,tr=0.2,nboot=100,SEED=TRUE){
#
#  Compute measure of effect size for all interactions in a J-by-K design
#  A robust, heteroscedastic measure of effect (explanatory measure of
#  effect size) is used.
#
if(is.matrix(x)|| is.data.frame(x))x=listm(x)
con=con2way(J,K)$conAB
es=NULL
for (j in 1:ncol(con)){
flag=(con[,j]!=0)
es[j]=esI(x[flag],tr=tr,nboot=nboot,SEED=SEED)$Effect.Size
}
list(Effect.Sizes=es,contrast.coef=con)
}


ESmainMCP<-function(J,K,x,tr=0.2,nboot=100,SEED=TRUE){
#
#  Compute explanatory measure of effect size for all main effects
#  in a two-way design. That is, for Factor A, compute it for all levels j < j'.
#  For Factor B, compute it for all levels k < k'.
#
if(is.matrix(x))x=listm(x)
x=lapply(x,elimna)
con=con2way(J,K)
conA=con$conA
FA=matrix(NA,nrow=ncol(conA),ncol=3)
ic=0
for(jj in 1:J){
for(jjj in 1:J){
if(jj < jjj){
ic=ic+1
FA[ic,1]=jj
FA[ic,2]=jjj
}}}
for(j in 1:ncol(conA)){
flag1=(conA[,j]==1)
flagm1=(conA[,j]==-1)
x1=as.vector(matl(x[flag1]))
x2=as.vector(matl(x[flagm1]))
FA[j,3]=yuenv2(x1,x2,tr=tr,nboot=nboot,SEED=SEED)$Effect.Size
}
dimnames(FA)<-list(NULL,c("Level","Level","Effect.Size"))
conB=con$conB
FB=matrix(NA,nrow=ncol(conB),ncol=3)
ic=0
for(jj in 1:K){
for(jjj in 1:K){
if(jj < jjj){
ic=ic+1
FB[ic,1]=jj
FB[ic,2]=jjj
}}}
for(j in 1:ncol(conB)){
flag1=(conB[,j]==1)
flagm1=(conB[,j]==-1)
x1=as.vector(matl(x[flag1]))
x2=as.vector(matl(x[flagm1]))
FB[j,3]=yuenv2(x1,x2,tr=tr,nboot=nboot,SEED=SEED)$Effect.Size
}
dimnames(FB)<-list(NULL,c("Level","Level","Effect.Size"))
list(Factor.A=FA,Factor.B=FB)
}

bi2CR<-function(k1,n1,k2,n2,alpha=.05,fin=100,xlab="p1",ylab="p2"){
#
# Have two independent binomials with probability of success p1 and p2
# plot 1-alpha confidence region for p1 and p2 using
# Sterne 2-dimensional CS method

# parameters:
#   k1,n1,k2,n2: the 2 samples (k1 out of n1, k2 out of n2)
#   alpha: 1-nominal level
#   fin: the sample space is scanned with step size 1/fin

# value: a data frame consisting of columns x,y,indik, where
#   x and y are coordinates of points in the
#  parameter space (with resolution 1/fin)
#   indik=1 if the parameter pair (x,y) belongs to the CS, =0 if not
# This function is based on a slight modification of code
# written by Jeno Reiczigel, Budapest, Hungary
# E-mail: reiczigel.jeno@aotk.szie.hu
# Homepage: www.univet.hu/users/jreiczig

# versions:
# version 18.04.2008: first version
# version 31.10.2010: plotting is optional
nom=1-alpha
x=rep(0:fin/fin,(fin+1))
y=rep(0:fin/fin,rep((fin+1),(fin+1)))
indik=rep(0,(fin+1)^2)
#
# CS calculation
#
for (i1 in 0:fin){
 p1=i1/fin
 pr1=dbinom(0:n1,n1,p1)
 for (i2 in 0:fin){
  p2=i2/fin
  pr2=dbinom(0:n2,n2,p2)
  jpr=rep(pr1,n2+1)*rep(pr2,rep(n1+1,n2+1))
  obs=dbinom(k1,n1,p1)*dbinom(k2,n2,p2)
  sumpr=sum(jpr[jpr>obs])
  if (sumpr<nom) {k=(i1+1)+(fin+1)*i2 ; indik[k]=indik[k]+1}
 }
}
# CS plotting
  plot(x[indik==1],y[indik==1],xlim=c(0,1),ylim=c(0,1),col="green",pch=15,
xlab=xlab,ylab=ylab)
  points(k1/n1,k2/n2,pch=19)
  invisible(data.frame(x=x,y=y,indik=indik)) # return the documented value
}
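
# EXAMPLE (an illustrative sketch):
# bi2CR(15,40,10,50)  # plots the .95 confidence region for (p1,p2)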

SPCA<-function(x, k = 0, kmax = ncol(x), delta = 0.001,
    na.action = na.fail, scale = FALSE, signflip = TRUE, trace=FALSE, ...){
library(rrcov)
res=PcaLocantore(x,k=k,kmax=kmax,delta=delta,na.action = na.action,
scale=scale,signflip=signflip,trace=trace, ...)
res
}

larsR<-function(x,y,type="lasso",xout=FALSE,outfun=out){
library(lars)
p=ncol(x)
p1=p+1
xy=elimna(cbind(x,y))
if(xout){
x<-xy[,1:p]
y<-xy[,p1]
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
xy=cbind(x,y)
}
result=lars(xy[,1:p],xy[,p1],type=type)
result
}

regci<-function(x,y,regfun=tsreg,nboot=599,alpha=.05,SEED=TRUE,pr=TRUE,
xout=FALSE,outfun=outpro,plotit=FALSE,xlab="Predictor 1",ylab="Predictor 2",...){
#
#   Compute a .95 confidence interval for each of the parameters of
#   a linear regression equation. The default regression method is
#   the Theil-Sen estimator.
#
#   When using the least squares estimator, and when n<250, use
#   lsfitci instead.
#
#   The predictor values are assumed to be in the n by p matrix x.
#   The default number of bootstrap samples is nboot=599
#
#   regfun can be any R function that returns the coefficients in
#   the vector regfun$coef, the first element of which contains the
#   estimated intercept, the second element contains the estimated slope for
#   the first predictor, etc.
#
#   plotit=TRUE: If there are two predictors, plot 1-alpha confidence region based
#  on the bootstrap samples.
#
x<-as.matrix(x)
p1<-ncol(x)+1
p<-ncol(x)
xy<-cbind(x,y)
xy<-elimna(xy)
x<-xy[,1:p]
y<-xy[,p1]
nrem=length(y)
estit=regfun(x,y,xout=xout,...)$coef
if(xout){
if(pr)print("Default for outfun is now outpro, not out")
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
flagF=identical(regfun,tsreg)
if(flagF){if(pr){
if(sum(duplicated(y)>0))print("Duplicate values detected; tshdreg might have more power than tsreg")
}}
nv=length(y)
x<-as.matrix(x)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(pr)print("Taking bootstrap samples. Please wait.")
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,regboot,x,y,regfun,xout=FALSE,...)
#Leverage points already removed.
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
regci<-matrix(0,p1,5)
vlabs="Intercept"
for(j in 2:p1)vlabs[j]=paste("Slope",j-1)
dimnames(regci)<-list(vlabs,c("ci.low","ci.up","Estimate","S.E.","p-value"))
ilow<-round((alpha/2) * nboot)
ihi<-nboot - ilow
ilow<-ilow+1
se<-NA
pvec<-NA
for(i in 1:p1){
bsort<-sort(bvec[i,])
pvec[i]<-(sum(bvec[i,]<0)+.5*sum(bvec[i,]==0))/nboot
if(pvec[i]>.5)pvec[i]<-1-pvec[i]
regci[i,1]<-bsort[ilow]
regci[i,2]<-bsort[ihi]
se[i]<-sqrt(var(bvec[i,]))
}
if(p1==3){
if(plotit){
plot(bvec[2,],bvec[3,],xlab=xlab,ylab=ylab)
}}
regci[,3]=estit
pvec<-2*pvec
regci[,4]=se
regci[,5]=pvec
list(regci=regci,n=nrem,n.keep=nv)
}
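
# EXAMPLE (an illustrative sketch, simulated data; the default Theil-Sen
# estimator is used):
# set.seed(17)
# x=rnorm(50)
# y=x+rnorm(50)
# regci(x,y)  # bootstrap CIs for the intercept and slope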
M2m.loc<-function(m,grpc,col.dat,locfun=tmean,...){
#
# m is a matrix or data frame.
# Compute a measure of location for each of several categories, with
# categories indicated by the values in the column of m given by the
# argument grpc.
# The argument grpc can have up to 4 values, which correspond to factors.
#
#  col.dat indicates the column of m containing the outcome measure
# of interest.
# locfun indicates the measure of location, which defaults to the 20%
# trimmed mean.
#
#  Example,
# M2m.loc(x,c(1,4),5,locfun=mean)
# indicates that there are 2 factors, with levels of the factors indicated
# by the values in columns 1 and 4 of the matrix x. For each combination
# of levels,
# locfun=mean
# indicates that the sample mean will be computed.
#
flagit=F
if(is.null(dim(m)))stop("Data must be stored in a matrix or data frame")
if(is.na(grpc[1]))stop("The argument grpc is not specified")
if(is.na(col.dat[1]))stop("The argument col.dat is not specified")
if(length(grpc)>4)stop("grpc must have length <= 4")
m=as.data.frame(m)
if(length(grpc)==1){
p1=ncol(m)+1
dum=rep(1,nrow(m))
flagit=T
m=cbind(m,dum)
grpc=c(grpc,p1)
cat1<-sort(unique(m[,grpc[1]]))
M=NULL
for (ig1 in 1:length(cat1)){
flag1=(m[,grpc[1]]==cat1[ig1])
flag=(flag1==1)
msub=as.data.frame(m[flag,])
loc=locfun(m[flag,col.dat],...)
M=rbind(M,as.data.frame(cbind(msub[1,grpc],loc)))
}
M=M[,c(1,3)]
}
if(length(grpc)==2){
cat1<-sort(unique(m[,grpc[1]]))
cat2<-sort(unique(m[,grpc[2]]))
M=NULL
for (ig1 in 1:length(cat1)){
for (ig2 in 1:length(cat2)){
flag1=(m[,grpc[1]]==cat1[ig1])
flag2=(m[,grpc[2]]==cat2[ig2])
flag=(flag1*flag2==1)
msub=m[flag,]
loc=locfun(m[flag,col.dat],...)
M=rbind(M,as.data.frame(cbind(msub[1,grpc],loc)))
}}}
if(length(grpc)==3){
cat1<-sort(unique(m[,grpc[1]]))
cat2<-sort(unique(m[,grpc[2]]))
cat3<-sort(unique(m[,grpc[3]]))
M=NULL
for (ig1 in 1:length(cat1)){
for (ig2 in 1:length(cat2)){
for (ig3 in 1:length(cat3)){
flag1=(m[,grpc[1]]==cat1[ig1])
flag2=(m[,grpc[2]]==cat2[ig2])
flag3=(m[,grpc[3]]==cat3[ig3])
flag=(flag1*flag2*flag3==1)
msub=m[flag,]
loc=locfun(m[flag,col.dat],...)
M=rbind(M,as.data.frame(cbind(msub[1,grpc],loc)))
}}}}
if(length(grpc)==4){
cat1<-sort(unique(m[,grpc[1]]))
cat2<-sort(unique(m[,grpc[2]]))
cat3<-sort(unique(m[,grpc[3]]))
cat4<-sort(unique(m[,grpc[4]]))
M=NULL
for (ig1 in 1:length(cat1)){
for (ig2 in 1:length(cat2)){
for (ig3 in 1:length(cat3)){
for (ig4 in 1:length(cat4)){
flag1=(m[,grpc[1]]==cat1[ig1])
flag2=(m[,grpc[2]]==cat2[ig2])
flag3=(m[,grpc[3]]==cat3[ig3])
flag4=(m[,grpc[4]]==cat4[ig4])
flag=(flag1*flag2*flag3*flag4==1)
msub=m[flag,]
loc=locfun(m[flag,col.dat],...)
M=rbind(M,as.data.frame(cbind(msub[1,grpc],loc)))
}}}}}
if(flagit)M=M[,c(1,3)]
M
}
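
# EXAMPLE (an illustrative sketch: one factor stored in column 1,
# outcome in column 2; a 20% trimmed mean is computed for each level):
# set.seed(18)
# m=cbind(rep(1:3,each=10),rnorm(30))
# M2m.loc(m,grpc=1,col.dat=2)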
skip<-function(m,cop=6,MM=FALSE,op=1,mgv.op=0,outpro.cop=3,STAND=FALSE,pr=TRUE){
#
# m is an n by p matrix
#
# Compute skipped location and covariance matrix
#
# op=1:
# Eliminate outliers using a projection method
# That is, first determine center of data using:
#
# cop=1 Donoho-Gasko median,
# cop=2 MCD,
# cop=3 marginal medians.
#  cop=4 uses MVE center
#  cop=5 uses TBS
#  cop=6 uses rmba (Olive's median ball algorithm)
#
# For each point
# consider the line between it and the center,
# project all points onto this line, and
# check for outliers using
#
# MM=F, a boxplot rule.
# MM=T, rule based on MAD and median
#
# Repeat this for all points. A point is declared
# an outlier if for any projection it is an outlier
#
# op=2 use mgv (function outmgv) method to eliminate outliers
#
# Eliminate any outliers and compute means
#  using remaining data.
# mgv.op=0, mgv uses all pairwise distances to determine center of the data
# mgv.op=1 uses MVE
# mgv.op=2 uses MCD
#
temp<-NA
m<-elimna(m)
if(op==2)temp<-outmgv(m,plotit=FALSE,op=mgv.op)$keep
if(op==1)temp<-outpro(m,plotit=FALSE,MM=MM,cop=outpro.cop,STAND=STAND,pr=pr)$keep
val<-var(m[temp,])
loc<-apply(m[temp,],2,mean)
list(center=loc,cov=val)
}
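
# EXAMPLE (an illustrative sketch, simulated bivariate data; the default
# settings use outpro, defined elsewhere in this file):
# set.seed(19)
# m=matrix(rnorm(60),ncol=2)
# skip(m)  # skipped estimate of location ($center) and scatter ($cov)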

ancmppb<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,pts=NA,est=tmean,nboot=NA,
bhop=FALSE,SEED=TRUE,cov.fun=skip,cop=NULL,pr=TRUE,...){
#
# Compare two independent  groups using the ancova method
# with multiple covariates.
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
# Design points are chosen based on depth of points in x1 if pts=NA
#  Assume data are in x1 y1 x2 and y2
#
#  cov.fun determines the location and
#  scatter matrix used to find closest points to
#  a design point. It is used by ancdes.
#
#  Choices for cov.fun include
#  cov.mve
#  cov.mcd
#  rmba
#  skip
#  tbs
#
#if(pr)print("For the old version of this function, use ancmpbpb")
x1=as.matrix(x1)
y1=as.matrix(y1)
if(ncol(x1)==1)stop("Use a function designed for one covariate only")
x2=as.matrix(x2)
y2=as.matrix(y2)
if(ncol(x1)!=ncol(x2))
stop("Number of covariates must be the same for each group")
xy=elimna(cbind(x1,y1))
p=ncol(x1)
p1=p+1
x1=xy[,1:p]
y1=xy[,p1]
xy=elimna(cbind(x2,y2))
x2=xy[,1:p]
y2=xy[,p1]
x1=as.matrix(x1)
x2=as.matrix(x2)
# mval1 and mval2 are needed by near3d below, even when pts is supplied
mval1=cov.fun(x1)
mval2=cov.fun(x2)
if(is.na(pts[1])){
if(!is.null(cop))pts<-ancdes(x1,cop=cop)
if(is.null(cop))pts=ancdes(x1,center=mval1$center)
}
pts<-as.matrix(pts)
if(nrow(pts)>=29){
print("WARNING: More than 28 design points")
print("Only first 28 are used.")
pts<-pts[1:28,]
}
n1<-1
n2<-1
vecn<-1
for(i in 1:nrow(pts)){
n1[i]<-length(y1[near3d(x1,pts[i,],fr1,mval1)])
n2[i]<-length(y2[near3d(x2,pts[i,],fr2,mval2)])
}
flag<-rep(T,nrow(pts))
for(i in 1:nrow(pts))if(n1[i]<10 || n2[i]<10)flag[i]<-F
pts<-pts[flag,]
if(sum(flag)==1)pts<-t(as.matrix(pts))
if(sum(flag)==0)stop("No comparable design points found, might increase span.")
mat<-matrix(NA,nrow(pts),7)
dimnames(mat)<-list(NULL,c("n1","n2","DIF","TEST","se","ci.low","ci.hi"))
g1<-list()
ip<-nrow(pts)
ncom<-0
nc2<-ip
con<-matrix(0,nrow=2*ip,ncol=nrow(pts))
for (i in 1:nrow(pts)){
ip<-ip+1
ncom<-ncom+1
nc2<-nc2+1
con[ncom,i]<-1
con[nc2,i]<-0-1
temp<-y1[near3d(x1,pts[i,],fr1,mval1)]
g1[[i]]<-temp[!is.na(temp)]
temp<-y2[near3d(x2,pts[i,],fr2,mval2)]
g1[[ip]]<-temp[!is.na(temp)]
}
mat<-pbmcp(g1,alpha=alpha,nboot=nboot,est=est,con=con,bhop=bhop,SEED=SEED,...)
list(points=pts,output=mat)
}


hc4wmc<-function(x,y,nboot=599,k=2,grp=NA,con=0,SEED=TRUE,STOP=TRUE,...){
#
#   Test the hypothesis that J independent groups have identical slopes.
#   Using least squares regression
#   Data are stored in list mode or in a matrix.  In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, the columns of the matrix correspond
#   to groups.
#
#   Similarly, y[[1]] contains the data for the first group,
#   y[[2]] the data for the second groups, etc.
#
#   The argument grp can be used to analyze a subset of the groups
#   Example: grp=c(1,3,5) would compare groups 1, 3 and 5.
#
#   Missing values are allowed.
#
if(STOP)stop('Suggest ols1way. This function assumes equal n. To use anyway, set STOP=FALSE')
con<-as.matrix(con)
if(is.matrix(x))x<-listm(x)
if(!is.list(x))stop("Data must be stored in list mode or in matrix mode.")
if(is.matrix(y))y<-listm(y)
if(!is.list(y))stop("Data must be stored in list mode or in matrix mode.")
if(!is.na(sum(grp))){  # Only analyze specified groups.
xx<-list()
yy<-list()
for(i in 1:length(grp)){
xx[[i]]<-x[[grp[i]]]
yy[[i]]<-y[[grp[i]]]
}
x<-xx
y<-yy
}
J<-length(x)
n<-length(x[[1]])
tempn<-0
slopes<-NA
covar<-NA
stemp<-NA
yhat<-numeric(J)
res<-matrix(,ncol=J, nrow=n)
for(j in 1:J){
temp<-cbind(x[[j]], y[[j]])
temp<-elimna(temp) # Remove missing values.
#n<-length(y[[j]])
tempn[j]<-length(temp)
x[[j]]<-temp[,1]
y[[j]]<-temp[,2]
tempx<-as.matrix(x[[j]])
tempy<-as.matrix(y[[j]])
#Getting yhat and residuals for wild bootstrap
yhat[j]<-mean(tempy)
res[,j]<-tempy-yhat[j]
#original Slope and SE
stemp<-lsfit(tempx, tempy)
slopes[j]<-stemp$coef[k] #Slopes for original data
covar[j]<-lsfitNci4(tempx, tempy)$cov[k,k] #original HC4 for coefficient(slope)
}
#
Jm<-J-1
#
# Determine contrast matrix
#
if(sum(con^2)==0){
ncon<-(J^2-J)/2
con<-matrix(0,J,ncon)
id<-0
for (j in 1:Jm){
jp<-j+1
for (h in jp:J){
id<-id+1
con[j,id]<-1
con[h,id]<-0-1
}}}
ncon<-ncol(con)
if(nrow(con)!=J){
stop("Something is wrong with con; the number of rows does not match the number of groups.")
}
#calculating original statistic
dif.slopes<-t(con)%*%slopes
o.se<-t(con^2)%*%covar
o.stat<-dif.slopes/sqrt(o.se) #original test statistics
#
om<-max(abs(o.stat)) #Max. absolute test statistics
#
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#
data<-matrix(ifelse(rbinom(n*nboot*J,1,0.5)==1,-1,1),ncol=nboot*J) #discrete wild bootstrap sample
test<-numeric(nboot)
u<-rep(1, n)
c<-1
for (i in seq(1,nboot*J-J+1,by=J)){
d<-data[,i:(i+J-1)]  # the J columns of wild bootstrap signs for this replication
ystar<-u%*%t(yhat)+res*d
ystar<-listm(ystar)
test[c]<-mcslope(x,ystar, con, k)
c<-c+1
}
sum<-sum(test>= om)
p.val<-sum/nboot
list(p.value=p.val)
}
mcslope<-function(X, Y, con, k){
J=length(X)
slopes<-numeric(J)
covar<-numeric(J)
for(j in 1:J){
tempx<-as.matrix(X[[j]])
tempy<-as.matrix(Y[[j]])
slopes[j]<-lsfit(tempx, tempy)$coef[k] #slope estimate for group j
covar[j]<-lsfitNci4(tempx, tempy)$cov[k,k] #HC4 variance estimate for the slope
}
dif.slopes<-t(con)%*%slopes
o.se<-t(con^2)%*%covar
o.stat<-dif.slopes/sqrt(o.se) #original test statistics
om<-max(abs(o.stat))
om
}


ZYmediate<-function(x,y,nboot=2000,alpha=.05,kappa=.05,SEED=TRUE,xout=FALSE,outfun=out){
#
# Robust mediation analysis using M-estimator as
# described in Zu and Yuan, 2010, MBR, 45, 1--44.
#
# x[,1] is predictor
# x[,2] is mediator variable
#  y is outcome variable.
ep=0.00000001  # convergence criterion
B=nboot         # the number of bootstrap replications
# kappa is the proportion of cases to be controlled when the robust method
# is used. Zu and Yuan used .05, so this is the default value used here.
level=alpha    # alpha level
if(SEED)set.seed(2)
Z=elimna(cbind(x,y))
if(xout){
flag<-outfun(Z[,1],plotit=FALSE,SEED=SEED)$keep
Z<-Z[flag,]
}
p=3
n=nrow(Z)
HT=HuberTun(kappa,p)
r=HT$r
tau=HT$tau
H=robEst(Z,r,tau,ep)
R.v=H$u2*tau
oH=order(R.v)
oCaseH=(1:n)[oH]        # case number with its Ri increases
oR.v=R.v[oH]

### Use robust method
# point estimate
thetaH=H$theta
aH=thetaH[1]
bH=thetaH[2]
abH=aH*bH

muH=H$mu
SigmaH=H$Sigma
dH=H$d

#Standard errors
RH=SErob(Z,muH,SigmaH,thetaH,dH,r,tau)

Zr=RH$Zr
SEHI=RH$inf
SEHS=RH$sand
ParEstH<-round(cbind(thetaH,SEHI[1:6],SEHS[1:6]),3)
rnames<-c("a","b","c","vx","vem","vey")
ParEstH<-cbind(rnames,ParEstH)
res=t(ParEstH)
#
Res=BCI(Z,Zr,ab=3,abH,B,level)
list(CI.ab=Res$CI,p.value=Res$pv,a.est=aH,b.est=bH,ab.est=abH)
}


#------------------------------------------------------------
# Tunning parameter when use Huber type weight
#------------------------------------------------------------
# Input:
	#kappa: the proportion of cases to be controlled
	#p: the number of variables
# Output
	# r: the critical value of the Mahalanobis distance, as defined in (20)
	# tau: the constant that makes the robust estimator of Sigma unbiased, as defined in (20)
	
HuberTun=function(kappa,p){
	prob=1-kappa
	chip=qchisq(prob,p)
	r=sqrt(chip)
	tau=(p*pchisq(chip,p+2)+ chip*(1-prob))/p	
	Results=list(r=r,tau=tau)
	return(Results)	
}

robEst=function(Z,r,tau,ep){

      p=ncol(Z)
      n=nrow(Z)
      # Starting values 	
      mu0=MeanCov(Z)$zbar
      Sigma0=MeanCov(Z)$S
      Sigin=solve(Sigma0)

      diverg=0 # convergence flag

      for (k in 1:200) {   	
		sumu1=0
		mu=matrix(0,p,1)
		Sigma=matrix(0,p,p)
		d=rep(NA,n)
		u1=rep(NA,n)
		u2=rep(NA,n)

   		for (i in 1:n) {
			zi=Z[i,]
  			zi0=zi-mu0
  			di2=t(zi0)%*%Sigin%*%zi0
  			di=as.numeric(sqrt(di2))
  			d[i]=di
  		
    		#get u1i,u2i
			if (di<=r) {
     			   u1i=1.0
     			   u2i=1.0/tau
			}else {
     			   u1i=r/di
     			   u2i=u1i^2/tau
 		   }
    			u1[i]=u1i
    			u2[i]=u2i
 	
  			sumu1=sumu1+u1i
  			mu=mu+u1i*zi
  			Sigma=Sigma+u2i*zi0%*%t(zi0)
  		
   		} # end of loop i

  		mu1=mu/sumu1
  		Sigma1=Sigma/n
 		Sigdif=Sigma1-Sigma0
  		dt=sum(Sigdif^2)

  		mu0=mu1
  		Sigma0=Sigma1
  		Sigin=solve(Sigma0)
  if (dt<ep) {break}

	  } # end of loop k


       if (k==200) {
  			diverg=1
  			mu0=rep(0,p)
   			Sigma0=matrix(NA,p,p)

  	  }

       theta=MLEst(Sigma0)

       Results=list(mu=mu0,Sigma=Sigma0,theta=theta,d=d,u1=u1,u2=u2,diverg=diverg)
       return(Results)
}

SErob=function(Z,mu,Sigma,theta,d,r,tau){
	n=nrow(Z)
	p=ncol(Z)
	ps=p*(p+1)/2
	q=6
	Dup=Dp(p)
	DupPlus=solve(t(Dup)%*%Dup)%*%t(Dup)

   	InvSigma=solve(Sigma)
	sigma=vech(Sigma)
   	W=0.5*t(Dup)%*%(InvSigma%x%InvSigma)%*%Dup
   	
   	Zr=matrix(NA,n,p) # robustly transformed data
   	A=matrix(0,p+q,p+q)
   	B=matrix(0,p+q,p+q)
 	sumg=rep(0,p+q)

   	for (i in 1:n) {
    	zi=Z[i,]
    	zi0=zi-mu
    	di=d[i]

     	if (di<=r) {
     			u1i=1.0
     			u2i=1.0/tau
     			du1i=0
     			du2i=0
		}else {
     			u1i=r/di
     			u2i=u1i^2/tau
     			du1i=-r/di^2
     			du2i=-2*r^2/tau/di^3
 		}

      	#robust transformed data
    	Zr[i,]=sqrt(u2i)*t(zi0)
    			   	
    	####	gi

    	g1i=u1i*zi0	# defined in (24)
      	vTi=vech(zi0%*%t(zi0))
    	g2i=u2i*vTi-sigma	# defined in (25)
	gi=rbind(g1i,g2i)
    	sumg=gi+sumg

		B=B+gi%*%t(gi)
    	
    	####	gdoti

    	#	derivatives of di with respect to mu and sigma
    	ddmu=-1/di*t(zi0)%*%InvSigma
    	ddsigma=-t(vTi)%*%W/di

    	#	
    	dg1imu=-u1i*diag(p)+du1i*zi0%*%ddmu
    	dg1isigma=du1i*zi0%*%ddsigma
    	dg2imu=-u2i*DupPlus%*%(zi0%x%diag(p)+diag(p)%x%zi0)+du2i*vTi%*%ddmu
    	dg2isigma=du2i*vTi%*%ddsigma-diag(q)
    	
    	dgi=rbind(cbind(dg1imu,dg1isigma),cbind(dg2imu,dg2isigma))
    	A=A+dgi
   } # end of loop n

	A=-1*A/n
	B=B/n
	invA=solve(A)
	OmegaSW=invA%*%B%*%t(invA)
   	OmegaSW=OmegaSW[(p+1):(p+q),(p+1):(p+q)]
	
		
   SEsw=getSE(theta,OmegaSW,n)
	SEinf=SEML(Zr,theta)$inf
	
	Results=list(inf=SEinf,sand=SEsw,Zr=Zr)  	
   return(Results)
   	
}

MeanCov=function(Z){
	n=nrow(Z)
	p=ncol(Z)

	zbar=t(Z)%*%matrix(1/n,n,1)
	S=t(Z)%*%(diag(n)-matrix(1/n,n,n))%*%Z/n
	
   	Results=list(zbar=zbar,S=S)
   	return(Results)
}

#-----------------------------------------------------------------------
# Obtaining normal-theory MLE of parameters in the mediation model
#-----------------------------------------------------------------------
# Input:
	# S: sample covariance
# Output:
   # thetaMLE: normal-theory MLE of theta. theta is defined in the subsection: MLEs of a,b, and c

MLEst=function(S){
	ahat=S[1,2]/S[1,1]
	vx=S[1,1]
	# M on X
	Sxx=S[1:2,1:2]
	sxy=S[1:2,3]
	vem=S[2,2]-S[2,1]*S[1,2]/S[1,1]
		
	# Y on X and M
	invSxx=solve(Sxx)
	beta.v=invSxx%*%sxy # chat, bhat
	vey=S[3,3]-t(sxy)%*%invSxx%*%sxy
	thetaMLE=c(ahat,beta.v[2],beta.v[1],vx,vem,vey)
    	return(thetaMLE)
}

Dp=function(p){
    p2=p*p
    ps=p*(p+1)/2
  	Dup=matrix(0,p2,ps)
	count=0
	for (j in 1:p){
    	for (i in j:p){
      		count=count+1
      		if (i==j){
      			Dup[(j-1)*p+j, count]=1
      		}else{
      			Dup[(j-1)*p+i, count]=1
                Dup[(i-1)*p+j, count]=1
      		}
      	}
	}	

	return(Dup)	
}

vech=function(A){
	l=0
	p=nrow(A)
	ps=p*(p+1)/2
	vA=matrix(0,ps,1)
	for (i in 1:p) {
		for (j in i:p) {
             l=l+1
             vA[l,1]=A[j,i]			
	    }	
	}

	return(vA)	
}
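
#  CHECK: Dp(p) is the duplication matrix, so Dp(p)%*%vech(A) recovers
#  vec(A) for any symmetric p-by-p matrix A. For example,
#  A=matrix(c(1,2,2,3),2,2)
#  vech(A)          # (1,2,3): the stacked lower-triangular elements
#  Dp(2)%*%vech(A)  # (1,2,2,3): the full vectorized matrix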


   getSE=function(theta,Omega,n){
	
	hdot=gethdot(theta)
	COV=hdot%*%(Omega/n)%*%t(hdot)  # delta method
	se.v=sqrt(diag(COV)) # se.v of theta
	
	a=theta[1]
	b=theta[2]
	SobelSE=sqrt(a^2*COV[2,2]+b^2*COV[1,1])
	
	se.v=c(se.v,SobelSE) # including Sobel SE

   return(se.v)

}
gethdot=function(theta){

p=3
ps=p*(p+1)/2
q=ps
	
a=theta[1]
b=theta[2]
c=theta[3]
#ab=a*b
vx=theta[4]
vem=theta[5]
vey=theta[6]

sigmadot=matrix(NA,ps,q)
sigmadot[1,]=c(0,0,0,1,0,0)
sigmadot[2,]=c(vx,0,0,a,0,0)
sigmadot[3,]=c(b*vx,a*vx,vx,a*b+c,0,0)
sigmadot[4,]=c(2*a*vx,0,0,a^2,1,0)
sigmadot[5,]=c((2*a*b+c)*vx,a^2*vx+vem,a*vx,a^2*b+a*c,b,0)
sigmadot[6,]=c((2*b*c+2*a*b^2)*vx,(2*c*a+2*a^2*b)*vx+2*b*vem,(2*a*b+2*c)*vx,c^2+2*c*a*b+a^2*b^2,b^2,1)

hdot=solve(sigmadot)

return(hdot)
}
SEML=function(Z,thetaMLE){
	n=nrow(Z)
	p=ncol(Z)
	ps=p*(p+1)/2
   	q=ps
	zbar=MeanCov(Z)$zbar
	S=MeanCov(Z)$S
	Dup=Dp(p)
	InvS=solve(S)
	W=0.5*t(Dup)%*%(InvS%x%InvS)%*%Dup
	OmegaInf=solve(W)  # only about sigma, not mu
	
	
	# Sandwich-type Omega
	S12=matrix(0,p,ps)
	S22=matrix(0,ps,ps)
	
	for (i in 1:n){
		zi0=Z[i,]-zbar
		difi=zi0%*%t(zi0)-S
   	   	vdifi=vech(difi)   		
		S12=S12+zi0%*%t(vdifi)	
		S22=S22+vdifi%*%t(vdifi)		
	}
	
	OmegaSW=S22/n # only about sigma, not mu
	
   	SEinf=getSE(thetaMLE,OmegaInf,n)
   	SEsw=getSE(thetaMLE,OmegaSW,n)
	
	Results=list(inf=SEinf,sand=SEsw)
   	return(Results)
   	
}
BCI=function(Z,Zr,ab=NULL,abH,B,level){
	p=ncol(Z)
	n=nrow(Z)
#	abhat.v=rep(NA,B) # save MLEs of a*b in the B bootstrap samples
	abhatH.v=matrix(NA,B)
	Index.m=matrix(NA,n,B)
	
	t1=0
	t2=0	
    for(i in 1:B){
	    U=runif(n,min=1,max=n+1)
       index=floor(U)	
		Index.m[,i]=index
		#H(.05)
		Zrb=Zr[index,]
		SH=MeanCov(Zrb)$S
		thetaH=MLEst(SH)
		abhatH=thetaH[1]*thetaH[2]	
		abhatH.v[i]=abhatH
		if (abhatH<abH){
			t2=t2+1	
		}
		
	 } # end of B loop
	
	abhatH.v=abhatH.v[!is.na(abhatH.v)]
	SEBH=sd(abhatH.v)
	
	# bootstrap confidence intervals using robust method
	CI2 =BpBCa(Zr,abhatH.v,t2,level)
#    Results=list(CI=CI2)
    Results=list(CI=CI2[[1]],pv=CI2[[2]])
 	return(Results)
	
}# end of function


BpBCa=function(Z,abhat.v,t,level){
	# Bootstrap percentile
	oab.v=sort(abhat.v)
	B=length(abhat.v)
	
	ranklowBp=round(B*level/2)
	
	if(ranklowBp==0){
		ranklowBp=1
	}
		
	Bpl=oab.v[ranklowBp]
	Bph=oab.v[round(B*(1-level/2))]	
	BP=c(Bpl,Bph)
pstar=mean(oab.v>0)
pv=2*min(c(pstar,1-pstar))
#	Results=list(BP=BP)
#    return(Results)
list(BP,pv)
}

RobRsq<-function(x,y){
library(robust)
z=lmRob(y~x)
res=robR2w(z)
res
}

robR2w = function (rob.obj, correc=1.2076) {
  ## R2 in robust regression, see
  ## Renaud, O. & Victoria-Feser, M.-P. (2010). A robust coefficient of determination for regression.
  ## Journal of Statistical Planning and Inference, 140, 1852-1862.
  ## rob.obj is an lmRob object. correc is the correction for consistency. Call:
  ##
  ## library(robust)
  ## creat.lmRob = lmRob(original1 ~ approprie1+approprie2+creativite1+creativite2, data=creatif)
  ## summary(creat.lmRob)
  ## robR2w(creat.lmRob)

  ## Weights in robust regression
  wt.bisquare = function(u, c = 4.685) {
    U <- abs(u/c)
    w <- ((1. + U) * (1. - U))^2.
    w[U > 1.] <- 0.
    w
  }
  weight.rob=function(rob.obj){
    resid.rob=rob.obj$resid
    scale.rob=(rob.obj$scale)*rob.obj$df.residual/length(resid.rob)
    resid.rob= resid.rob/scale.rob
    weight=wt.bisquare(resid.rob)
  }

  if (attr(rob.obj, "class") !="lmRob")
    stop("This function works only on lmRob objects")
  pred = rob.obj$fitted.values
  resid = rob.obj$resid
  resp = resid+pred
  wgt = weight.rob(rob.obj)
  scale.rob = rob.obj$scale
  resp.mean = sum(wgt*resp)/sum(wgt)
  pred.mean = sum(wgt*pred)/sum(wgt)
  yMy = sum(wgt*(resp-resp.mean)^2)
  rMr = sum(wgt*resid^2)
  r2 = (yMy-rMr) / yMy
  r2correc= (yMy-rMr) / (yMy-rMr +rMr*correc)
  r2adjcor = 1-(1-r2correc) * (length(resid)-1) / (length(resid)-length(rob.obj$coefficients)-1)
  return(list(robR2w.NoCorrection=r2, robR2w.WithCorrection=r2correc, robR2w.AdjustedWithCorrection=r2adjcor))
}

bi2KMSv2<-function(r1=sum(x),n1=length(x),r2=sum(y),n2=length(y),
x=NA,y=NA,nullval=0){
#
# Test the hypothesis that two independent binomials have equal
# probability of success using method KMS.
#
#  Unlike the function bi2KMS, a p-value is returned
#
# r1=number of successes in group 1
# n1=number of observations in group 1
#
# Uses Kulinskaya et al. method American Statistician, 2010, 64, 350-
#
#  null value is the hypothesized value for p1-p2
#
alph<-c(1:99)/100
for(i in 1:99){
irem<-i
chkit<-bi2KMS(r1=r1,n1=n1,r2=r2,n2=n2,x=x,y=y,alpha=alph[i])
if(chkit$ci[1]>nullval || chkit$ci[2]<nullval)break
}
p.value<-irem/100
if(p.value<=.1){
iup<-(irem+1)/100
alph<-seq(.001,iup,.001)
for(i in 1:length(alph)){
p.value<-alph[i]
chkit<-bi2KMS(r1=r1,n1=n1,r2=r2,n2=n2,x=x,y=y,alpha=alph[i])
if(chkit$ci[1]>nullval || chkit$ci[2]<nullval)break
}}
est=bi2KMS(r1=r1,n1=n1,r2=r2,n2=n2,x=x,y=y)
#list(est.p1=est$p1,est.p2=est$p2,p.value=p.value)
list(ci=est$ci,est.p1=est$p1,est.p2=est$p2,p.value=p.value)
}
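
#  EXAMPLE (the counts are illustrative only):
#  bi2KMSv2(r1=15,n1=20,r2=10,n2=25)
#  tests p1=p2 given 15 successes in 20 trials versus 10 in 25, and
#  returns the KMS confidence interval along with a p-value.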

disc2com<-function(x,y,alpha=.05,nboot=500,SEED=TRUE){
#
#  Comparing two independent variables in terms of their probability function.
#  A global test of P(X=x)=P(Y=x) for all x.
#  (The R function binband tests this hypothesis for each x.)
#  So this method is useful when dealing with discrete data having a small sample space.
#
library(mc2d)
x=elimna(x)
y=elimna(y)
if(SEED)set.seed(2)
vals=sort(unique(c(x,y)))
n1=length(x)
n2=length(y)
K=length(vals)
C1=NULL
C2=NULL
HT=NULL
for(i in 1:K){
C1[i]=sum(x==vals[i])
C2[i]=sum(y==vals[i])
HT[i]=(C1[i]+C2[i])/(n1+n2)
}
p1hat=C1/n1
p2hat=C2/n2
test=sum((p1hat-p2hat)^2)
#test=max(abs(p1hat-p2hat))
tv=NULL
TB=NA
VP=NA
for(ib in 1:nboot){
xx=rmultinomial(n1,1,HT)
yy=rmultinomial(n2,1,HT)
B1=NA
B2=NA
BP=NA
for(i in 1:K){
B1[i]=sum(xx[,i])
B2[i]=sum(yy[,i])
}
B1hat=B1/n1
B2hat=B2/n2
TB[ib]=sum((B1hat-B2hat)^2)
}
pv=1-mean(test>TB)-.5*mean(test==TB)
list(test=test,p.value=pv)
}
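
#  EXAMPLE (a sketch with simulated Likert-type responses):
#  set.seed(1)
#  x=sample(1:5,30,replace=TRUE)
#  y=sample(1:5,40,replace=TRUE)
#  disc2com(x,y)
#  performs the global test of P(X=x)=P(Y=x) over the observed sample space.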

wmwloc<-function(x,y,na.rm=TRUE,est=median,...){
#
# Estimate the median of the distribution of x-y
#
if(na.rm){
x<-x[!is.na(x)]
y<-y[!is.na(y)]
}
m<-outer(x,y,FUN="-")
est=est(m,na.rm=TRUE,...)
est
}
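
#  Note: wmwloc applies est to all n1*n2 pairwise differences x_i-y_j,
#  so with the default est=median it is a Hodges-Lehmann-type estimate.
#  For example, wmwloc(c(1,2,3),c(0,1)) returns 1.5, the median of the
#  six differences 1,0,2,1,3,2.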


DEPanc<-function(x1,y1,y2,fr1=1,tr=.2,alpha=.05,plotit=TRUE,DISDIF=FALSE,DIF=TRUE,
pts=NULL,sm=FALSE,xout=FALSE,outfun=out,nboot=500,na.rm=TRUE){
#
# Compare two dependent  groups using a covariate
#
# x1 is the covariate and
# y1 and y2 are the two measures. For instance time 1 and time 2.
#
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
#
#  fr1 is span for running interval smoother
#
#  sm=T will create smooths using bootstrap bagging.
#
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#
#  If DISDIF=T: 1. compare groups using median of distribution of D=Y1-Y2
#               2. if na.rm=T, case wise deletion is used, otherwise all of the data are used.
#
#   Also see the R function DEPancB, which includes alternative methods for handling missing values
#
m=cbind(x1,y1,y2)
flag=is.na(x1)
m=m[!flag,]
if(is.null(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
y2<-y2[xorder]
vecn<-1
for(i in 1:length(x1))vecn[i]<-length(y1[near(x1,x1[i],fr1)])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
}
if(!is.null(pts[1]))isub=c(1:length(pts))
#print(isub)
mat<-matrix(NA,length(isub),8)
dimnames(mat)<-list(NULL,c("X","n","DIF","TEST","se","ci.low","ci.hi",
"p.value"))
for (i in 1:length(isub)){
if(is.null(pts)){
ch=near(x1,x1[isub[i]],fr1)
mat[i,1]=x1[isub[i]]
}
if(!is.null(pts)){
ch=near(x1,pts[i],fr1)
mat[i,1]=pts[i]
}
mat[i,2]=sum(ch)
if(!DISDIF){
if(!DIF){
test<-yuend(m[ch,2],m[ch,3],tr=tr)
mat[i,3]=mean(m[ch,2],tr=tr)-mean(m[ch,3],tr=tr)
mat[i,4]<-test$teststat
mat[i,5]<-test$se
mat[i,6]<-test$ci[1]
mat[i,7]<-test$ci[2]
mat[i,8]<-test$siglevel
}
if(DIF){
test=trimci(m[ch,2]-m[ch,3],tr=tr,pr=FALSE)
mat[i,3]=mean(m[ch,2]-m[ch,3],tr=tr)
mat[i,4]<-test$test.stat
mat[i,5]<-test$se
mat[i,6]<-test$ci[1]
mat[i,7]<-test$ci[2]
mat[i,8]<-test$p.value
}}
if(DISDIF){
test=l2drmci(m[ch,2:3],pr=FALSE,nboot=nboot,na.rm=na.rm)
mat[i,3]<-loc2dif(m[ch,2],m[ch,3],na.rm=na.rm)
mat[i,4]<-NA
mat[i,5]<-NA
mat[i,6]<-test$ci[1]
mat[i,7]<-test$ci[2]
mat[i,8]<-test$p.value
}}
if(plotit)
runmean2g(x1,y1,x1,y2,fr=fr1,est=mean,tr=tr,sm=sm,xout=xout,outfun=outfun)
list(output=mat)
}


DEPancB<-function(x1,y1,y2,fr1=1,est=tmean,alpha=.05,plotit=TRUE,DISDIF=FALSE,DIF=TRUE,TLS=FALSE,SEED=TRUE,
pts=NULL,sm=FALSE,xout=FALSE,outfun=out,nboot=500,pr=FALSE,na.rm=TRUE,xlab="Group 1", ylab="Group 2",...){
#
# Compare two dependent  groups using a covariate
#
#  same as DEPanc, only use bootstrap methods in all cases.
#
# x1 is the covariate and
# y1 and y2 are the two measures. For instance time 1 and time 2.
#
#   case wise deletion of missing values used by default.
#   To use all of the data not missing, set DIF=F and na.rm=F
#   For the special case where the goal is to compare means, also set TLS=T
#   (But this can produce an error if too many missing values)
#
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
#
#  TLS=F, use percentile bootstrap when DIF=FALSE; otherwise (TLS=T) use Lin-Stivers method for means
#  fr1 is span for running interval smoother
#
#  sm=T will create smooths using bootstrap bagging.
#
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#
m=cbind(x1,y1,y2)
flag=is.na(x1)
if(na.rm)m=elimna(m)
if(!na.rm){
m=m[!flag,]
}
x1=m[,1]
y1=m[,2]
y2=m[,3]
if(is.null(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
y2<-y2[xorder]
vecn<-1
for(i in 1:length(x1))vecn[i]<-length(y1[near(x1,x1[i],fr1)])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
}
if(!is.null(pts[1]))isub=c(1:length(pts))
mat<-matrix(NA,length(isub),6)
dimnames(mat)<-list(NULL,c("X","n","DIF","ci.low","ci.hi",
"p.value"))
for (i in 1:length(isub)){
if(is.null(pts)){
ch=near(x1,x1[isub[i]],fr1)
mat[i,1]=x1[isub[i]]
}
if(!is.null(pts)){
ch=near(x1,pts[i],fr1)
mat[i,1]=pts[i]
}
mat[i,2]=sum(ch)
if(!DISDIF){
if(!DIF){
if(!TLS){
test=rmmismcp(m[ch,2],m[ch,3],alpha=alpha,SEED=SEED,est=est,plotit = FALSE,
    nboot = nboot, xlab = xlab, ylab = ylab, pr = pr, ...)
mat[i,3]=est(m[ch,2],na.rm=TRUE)-est(m[ch,3],na.rm=TRUE)
mat[i,4]<-test$output[1,6]
mat[i,5]<-test$output[1,7]
mat[i,6]<-test$output[1,4]
}
if(TLS){
test=rm2miss(m[ch,2],m[ch,3], nboot = nboot, alpha = alpha, SEED = SEED)
mat[i,3]=mean(m[ch,2],na.rm=TRUE)-mean(m[ch,3],na.rm=TRUE)
mat[i,4]<-test$ci[1]
mat[i,5]<-test$ci[2]
mat[i,6]<-test$p.value
}}
if(DIF){
test=onesampb(m[ch,2]-m[ch,3],est=est,nboot=nboot,alpha=alpha,SEED=SEED,...)
mat[i,3]=est(m[ch,2]-m[ch,3],na.rm=TRUE,...)
mat[i,4]<-test$ci[1]
mat[i,5]<-test$ci[2]
mat[i,6]<-test$p.value
}}
if(DISDIF){
test=l2drmci(m[ch,2:3],pr=FALSE,nboot=nboot,na.rm=na.rm)
mat[i,3]<-loc2dif(m[ch,2],m[ch,3],na.rm=na.rm)
mat[i,4]<-test$ci[1]
mat[i,5]<-test$ci[2]
mat[i,6]<-test$p.value
}}
if(plotit)
runmean2g(x1,y1,x1,y2,fr=fr1,est=est,sm=sm,xout=xout,outfun=outfun)
list(output=mat)
}

lplotPV<-function(x,y, span = 0.75, xout = FALSE,pr=TRUE,
    outfun = out,nboot=1000,SEED=TRUE,plotit=TRUE,pyhat = FALSE, expand = 0.5, low.span = 2/3,
    varfun = pbvar, cor.op = FALSE, cor.fun = pbcor, scale = FALSE,
    xlab = "X", ylab = "Y", zlab = "", theta = 50, phi = 25,
    family = "gaussian", duplicate = "error", pc = "*", ticktype = "simple",...){
#
# Compute a p-value based on the Strength of Association estimated via lplot
# If significant, conclude there is dependence.
#
if(SEED)set.seed(2)
x=as.matrix(x)
if(ncol(x)==2 && !scale){
if(pr){
print("scale=F is specified.")
print("If there is dependence, might use scale=T")
}}
vals=NA
nv=ncol(x)
m=elimna(cbind(x,y))
x<-m[,1:nv]
y<-m[,nv+1]
if(xout){
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:nv]
y<-m[,nv+1]
}
x=as.matrix(x)
est=lplot(x,y,span=span,plotit=plotit,pr=FALSE, pyhat = pyhat,
    outfun = outfun, expand = expand, low.span = low.span,
    varfun = varfun, cor.op =cor.op, cor.fun = cor.fun, scale = scale,
    xlab = xlab, ylab = ylab, zlab =zlab, theta =theta, phi = phi,
    family = family, duplicate = duplicate, pc = pc, ticktype = ticktype,...)
n=nrow(x)
data1<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(i in 1:nboot){
vals[i]=lplot(x[data1[i,],],y[data2[i,]],plotit=FALSE,pr=FALSE)$Strength.Assoc
}
p=mean(est$Strength<vals)
list(p.value=p,Strength.Assoc=est$Strength.Assoc,Explanatory.power=est$Explanatory.power,yhat.values=est$yhat.values)
}
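
#  EXAMPLE (a sketch with simulated data):
#  set.seed(1)
#  x=rnorm(50)
#  y=x+rnorm(50)
#  lplotPV(x,y,plotit=FALSE)
#  A small p-value indicates dependence, based on bootstrapping the
#  Strength of Association under independent resampling of x and y.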

M1M2<-function(m1,m2,test=yuen,alpha=.05,...){
#
# Goal: compare data in col 1 of m1 to col 1 data in m2,
# do this again for col 2, etc.
# control FWE via Hochberg  method
# alpha is the desired family wise error rate
# The argument test is assumed to be a function that returns a p-value stored in $p.value
#
vals=0
if(is.list(m1))m1=matl(m1)
if(is.list(m2))m2=matl(m2)
m1=as.matrix(m1)
m2=as.matrix(m2)
ntest=ncol(m1)
outp=matrix(0,ncol=2,nrow=ntest)
dimnames(outp)=list(NULL,c("p.value","crit.p.value"))
if(ncol(m1)!=ncol(m2))stop("m1 and m2 do not have the same number of columns")
for(i in 1:ncol(m1))vals[i]=test(m1[,i],m2[,i],...)$p.value
outp[,1]=vals
dvec=alpha/(c(1:ntest))
temp2<-order(0-vals)
zvec<-dvec
outp[temp2,2]=zvec
flag=(outp[,1]<=outp[,2])
dd=sum(outp[,1]<=outp[,2])
chk=c(1:ntest)
list(results=outp,number.sig=dd,sig.tests=chk[flag])
}
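
#  EXAMPLE: for matrices m1 and m2 having the same number of columns,
#  M1M2(m1,m2)             # compare column j of m1 to column j of m2 via yuen
#  M1M2(m1,m2,test=medpb2) # the same comparisons based on medians
#  In both cases FWE is controlled with Hochberg's method.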

dbetabin<-function(x,n,r,s){
#
# probability function for the beta-binomial distribution
#
v=lgamma(n+1)+lgamma(r+x)+lgamma(n+s-x)+lgamma(r+s)-lgamma(x+1)-lgamma(n-x+1)-lgamma(r+s+n)-lgamma(r)-lgamma(s)
v=exp(v)
v
}
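
#  CHECK: with r=s=1 the beta-binomial reduces to the discrete uniform
#  distribution on 0,...,n, so dbetabin(x,n,1,1)=1/(n+1) for any x in 0:n.
#  For example, dbetabin(0:4,4,1,1) returns .2 five times.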

regci.inter<-function(x,y, regfun = tsreg, nboot = 599, alpha = 0.05, SEED = TRUE,
    pr = TRUE, xout = FALSE, outfun = out, ...){
#
# A function for dealing with the usual regression interaction
# model where the product of  two independent variables is used.
#
if(ncol(x)!=2)stop("This function assumes there are two predictors")
xx=cbind(x,x[,1]*x[,2])
output=regci(xx,y,regfun = regfun, nboot = nboot, alpha =alpha, SEED = SEED, pr =pr,
xout = xout, outfun = outfun,...)
output
}

olshc4.inter<-function(x,y, alpha = 0.05, xout = FALSE, outfun = out, ...){
#
# A function for dealing with the usual regression interaction
# model where the product of  two independent variables is used.
#
if(ncol(x)!=2)stop("This function assumes there are two predictors")
xx=cbind(x,x[,1]*x[,2])
output=olshc4(xx,y, alpha =alpha, xout = xout, outfun = outfun,...)
output
}
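
#  EXAMPLE: for predictors stored in the two columns of x,
#  olshc4.inter(x,y)
#  regresses y on x[,1], x[,2] and the product x[,1]*x[,2]; the test of
#  the third slope is then a test of no interaction under the usual
#  product-term model. regci.inter works in the same manner using a
#  robust estimator.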

ancovaG<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=FALSE,pts=NULL,sm=FALSE,
pr=TRUE,xout=FALSE,outfun=out,test=medpb2,...){
#
# This function generalizes the R function ancova so that any hypothesis testing method
# can be used to compare groups at specified design points.
#
# Compare two independent  groups using the ancova method coupled with method
# indicated by the argument test.
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
#
#  Assume data are in x1 y1 x2 and y2
#
#  sm=T will create smooths using bootstrap bagging.
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#
xy=elimna(cbind(x1,y1))
x1=xy[,1]
y1=xy[,2]
xy=elimna(cbind(x2,y2))
x2=xy[,1]
y2=xy[,2]
output=list()
if(is.null(pts[1])){
mat<-matrix(NA,5,3)
dimnames(mat)<-list(NULL,c("X","n1","n2"))
npt<-5
isub<-c(1:5)  # Initialize isub
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
for (i in 1:5){
mat[i,1]=x1[isub[i]]
g1<-y1[near(x1,x1[isub[i]],fr1)]
g2<-y2[near(x2,x1[isub[i]],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
mat[i,2]=length(g1)
mat[i,3]=length(g2)
output[[i]]<-test(g1,g2,...)
}}
if(!is.null(pts[1])){
mat<-matrix(NA,length(pts),3)
dimnames(mat)<-list(NULL,c("X","n1","n2"))
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
for (i in 1:length(pts)){
mat[i,1]=pts[i]
g1<-y1[near(x1,pts[i],fr1)]
g2<-y2[near(x2,pts[i],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
mat[i,2]=length(g1)
mat[i,3]=length(g2)
output[[i]]<-test(g1,g2,...)
}}
if(plotit)
runmean2g(x1,y1,x2,y2,fr=fr1,est=mean,tr=tr,sm=sm,xout=xout,outfun=outfun,...)
list(mat,output)
}
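
#  EXAMPLE (a sketch): with data in x1, y1, x2 and y2,
#  ancovaG(x1,y1,x2,y2,test=yuen)
#  compares the two groups at five automatically chosen design points
#  using yuen in place of the default medpb2.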

ancovampG<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,pts=NULL,SEED=TRUE,test=medpb2,DH=FALSE,FRAC=.5,...){
#
#  This function generalizes the R function ancovamp so that any hypothesis testing method
#  can be used to compare groups at specified design points.
#
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
# Design points are chosen based on depth of points in x1 if pts=NULL
#  Assume data are in x1 y1 x2 and y2
#
#  test: argument test determines the method that will be used to compare groups.
#
#  pts can be a matrix of design points for which groups are compared
#
#  DH=T, groups compared at the deepest (1-FRAC) design points.
#
if(SEED)set.seed(2) # now cov.mve always returns same result
x1=as.matrix(x1)
p=ncol(x1)
p1=p+1
m1=elimna(cbind(x1,y1))
x1=m1[,1:p]
y1=m1[,p1]
x2=as.matrix(x2)
p=ncol(x2)
p1=p+1
m2=elimna(cbind(x2,y2))
x2=m2[,1:p]
y2=m2[,p1]
#
#
#
if(is.null(pts[1])){
x1<-as.matrix(x1)
pts<-ancdes(x1,DH=DH,FRAC=FRAC)
}
pts<-as.matrix(pts)
n1<-1
n2<-1
vecn<-1
mval1<-cov.mve(x1)
mval2<-cov.mve(x2)
for(i in 1:nrow(pts)){
n1[i]<-length(y1[near3d(x1,pts[i,],fr1,mval1)])
n2[i]<-length(y2[near3d(x2,pts[i,],fr2,mval2)])
}
flag<-rep(T,nrow(pts))
for(i in 1:nrow(pts))if(n1[i]<10 || n2[i]<10)flag[i]<-F
pts<-pts[flag,]
if(sum(flag)==1)pts<-t(as.matrix(pts))
if(sum(flag)==0)stop("No comparable design points found, might increase span.")
mat<-matrix(NA,nrow(pts),3)
dimnames(mat)<-list(NULL,c("n1","n2","p.value"))
output=list()
for (i in 1:nrow(pts)){
g1<-y1[near3d(x1,pts[i,],fr1,mval1)]
g2<-y2[near3d(x2,pts[i,],fr2,mval2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
temp=test(g1,g2,...)
if(is.null(temp$p.value))print("Apparently argument test is a function that does not return a p-value")
mat[i,3]=temp$p.value
output[[i]]=temp
mat[i,1]<-length(g1)
mat[i,2]<-length(g2)
if(length(g1)<=5)print(paste("Warning, there are",length(g1)," points corresponding to the design point X=",pts[i,]))
if(length(g2)<=5)print(paste("Warning, there are",length(g2)," points corresponding to the design point X=",pts[i,]))
}
list(points=pts,results=mat)
}

mat2list<-function(m,grp.dat){
#
#  For data in a matrix m, divide the data into groups based
#  on the values in column indicated
#  by the argument grp.dat
#  and store the data in list mode.
#
# This function is like fac2list, only it handles matrices
#
# Example: z=mat2list(m[,2:5],m[,9])
# will divide the rows of data in columns 2-5 into groups based
# on the group id data in column 9
# This is done via the function mat2grp
#
# z[[1]] will contain the data in m[,2:5] that is associated with first group
# z[[2]] will contain the data in m[,2:5] that is associated with second group, etc.
#
# If any entry in grp.dat is NA, this row is eliminated from m
#
if(!is.null(dim(m)))m=as.matrix(m)
if(!is.matrix(m))stop("Data must be stored in a matrix or data frame")
p=ncol(m)
p1=p+1
M=cbind(m,grp.dat)
x<-mat2grp(M[,1:p1],p1)
for(i in 1:length(x))x[[i]]=x[[i]][,1:p]
x
}

regpecv<-function(x,y,regfun=tsreg,varfun=pbvar,...){
#
# Estimate prediction error via leave-one-out cross-validation
#
# regfun defaults to Theil-Sen estimator
# function returns measure of prediction error: robust measure of variation
# applied to the n differences y_i-y_{-i}, i=1,...,n
# where y_{-i} is the estimate of y_i when the ith vector of observations is omitted.
#
xy=elimna(cbind(x,y))
x=as.matrix(x)
px=ncol(x)
px1=px+1
n=nrow(xy)
vals=NA
for(i in 1:n){
est=regfun(xy[-i,1:px],xy[-i,px1])$coef
vals[i]=xy[i,px1]-(est[1]+sum(est[2:px1]*xy[i,1:px]))
}
pe=varfun(vals)
pe
}
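
#  EXAMPLE: leave-one-out prediction error can be used to compare
#  estimators; smaller values indicate better prediction. For instance,
#  regpecv(x,y)               # Theil-Sen
#  regpecv(x,y,regfun=lsfit)  # least squares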


idmatch<-function(m1,m2,id.col1,id.col2=id.col1){
#
#  for the id data in column id.col of matrices m1 and m2
#  pull out data for which both m1 and m2 have matching id's
#  return the data in a matrix, m
#
flag=!is.na(m1[,id.col1])
m1=m1[flag,]  # eliminate any rows where ID is missing
flag=!is.na(m2[,id.col2])
m2=m2[flag,]
M1=NULL
if(sum(duplicated(m1[,id.col1]))>0)stop('Duplicate ids detected in m1')
if(sum(duplicated(m2[,id.col2]))>0)stop('Duplicate ids detected in m2')
for(i in 1:nrow(m1)){
flag=duplicated(c(m1[i,id.col1],m2[,id.col2]))
if(sum(flag)>0){
if(is.data.frame(m1)){
if(!is.null(dim(M1)))M1=rbind(M1,as.data.frame(m1[i,]))
if(is.null(dim(M1)))M1=as.data.frame(m1[i,])
}
if(!is.data.frame(m1)){
if(!is.null(dim(M1)))M1=rbind(M1,m1[i,])
if(is.null(dim(M1)))M1=matrix(m1[i,],nrow=1)
}
}}
M2=NULL
for(i in 1:nrow(m2)){
flag=duplicated(c(m2[i,id.col2],m1[,id.col1]))
if(sum(flag)>0){
if(is.data.frame(m2)){
if(!is.null(dim(M2)))M2=rbind(M2,as.data.frame(m2[i,]))
if(is.null(dim(M2)))M2=as.data.frame(m2[i,])
}
if(!is.data.frame(m2)){
if(!is.null(dim(M2)))M2=rbind(M2,m2[i,])
if(is.null(dim(M2)))M2=matrix(m2[i,],nrow=1)
}
}}
m=cbind(M1[,id.col1],M1[,-id.col1],M2[,-id.col2])
m
}
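
#  EXAMPLE (the column numbers are illustrative): if the id is stored
#  in column 1 of both m1 and m2,
#  idmatch(m1,m2,id.col1=1)
#  returns the shared ids in column 1, followed by the remaining
#  columns of m1 and then the remaining columns of m2.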


rplotCV<-function(x,y,fr=NA,varfun=pbvar,est=tmean,xout=FALSE,outfun=out,eout=FALSE,corfun=pbvar,...){
#
# Estimate prediction error based on
# a running interval smoother in conjunction with
# a leave-one-out cross validation method
#
#  varfun is the measure of variation used on the predicted Y values.
#  est is the measure of location used by the running interval smoother.
#  The estimate is returned in VAR.Y.HAT
#
m=elimna(cbind(x,y))
if(eout){
flag<-outfun(m,plotit=FALSE)$keep
m=m[flag,]
}
x=as.matrix(x)
p=ncol(x)
p1=p+1
x=as.matrix(m[,1:p])
y=m[,p1]
vals=NA
if(is.na(fr)){
if(p==1)fr=.8
if(p>1)fr=1
}
if(xout){
keepit<-outfun(x,plotit=FALSE,...)$keep
x<-x[keepit,]
y<-y[keepit]
}
x=as.matrix(x)
for(i in 1:nrow(x)){
if(p==1)vals[i]=runhat(x[-i,],y[-i],fr=fr,est=est,pts=x[i,],...)
if(p>1)vals[i]=rung3hat(x[-i,],y[-i],fr=fr,pts=t(as.matrix(x[i,])))$rmd
}
dif=y-vals
ans=varfun(elimna(dif))
list(VAR.Y.HAT=ans)
}

SMpre<-function(x,y,est=tmean,fr=NA,varfun=pbvar,model=NULL,adz=TRUE,
xout=FALSE,outfun=out,...){
#
#  Estimate prediction error for all of the models specified by the
#  the argument model, which has list mode.
# Leave-one-out cross-validation is used in conjunction with a running interval smoother
#
if(xout){
x<-as.matrix(x)
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
}
x=as.matrix(x)
p=ncol(x)
if(p>5)stop("Can have at most 5 predictors")
if(is.null(model))model=modgen(p)
mout<-matrix(NA,length(model),3,dimnames=list(NULL,c("error",
"var.used","rank")))
for(imod in 1:length(model)){
nmod=length(model[[imod]])-1
temp=c(nmod:0)
mout[imod,2]=sum(model[[imod]]*10^temp)
mout[imod,1]=rplotCV(x[,model[[imod]]],y,fr=fr,est=est,varfun=varfun)$VAR.Y.HAT
}
if(adz){
va=0
for(i in 1:length(y))va[i]=y[i]-tmean(y[-i]) # null model: leave-one-out trimmed mean
no=pbvar(va)
mout=rbind(mout,c(no,0,NA))
}
mout[,3]=rank(mout[,1])
list(estimates=mout)
}

mch2num<-function(x){
# convert character, stored in matrix, to numeric data.
m=matrix(NA,nrow=nrow(x),ncol=ncol(x))
for(j in 1:ncol(x))m[,j]=as.numeric(x[,j])
m
}

ddepv2<-function(x,est=onestep,alpha=.05,grp=NA,nboot=500,plotit=TRUE,SEED=TRUE,pr=TRUE,...){
#
#   Do ANOVA on dependent groups
#   using the partially centered method plus
#   depth of zero among  bootstrap values.
#
#   An improved version of ddep that better handles heteroscedasticity
#   (A weighted grand mean is used in this version.)
#
#   The data are assumed to be stored in x in list mode
#   or in a matrix. In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, columns correspond to groups.
#
#   grp is used to specify some subset of the groups, if desired.
#   By default, all J groups are used.
#
#   The default number of bootstrap samples is nboot=500
#
if(pr)print("Warning: Might not be level robust if the number of groups is relatively large and n is small")
if(pr)print("To avoid this problem, use bd1way or rmmismcp")
if(pr)print("Currently seems that rmmismcp is preferable")
if(is.list(x)){
nv<-NA
for(j in 1:length(x))nv[j]<-length(x[[j]])
if(var(nv) !=0){
stop("The groups are stored in list mode and appear to have different sample sizes")
}
temp<-matrix(NA,ncol=length(x),nrow=nv[1])
for(j in 1:length(x))temp[,j]<-x[[j]]
x<-temp
}
J<-ncol(x)
if(!is.na(grp[1])){ #Select the groups of interest
J<-length(grp)
for(j in 1:J)temp[,j]<-x[,grp[j]]
x<-temp
}
x<-elimna(x) # Remove any rows with missing values.
bvec<-matrix(0,ncol=J,nrow=nboot)
hval<-vector("numeric",J)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
print("Taking bootstrap samples. Please wait.")
n<-nrow(x)
totv<-apply(x,2,est,na.rm=TRUE,...)
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot)bvec[ib,]<-apply(x[data[ib,],],2,est,na.rm=TRUE,...) #nboot by J matrix
#gv<-rep(mean(totv),J) #Grand mean
bplus<-nboot+1
#m1<-rbind(bvec,gv)
center<-totv
cmat<-var(bvec)
wt=1/diag(cmat)
ut=sum(wt)
gv<-rep(sum(wt*totv)/ut,J) #Grand mean
m1<-rbind(bvec,gv)
discen<-mahalanobis(m1,totv,cmat)
#print("Bootstrap complete; computing significance level")
if(plotit && ncol(x)==2){
plot(bvec,xlab="Group 1",ylab="Group 2")
temp.dis<-order(discen[1:nboot])
ic<-round((1-alpha)*nboot)
xx<-bvec[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
abline(0,1)
}
sig.level<-sum(discen[bplus]<=discen)/bplus
list(p.value=sig.level,center=totv,grand.mean=gv)
}

ddeptr<-function(x,na.rm=TRUE,alpha=.05,grp=NA,nboot=500,plotit=TRUE,SEED=TRUE,op=FALSE,tr=.2,...){
#
#   Do ANOVA on dependent groups
#   using the partially centered method plus
#   depth of zero among  bootstrap values.
#
#  The method is like the method used by the R function ddep,
#  but a weighted estimate of the grand mean is used.
#  This helps deal the heteroscedasticity among the marginal distributions.
#
#   The data are assumed to be stored in x in list mode
#   or in a matrix. In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, columns correspond to groups.
#
#  trimmed means are compared
#
#   grp is used to specify some subset of the groups, if desired.
#   By default, all J groups are used.
#
#   The default number of bootstrap samples is nboot=500
#
#   na.rm=T, all rows of data with missing values are removed.
#   na.rm=F will use all of the data assuming missing values occur at random
#
if(is.list(x)){
nv<-NA
for(j in 1:length(x))nv[j]<-length(x[[j]])
if(var(nv) !=0){
stop("The groups are stored in list mode and appear to have different sample sizes")
}
temp<-matrix(NA,ncol=length(x),nrow=nv[1])
for(j in 1:length(x))temp[,j]<-x[[j]]
x<-temp
}
J<-ncol(x)
if(!is.na(grp[1])){ #Select the groups of interest
J<-length(grp)
for(j in 1:J)temp[,j]<-x[,grp[j]]
x<-temp
}
if(na.rm)x<-elimna(x) # Remove any rows with missing values.
bvec<-matrix(0,ncol=J,nrow=nboot)
hval<-vector("numeric",J)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
if(op)print("Taking bootstrap samples. Please wait.")
n<-nrow(x)
wt=apply(x,2,trimse,...)
wt=1/wt^2
ut=sum(wt)
totv<-apply(x,2,tmean,na.rm=TRUE,...)
gv<-rep(sum(wt*totv)/ut,J) #Weighted grand mean
#gv<-rep(mean(totv),J)
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot)bvec[ib,]<-apply(x[data[ib,],],2,tmean,na.rm=TRUE,...) #nboot by J matrix
#gv<-rep(mean(totv),J) #Grand mean
bplus<-nboot+1
m1<-rbind(bvec,gv)
center<-totv
cmat<-var(bvec)
discen<-mahalanobis(m1,totv,cmat)
if(op)print("Bootstrap complete; computing significance level")
if(plotit && ncol(x)==2){
plot(bvec,xlab="Group 1",ylab="Group 2")
temp.dis<-order(discen[1:nboot])
ic<-round((1-alpha)*nboot)
xx<-bvec[temp.dis[1:ic],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
abline(0,1)
}
sig.level<-sum(discen[bplus]<=discen)/bplus
list(p.value=sig.level,center=totv,weighted.grand.mean=gv[1])
}


qcomhd<-function(x,y,q=c(.1,.25,.5,.75,.9),nboot=2000,plotit=TRUE,SEED=TRUE,xlab="Group 1",ylab="Est.1-Est.2",alpha=.05){
#
# Compare quantiles using pb2gen
# via hd estimator. Tied values are allowed.
# When comparing lower or upper quartiles, both power and the probability of Type I error
# compare well to other methods that have been derived.
# q: can be used to specify the quantiles to be compared
# q defaults to comparing the .1,.25,.5,.75, and .9 quantiles
#   Function returns p-values and critical p-values based on Hochberg's method.
#
if(SEED)set.seed(2)
pv=NULL
output=matrix(NA,nrow=length(q),ncol=10)
dimnames(output)<-list(NULL,c("q","n1","n2","est.1","est.2","est.1_minus_est.2","ci.low","ci.up","p_crit","p-value"))
for(i in 1:length(q)){
output[i,1]=q[i]
output[i,2]=length(elimna(x))
output[i,3]=length(elimna(y))
output[i,4]=hd(x,q=q[i])
output[i,5]=hd(y,q=q[i])
output[i,6]=output[i,4]-output[i,5]
temp=pb2gen(x,y,nboot=nboot,est=hd,q=q[i],SEED=FALSE,alpha=alpha,pr=FALSE)
output[i,7]=temp$ci[1]
output[i,8]=temp$ci[2]
output[i,10]=temp$p.value
}
temp=order(output[,10],decreasing=TRUE)
zvec=alpha/c(1:length(q))
output[temp,9]=zvec
#print(output)
output <- data.frame(output)
output$signif=rep("YES",nrow(output))
for(i in 1:nrow(output)){
if(output[temp[i],10]>output[temp[i],9])output$signif[temp[i]]="NO"
if(output[temp[i],10]<=output[temp[i],9])break
}
if(plotit){
xax=rep(output[,4],3)
yax=c(output[,6],output[,7],output[,8])
plot(xax,yax,xlab=xlab,ylab=ylab,type="n")
points(output[,4],output[,6],pch="*")
lines(output[,4],output[,6])
points(output[,4],output[,7],pch="+")
points(output[,4],output[,8],pch="+")
}
output
}
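
#  EXAMPLE:
#  qcomhd(x,y,q=c(.25,.75))
#  compares only the lower and upper quartiles of the two groups, the
#  situation where this method performs particularly well.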

qhdplotsm<-function(x,y,q=.5,xlab="X",ylab="Y",pc=".",
xout=FALSE,outfun=out,nboot=40,fr=1,...){
#
# Plots smooths of quantile regression lines for one or more quantiles
# using rplotsm with Harrell--Davis estimator
#
# q indicates the quantiles to be used.
#
#  EXAMPLE:
#  qhdplotsm(x,y,q=c(.2,.5,.8)) will plot three smooths corresponding to
#  the .2, .5 and .8 quantile regression lines.
#
xy=elimna(cbind(x,y))
x=as.matrix(x)
if(ncol(x)!=1)stop("Only one predictor is allowed")
x=xy[,1]
y=xy[,2]
if(xout){
flag<-outfun(x,...)$keep
x<-x[flag]
y<-y[flag]
}
plot(x,y,xlab=xlab,ylab=ylab,pch=pc)
xord=order(x)
for(j in 1:length(q)){
yhat=rplotsm(x,y,fr=fr,pyhat=TRUE,est=hd,q=q[j],plotit=FALSE,nboot=nboot)$yhat
lines(x[xord],yhat[xord])
}
print("Done")
}

outmah<-function(x,qval=pnorm(3),plotit=TRUE,xlab="VAR 1",ylab="VAR 2"){
#
#  detect outliers using Mahalanobis Distance
#   For demonstration purposes only. Suggest
#   using a method that avoids masking.
#
#  In univariate case, default strategy is to use 3 standard deviation rule
#
x=elimna(x)
x=as.matrix(x)
m=apply(x,2,mean)
v=cov(x)
dis=mahalanobis(x,m,v)
crit<-sqrt(qchisq(qval,ncol(x)))
vec<-c(1:nrow(x))
dis[is.na(dis)]=0
dis<-sqrt(dis)
chk<-ifelse(dis>crit,1,0)
id<-vec[chk==1]
keep<-vec[chk==0]
if(is.matrix(x)){
if(ncol(x)==2 && plotit){
plot(x[,1],x[,2],xlab=xlab,ylab=ylab,type="n")
flag<-rep(T,nrow(x))
flag[id]<-F
points(x[flag,1],x[flag,2])
if(sum(!flag)>0)points(x[!flag,1],x[!flag,2],pch="*")
}}
if(!is.matrix(x))outval<-x[id]
if(is.matrix(x))outval<-x[id,]
list(out.val=outval,out.id=id,keep=keep,dis=dis,crit=crit)
}

difQplot<-function(x,y=NULL,xlab="Quantile",ylab="Effect Size"){
#
#  Plot that provides perspective on the degree a distribution is symmetric about zero.
#  This function plots the sum of q and 1-q quantiles. If the distributions are symmetric
#  the plot should be approximately a horizontal line. If in addition the median
# of the difference scores is zero, the horizontal line will intercept the y-axis at zero.
#
if(is.null(y))dif=x
if(!is.null(y))dif=x-y
dif=elimna(dif)
qd=NA
for(i in 1:99)qd[i]=hd(dif,.5-i/200)+hd(dif,.5+i/200)
plot(.5-c(1:99)/200,qd,xlab=xlab,ylab=ylab)
}

Dqcomhd<-function(x,y,q=c(1:9)/10,nboot=1000,plotit=TRUE,SEED=TRUE,xlab="Group 1",ylab="Est.1-Est.2",na.rm=TRUE,alpha=.05){
#
# Compare the quantiles of the marginal distributions associated with  two dependent groups
# via hd estimator. Tied values are allowed.
# When comparing lower or upper quartiles, both power and the probability of Type I error
# compare well to other methods that have been derived.
#
#  x: data for group 1
#  y: data for group 2
#  q: the quantiles to be compared
#  nboot: Number of bootstrap samples
#
#
if(SEED)set.seed(2)
if(na.rm){
xy=elimna(cbind(x,y))
x=xy[,1]
y=xy[,2]
}
pv=NULL
output=matrix(NA,nrow=length(q),ncol=10)
dimnames(output)<-list(NULL,c("q","n1","n2","est.1","est.2","est.1_minus_est.2","ci.low","ci.up","p_crit","p-value"))
for(i in 1:length(q)){
output[i,1]=q[i]
output[i,2]=length(elimna(x))
output[i,3]=length(elimna(y))
output[i,4]=hd(x,q=q[i])
output[i,5]=hd(y,q=q[i])
output[i,6]=output[i,4]-output[i,5]
if(na.rm){
temp=bootdpci(x,y,est=hd,q=q[i],dif=FALSE,plotit=FALSE,pr=FALSE,nboot=nboot,alpha=alpha,SEED=FALSE)
output[i,7]=temp$output[1,5]
output[i,8]=temp$output[1,6]
output[i,10]=temp$output[1,3]
}
if(!na.rm){
temp=rmmismcp(x,y,est=hd,q=q[i],plotit=FALSE,pr=FALSE,nboot=nboot,alpha=alpha,SEED=FALSE)
output[i,7]=temp$output[1,6]
output[i,8]=temp$output[1,7]
output[i,10]=temp$output[1,4]
}
}
if(plotit){
xax=rep(output[,4],3)
yax=c(output[,6],output[,7],output[,8])
plot(xax,yax,xlab=xlab,ylab=ylab,type="n")
points(output[,4],output[,6],pch="*")
lines(output[,4],output[,6])
points(output[,4],output[,7],pch="+")
points(output[,4],output[,8],pch="+")
}
temp=order(output[,10],decreasing=TRUE)
zvec=alpha/c(1:length(q))
output[temp,9]=zvec
output <- data.frame(output)
output$signif=rep("YES",nrow(output))
for(i in 1:nrow(output)){
if(output[temp[i],10]>output[temp[i],9])output$signif[temp[i]]="NO"
if(output[temp[i],10]<=output[temp[i],9])break
}
output
}


Dqdif<-function(x,y=NULL,q=.25,nboot=1000,plotit=TRUE,xlab="Group 1 - Group 2",SEED=TRUE,alpha=.05){
#
#  Compare two dependent groups by comparing the
#  q and 1-q quantiles of the difference scores
#
# q should be < .5
# if the groups do not differ, then the difference scores should be symmetric
# about zero.
# In particular, the sum of q and 1-q quantiles should be zero.
#
# q indicates the quantiles to be compared. By default, the .25 and .75 quantiles are used.
#
if(SEED)set.seed(2)
if(q>=.5)stop("q should be less than .5")
if(!is.null(y)){
xy=elimna(cbind(x,y))
dif=xy[,1]-xy[,2]
}
if(is.null(y))dif=elimna(x)
n=length(dif)
if(plotit)akerd(dif,xlab=xlab)
bvec=NA
data<-matrix(sample(n,size=n*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot){
bvec[ib]<-hd(dif[data[ib,]],q=q)+hd(dif[data[ib,]],q=1-q)
}
est1=hd(dif,q=q)
est2=hd(dif,q=1-q)
pv=mean(bvec<0)+.5*mean(bvec==0)
p=2*min(c(pv,1-pv))
low<-round((alpha/2)*nboot)+1
up<-nboot-low
sbvec=sort(bvec)
ci=sbvec[low]
ci[2]=sbvec[up]
list(est.q=est1,est.1.minus.q=est2,conf.interval=ci,p.value=p)
}
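
#  EXAMPLE: for paired data in x and y,
#  Dqdif(x,y)
#  tests whether the .25 and .75 quantiles of the difference scores sum
#  to zero; rejecting indicates the distribution of D=X-Y is not
#  symmetric about zero.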

qwmwhd<-function(x,y,q=seq(5,40,5)/100,xlab="Quantile",ylab="Sum of q and 1-q Quantiles",plotit=TRUE,alpha=.05,nboot=1000){
#
#  Plot that provides perspective on the degree a distribution is symmetric about zero.
#  This function plots the sum of q and 1-q quantiles of the distribution of D=X-Y, X and Y independent.
#  A 1-alpha confidence interval for the sum is indicated by a +
#  If the distribution is symmetric
#  the plot should be approximately a horizontal line.
#
#  FWE is controlled via Hochberg's method, which was used to determine critical
#  p-values based on the argument
#  alpha.
#
#  Can alter the quantiles compared via the argument
#  q
#  q must be less than .5
#
x=elimna(x)
y=elimna(y)
n1=length(x)
n2=length(y)
output=matrix(NA,ncol=8,nrow=length(q))
dimnames(output)=list(NULL,c("quantile","Est.1","Est.2","SUM","ci.low","ci.up","p_crit","p-value"))
for(i in 1:length(q)){
test=cbmhd(x,y,q=q[i],plotit=FALSE,nboot=nboot)
output[i,1]=q[i]
output[i,2]=test$Est1
output[i,3]=test$Est2
output[i,4]=test$sum
output[i,8]=test$p.value
output[i,5]=test$ci[1]
output[i,6]=test$ci[2]
}
temp=order(output[,8],decreasing=TRUE)
zvec=alpha/c(1:length(q))
output[temp,7]=zvec
output <- data.frame(output)
output$signif=rep("YES",nrow(output))
for(i in 1:nrow(output)){
if(output[temp[i],8]>output[temp[i],7])output$signif[temp[i]]="NO"
if(output[temp[i],8]<=output[temp[i],7])break
}
if(plotit){
plot(rep(q,3),c(output[,4],output[,5],output[,6]),type="n",xlab=xlab,ylab=ylab)
points(q,output[,6],pch="+")
points(q,output[,5],pch="+")
points(q,output[,4],pch="*")
}
list(n=c(n1,n2),output=output)
}


difQpci<-function(x,y=NULL,q=seq(5,40,5)/100,xlab="Quantile",ylab="Group 1 minus Group 2",plotit=TRUE,alpha=.05,nboot=1000,SEED=TRUE,LINE=FALSE){
#
#  x can be a vector, in which case the quantiles of the distribution of the data in x are compared
#  x can be a matrix with 2 columns, in which case the analysis is done on dif=x[,1]-x[,2]
#  if y is supplied, the analysis is done on dif=x-y
#
#  Plot that provides perspective on the degree a distribution is symmetric about zero.
#  This function plots the sum of q and 1-q quantiles. A 1-alpha confidence interval for the sum is indicated by a +
#  If the distributions are symmetric
#  the plot should be approximately a horizontal line. If in addition the median
#  of the difference scores is zero, the horizontal line will intersect the y-axis at zero.
#
#  Similar to difQplot, only plots fewer quantiles by default and returns p-values for
#  each quantile indicated by the argument q.
#
#  FWE is controlled via Hochberg's method, which was used to determine critical
#  p-values based on the argument
#  alpha.
#
#  Can alter the quantiles compared via the argument
#  q
#  q must be less than .5
#
#  LINE=TRUE. When plotting, a line connecting the estimates will be included.
#
x=as.matrix(x)
if(is.null(y))dif=x
if(ncol(x)>2)stop("Should be at most two groups")
if(ncol(x)==2)dif=x[,1]-x[,2]
if(!is.null(y))dif=x-y
dif=elimna(dif)
nv=length(dif)
output=matrix(NA,ncol=8,nrow=length(q))
dimnames(output)=list(NULL,c("quantile","Est_q","Est_1.minus.q","SUM","ci.low","ci.up","p_crit","p-value"))
for(i in 1:length(q)){
test=Dqdif(dif,q=q[i],plotit=FALSE,nboot=nboot,SEED=SEED)
output[i,1]=q[i]
output[i,2]=test$est.q
output[i,3]=test$est.1.minus.q
output[i,8]=test$p.value
output[i,5]=test$conf.interval[1]
output[i,6]=test$conf.interval[2]
}
temp=order(output[,8],decreasing=TRUE)
zvec=alpha/c(1:length(q))
output[temp,7]=zvec
output <- data.frame(output)
output$signif=rep("YES",nrow(output))
for(i in 1:nrow(output)){
if(output[temp[i],8]>output[temp[i],7])output$signif[temp[i]]="NO"
if(output[temp[i],8]<=output[temp[i],7])break
}
output[,4]=output[,2]+output[,3]
if(plotit){
plot(rep(q,3),c(output[,4],output[,5],output[,6]),type="n",xlab=xlab,ylab=ylab)
points(q,output[,6],pch="+")
points(q,output[,5],pch="+")
points(q,output[,4],pch="*")
if(LINE)lines(q,output[,4])
}
list(n=nv,output=output)
}

bsqrm<-function(x,y,alpha=0.05,bend=1.28){
#
#  Computes Bsqrm test statistic. This test statistic is from Ozdemir (2012)
#  "mestse" was used as the standard error of one-step M-estimator and
#  "mad" was used as a measure of scale. Both functions were written by
#  R.Wilcox and can be found from "http://www-rcf.usc.edu/~rwilcox"
#
x<-x[!is.na(x)]  # Remove any missing values in x
y<-y[!is.na(y)]  # Remove any missing values in y
zc<-qnorm(alpha/2)
x2<-(x-median(x))/mad(x)
y2<-(y-median(y))/mad(y)
C<-length(x[abs(x2)>bend])
D<-length(y[abs(y2)>bend])
e<-c(C,D)
alist<-list(x,y)
f<-(sapply(alist,length))-e
s=sapply(alist,mestse)^2
wden=sum(1/s)
w=(1/s)/wden
yplus<-sum(w*(sapply(alist,onestep)))
tt<-((sapply(alist,onestep))-yplus)/sqrt(s)
v<-(f-1)
z<-((4*v^2)+(5*((2*(zc^2))+3)/24))/((4*v^2)+v+(((4*(zc^2))+9)/12))*sqrt(v)*(sqrt(log(1+(tt^2/v))))
teststat<-sum(z^2)
list(teststat=teststat)
}

bsqrmbt<-function(x,y,alpha=.05,bend=1.28,nboot=599,SEED=TRUE){
#
#  Goal: Test hypothesis that two independent groups have
#  equal population M-measures of location.
#  A bootstrap-t method is used.
#   The method used was derived by F. Ozdemir
#
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
T<-bsqrm(x,y,alpha,bend)$teststat
TT<-0
bsqrmbt<-numeric(2)
xone<-onestep(x,bend=bend)
yone<-onestep(y,bend=bend)
for(j in 1:nboot)
       {
       xx<-(sample(x,length(x),replace=TRUE)-xone)
       yy<-(sample(y,length(y),replace=TRUE)-yone)
       TT[j]<-bsqrm(xx,yy,alpha,bend)$teststat
       }
TT<-sort(TT)
bott<-round(alpha*nboot)+1
bsqrmbt<-TT[nboot-bott]
list(critval=bsqrmbt,teststat=T)
}

qregplots<-function(x, y,qval=.5,q=NULL,op=1,pr=FALSE,xout=FALSE,outfun=out,plotit=FALSE,xlab="X",ylab="Y",...){
#
# Compute the quantile regression line for one or more quantiles and plot the results
# That is, determine the qth (qval) quantile of Y given X using the
#  the Koenker-Bassett approach.
#
#  One predictor only is allowed
#
#  Uses the function rq in the R library quantreg
#
#  Example: qregplots(x,y,q=c(.25,.5,.75)) will plot the regression lines for
#   predicting quartiles.
#	
if(!is.null(q))qval=q
x<-as.matrix(x)
if(ncol(x)!=1)stop("Current version allows only one predictor")
X<-cbind(x,y)
X<-elimna(X)
np<-ncol(X)
p<-np-1
x<-X[,1:p]
x<-as.matrix(x)
y<-X[,np]
if(xout){
x<-as.matrix(x)
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
est=matrix(NA,ncol=3,nrow=length(qval))
dimnames(est)=list(NULL,c("q","Inter","Slope"))
library(quantreg)
x<-as.matrix(x)
plot(x,y,xlab=xlab,ylab=ylab)
if(ncol(x)!=1)stop("Current version allows only one predictor")
for(j in 1:length(qval)){
coef=coefficients((rq(y~x,tau=qval[j])))
est[j,1]=qval[j]
est[j,2:3]=coef
abline(coef)
}
list(coef = est)
}
acbinomciv2<-function(x=sum(y),nn=length(y),y=NULL,n=NA,alpha=.05,nullval=.5){
#  Compute a p-value when testing the hypothesis that the probability of
#  success for a binomial distribution is equal to
#  nullval, which defaults to .5
#  The Agresti-Coull method is used.
#
#  y is a vector of 1s and 0s.
#  Or can use the argument
#  x = the number of successes observed among
#  n=nn trials.
#
res=acbinomci(x=x,nn=nn,y=y,alpha=alpha)
ci=res$ci
alph<-c(1:99)/100
for(i in 1:99){
irem<-i
chkit<-acbinomci(x=x,nn=nn,y=y,alpha=alph[i])$ci
if(chkit[1]>nullval || chkit[2]<nullval)break
}
p.value<-irem/100
if(p.value<=.1){
iup<-(irem+1)/100
alph<-seq(.001,iup,.001)
for(i in 1:length(alph)){
p.value<-alph[i]
chkit<-acbinomci(x=x,nn=nn,y=y,alpha=alph[i])$ci
if(chkit[1]>nullval || chkit[2]<nullval)break
}}
if(p.value<=.001){
alph<-seq(.0001,.001,.0001)
for(i in 1:length(alph)){
p.value<-alph[i]
chkit<-acbinomci(x=x,nn=nn,y=y,alpha=alph[i])$ci
if(chkit[1]>nullval || chkit[2]<nullval)break
}}
list(n=nn,phat=res$phat,ci=res$ci,p.value=p.value)
}
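
#  EXAMPLE (illustrative counts):
#  acbinomciv2(x=15,nn=20,nullval=.5)
#  computes the p-value for H0: p=.5 as the smallest alpha for which
#  the Agresti-Coull 1-alpha confidence interval excludes .5.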

longreg<-function(x,x.col,y.col,s.id,regfun=tsreg,est=tmean){
#
# x is a data frame or matrix
#
# Longitudinal data.
# For each subject, fit a regression line
# using outcome data in col y.col and predictors, usually times
# when measures were taken, in columns indicated by x.col.
# s.id indicates column where subject's id is stored.
#
# Assuming data are stored as for example in the R variable
# Orthodont,
# which can be accessed via the command  library(nlme)
# For this data set, x.col=2 would indicate that the
# participant's age at the time of being measured is used
# to predict the outcome variable.
#
ymat=long2mat(x,s.id,y.col) # matrix, ith row contains outcome y
#                           for the ith subject.
#
xvals=longcov2mat(x,s.id,x.col)# list mode
n=nrow(ymat)
p=length(x.col)+1
outmat=matrix(NA,nrow=n,ncol=p)
for(i in 1:n)outmat[i,]=regfun(as.matrix(xvals[[i]]),ymat[i,])$coef
typval=apply(outmat,2,est)
list(est.S=outmat,typical.est=typval)
}

psihat<-function(x,est=hd,con,...){
#
#  x has list mode or is a matrix or a data frame
#  compute estimates of linear contrasts
#  indicated by con using estimator est
#  using data in x
#
if(is.matrix(x))x=listm(x)
res=NULL
xbar=lapply(x,est,...)
xbar=as.vector(matl(xbar))
for(d in 1:ncol(con))res[d]=sum(con[,d]*xbar)
res
}
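
#  EXAMPLE: the columns of con code the linear contrasts. With con1way
#  (defined later in this file), for three groups stored in list mode,
#  psihat(x,con=con1way(3))
#  returns the three pairwise differences of the Harrell-Davis
#  estimates of the group medians.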

bwmarpb<-function(J,K,x,est=hd,JK=J*K,grp=c(1:JK),nboot=599,
SEED=TRUE,na.rm=TRUE,...){
#
#  Multiple comparisons using
#  a percentile bootstrap for performing a split-plot design
#  using marginal measures of location
#  By default, the Harrell-Davis estimate of the median (est=hd) is used with nboot=599 bootstrap samples.
#
#  The R variable x is assumed to contain the raw
#  data stored in list mode or in a matrix or a data frame
#  If in list mode, x[[1]] contains the data
#  for the first level of both factors: level 1,1.
#  x[[2]] is assumed to contain the data for level 1 of the
#  first factor and level 2 of the second: level 1,2
#  x[[K]] is the data for level 1,K
#  x[[K+1]] is the data for level 2,1, x[[2K]] is level 2,K, etc.
#
#  If the data are in a matrix, column 1 is assumed to
#  correspond to x[[1]], column 2 to x[[2]], etc.
#
#  When in list mode x is assumed to have length JK, the total number of
#  groups being tested, but a subset of the data can be analyzed
#  using grp
#
if(is.matrix(x))
{
if(ncol(x)!=JK)print("WARNING: number of groups is not equal to JK")
}
if(is.list(x)){
if(length(x)!=JK)print("WARNING: number of groups is not equal to JK")
}
if(is.data.frame(x)){
if(ncol(x)!=JK)print("WARNING: number of groups is not equal to JK")
}
if(SEED)set.seed(2)
if(is.data.frame(x) || is.matrix(x)) {
y <- list()
ik=0
il=c(1:K)-K
for(j in 1:J){
il=il+K
zz=x[,il]
if(na.rm)zz=elimna(zz)
for(k in 1:K){
ik=ik+1
y[[ik]]=zz[,k]
}}
                x <- y
}

data<-list()
for(j in 1:length(x)){
data[[j]]<-x[[grp[j]]] # Now have the groups in proper order.
}
x<-data

#
con=con2way(J,K)
estA=psihat(x,est,con=con$conA,...)
estB=psihat(x,est,con=con$conB,...)
estAB=psihat(x,est,con=con$conAB,...)
# Next determine the n_j values
nvec<-NA
jp<-1-K
for(j in 1:J){
jp<-jp+K
nvec[j]<-length(x[[j]])
}
blist<-list()
testmatA<-matrix(NA,ncol=ncol(con$conA),nrow=nboot)
testmatB<-matrix(NA,ncol=ncol(con$conB),nrow=nboot)
testmatAB<-matrix(NA,ncol=ncol(con$conAB),nrow=nboot)
for(iboot in 1:nboot){
iv<-0
for(j in 1:J){
temp<-sample(nvec[j],replace = T)
for(k in 1:K){
iv<-iv+1
tempx<-x[[iv]]
blist[[iv]]<-tempx[temp]
}}
#
# Now do all linear contrasts on bootstrap samples
testmatA[iboot,]<-psihat(blist,est,con=con$conA,...)
testmatB[iboot,]<-psihat(blist,est,con=con$conB,...)
testmatAB[iboot,]<-psihat(blist,est,con=con$conAB,...)
}
pbA=NA
pbB=NA
pbAB=NA
for(j in 1:ncol(con$conA))pbA[j]=mean(testmatA[,j]<0)+.5*mean(testmatA[,j]==0)
for(j in 1:ncol(con$conB))pbB[j]=mean(testmatB[,j]<0)+.5*mean(testmatB[,j]==0)
for(j in 1:ncol(con$conAB))pbAB[j]=mean(testmatAB[,j]<0)+.5*mean(testmatAB[,j]==0)
for(j in 1:ncol(con$conA))pbA[j]=2*min(c(pbA[j],1-pbA[j]))
for(j in 1:ncol(con$conB))pbB[j]=2*min(c(pbB[j],1-pbB[j]))
for(j in 1:ncol(con$conAB))pbAB[j]=2*min(c(pbAB[j],1-pbAB[j]))
p.valueA=pbA
p.valueB=pbB
p.valueAB=pbAB
pbA=cbind(estA,p.valueA)
pbB=cbind(estB,p.valueB)
pbAB=cbind(estAB,p.valueAB)
list(FacA=pbA,FacB=pbB,FacAB=pbAB,conA=con$conA,conB=con$conB,conAB=con$conAB)
}

con1way<-function(J){
#
#   Create contrast coefficients for all pairwise comparisons
Ja=(J^2-J)/2
con<-matrix(0,J,Ja)
id<-0
for (j in 1:J){
for(k in 1:J){
if(j<k){
id<-id+1
con[j,id]<-1
con[k,id]<-0-1
}}}
con
}
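
#  EXAMPLE: con1way(3) returns
#       1  1  0
#      -1  0  1
#       0 -1 -1
#  whose columns code the pairwise comparisons 1-2, 1-3 and 2-3.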

logrsm<-function(x,y,fr=1,plotit=TRUE,pyhat=FALSE,xlab="X",ylab="Y",STAND=TRUE,
xout=FALSE,outfun=outpro,...){
#
#  Do a smooth as described by Hosmer and Lemeshow, p. 85
#
#  Assuming there is only one predictor
#
# xout=T will remove outliers from among the x values and then fit
# the regression line.
#  Default: a mad-median rule is used.
#
x<-as.matrix(x)
if(ncol(x)>1)stop("With more than one predictor, use logSM")
xy=elimna(cbind(x,y))
x=xy[,1:ncol(x)]
y=xy[,ncol(xy)]
x<-as.vector(x)
if(xout){
flag<-outfun(x,...)$keep
x<-x[flag]
y<-y[flag]
}
if(STAND)x<-(x-median(x))/mad(x)
m1<-outer(x,x,"-")^2
m2<-exp(-1*m1)*(sqrt(m1)<=fr)
m3<-matrix(y,length(y),length(y))*m2
yhat<-apply(m3,2,sum)/apply(m2,2,sum) #sum over rows for each column
if(plotit){
plot(x,y,xlab=xlab,ylab=ylab)
xor<-order(x)
lines(x[xor],yhat[xor])
}
output<-"Done"
if(pyhat)output<-yhat
list(output=output)
}

coefalpha<-function(x){
library(psych)
x=elimna(x)
res=alpha(x)
res
}


z.power<-function(n,alpha=.05,del=NULL,var=NULL){
 q=qnorm(1-alpha/2)
 sig=sqrt(var)
 p1=pnorm(0-q-(sqrt(n)*del)/sig)
 p2=1-pnorm(q-(sqrt(n)*del)/sig)
 p=p1+p2
 list(power=p)
 }
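
#  EXAMPLE: power of a two-sided z-test with n=25, del=.5, var=1:
#  z.power(25,del=.5,var=1)
#  Here q=1.96 and sqrt(n)*del/sig=2.5, so the power is
#  pnorm(2.5-1.96)+pnorm(-2.5-1.96), roughly .71.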

hdpb<-function(x,est=hd,alpha=.05,nboot=2000,SEED=TRUE,nv=0,...){
#
#   Compute a bootstrap, .95 confidence interval for the
#   measure of location corresponding to the argument est.
#   By default, the Harrell-Davis estimator is used
#
#   The default number of bootstrap samples is nboot=2000
#
#   The parameter q determines the quantile estimated via the function hd
#    This function is the same as onesampb, only for convenience it defaults
#   to using an estimate of the median.
#
#    nv=null value when  computing a p-value
#
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#print("Taking bootstrap samples. Please wait.")
x=elimna(x)
data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,est,...)
bvec<-sort(bvec)
low<-round((alpha/2)*nboot)
up<-nboot-low
low<-low+1
pv=mean(bvec>nv)+.5*mean(bvec==nv)
pv=2*min(c(pv,1-pv))
estimate=est(x,...)
list(ci=c(bvec[low],bvec[up]),n=length(x),estimate=estimate,p.value=pv)
}
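
#  EXAMPLE:
#  hdpb(x)        # percentile bootstrap CI for the median via hd
#  hdpb(x,q=.75)  # the same for the .75 quantile (q is passed to hd)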

vecnorm<-function(x, p=2) sum(x^p)^(1/p)
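#  EXAMPLE: vecnorm(c(3,4)) returns 5, the Euclidean norm. The argument
#  p selects the exponent; note that for odd p the formula assumes
#  nonnegative entries.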

regYvar<-function(x,y,regfun=tsreg,pts=x,nboot=100,xout=FALSE,outfun=out,SEED=TRUE,...){
#
#  Estimate standard error of predicted value of Y using regression estimator
#  corresponding to the points in
#  pts
#  regfun
#  Theil--Sen is used by default.
#
xy=elimna(cbind(x,y))
x<-as.matrix(x)
p=ncol(x)
p1=p+1
vals=NA
x<-xy[,1:p]
y<-xy[,p1]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
nv=length(y)
x<-as.matrix(x)
pts=as.matrix(pts)
nvpts=nrow(pts)
bvec=matrix(NA,nrow=nboot,ncol=nvpts)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot){
bvec[ib,]=regYsub(x[data[ib,],],y[data[ib,]],pts,p1=p1,regfun=regfun,...)
}
sqsd=apply(bvec,2,var)
sqsd
}

regYsub<-function(x,y,xr,p1,regfun=tsreg,...){
est=regfun(x,y,...)$coef
xr=as.matrix(xr)
yhat=est[1]+xr%*%est[2:p1]
yhat
}

regYci<-function(x,y,regfun=tsreg,pts=x,nboot=100,xout=FALSE,outfun=out,SEED=TRUE,alpha=.05,crit=NULL,null.value=0,plotPV=FALSE,scale=FALSE,span=.75,xlab='X',xlab1='X1',xlab2='X2',ylab='p-values',theta=50,phi=25,...){
#
#  Compute confidence interval for the typical value of Y, given X, based on the Theil--Sen estimator
#
xy=elimna(cbind(x,y))
x<-as.matrix(x)
p=ncol(x)
p1=p+1
vals=NA
x<-xy[,1:p]
y<-xy[,p1]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
x=as.matrix(x)
}
if(SEED)set.seed(2)
if(is.null(crit))crit=qnorm(1-alpha/2)
sqsd=regYvar(x,y,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED)
sd=sqrt(sqsd)
est=regYhat(x,y,regfun=regfun,xr=pts,...)
pv=2*(1-pnorm(abs(est-null.value)/sd))
est=cbind(est,est-crit*sd,est+crit*sd,pv)
dimnames(est)=list(NULL,c("Pred. Y","Lower.ci","Upper.ci","p.value"))
if(plotPV){
if(ncol(x)>2)stop('Can plot only with one or two independent variables')
if(ncol(x)==1)lplot(pts,pv,xlab=xlab,ylab=ylab,span=span)
if(ncol(x)==2)lplot(pts,pv,xlab=xlab1,ylab=xlab2,zlab=ylab,span=span,ticktype='detail',scale=scale,theta=theta,phi=phi)
}
est
}
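#
#  Illustrative use (a sketch, not part of the original file; commented out):
#  set.seed(2)
#  x=rnorm(50)
#  y=x+rnorm(50)
#  regYci(x,y,pts=c(-1,0,1))  # predicted Y, .95 CI and p-value at each point,
#                             # testing H0: typical Y equals null.value (default 0)
#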

regYband<-function(x,y,regfun=tsreg,pts=x,npts=20,nboot=100,xout=FALSE,outfun=out,SEED=TRUE,alpha=.05,crit=NULL,xlab="X",ylab="Y",SCAT=TRUE,...){
#
# Plot confidence band for the predicted Y value
#
xy=elimna(cbind(x,y))
x<-as.matrix(x)
p=ncol(x)
if(p!=1)stop("This function assumes a single predictor only")
p1=p+1
vals=NA
x<-xy[,1:p]
y<-xy[,p1]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
if(SEED)set.seed(2)
pts=seq(min(x),max(x),length.out=npts)
res=regYci(x,y,regfun=regfun,pts=pts,nboot=nboot,xout=FALSE,SEED=SEED,alpha=alpha,crit=crit,...)
plot(c(x,pts,pts),c(y,res[,2],res[,3]),xlab=xlab,ylab=ylab,type="n")
abline(regfun(x,y,...)$coef)
if(SCAT)points(x,y)
lines(pts,res[,2],lty=2)
lines(pts,res[,3],lty=2)
}

ols.pred.ci<-function(x,y,xlab="X",ylab="Y",alpha=.05,xout=FALSE,RETURN=FALSE,newx=NULL){
#
# plot the ols regression line and a 1-alpha
# confidence interval for the predicted values
#
#  RETURN=T means the function will return predicted values and
#  confidence intervals for the x values indicated by the argument newx.
#  newx=NULL means predicted Y values are computed for seq(min(x), max(x), 0.1)
#
# xout=T removes leverage points.
#
if(ncol(as.matrix(x))!=1)stop("One predictor is allowed")
xy=elimna(cbind(x,y))
x=xy[,1]
y=xy[,2]
if(xout){
flag=out(x)$keep
x=x[flag]
y=y[flag]
}
tmp.lm=lm(y~x)
if(is.null(newx))newx=seq(min(x), max(x), 0.1)
a=predict(tmp.lm,interval="confidence",level=1-alpha,newdata=data.frame(x=newx))
plot(x,y,xlab=xlab,ylab=ylab)
abline(ols(x,y,plotit=FALSE)$coef[,1])
lines(newx,a[,2],lty=2)
lines(newx,a[,3],lty=2)
res=NULL
if(RETURN)res=a
res
}
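#
#  Illustrative use (a sketch, not part of the original file; commented out):
#  set.seed(3)
#  x=rnorm(40)
#  y=2*x+rnorm(40)
#  ols.pred.ci(x,y,RETURN=TRUE,newx=c(-1,0,1))  # plot plus CIs at three new points
#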

regYhat<-function(x,y,xr=x,regfun=tsreg,xout=FALSE,outfun=out,pr=FALSE,...){
#
#  For convenience, return estimate of Y based on data in xr using
#  regression line based on regfun
#
xy=elimna(cbind(x,y))
x<-as.matrix(x)
p=ncol(x)
p1=p+1
vals=NA
x<-xy[,1:p]
y<-xy[,p1]
if(xout){
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
est=regfun(x,y,...)$coef
xr=as.matrix(xr)
yhat=est[1]+xr%*%est[2:p1]
yhat
}
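#
#  Illustrative use (a sketch, not part of the original file; commented out):
#  set.seed(4)
#  x=rnorm(50)
#  y=1+x+rnorm(50)
#  regYhat(x,y,xr=c(-1,0,1))  # Theil--Sen predicted Y at x=-1, 0 and 1
#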

reg1way<-function(x,y,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro,STAND=TRUE,AD=FALSE,alpha=.05,pr=TRUE,...){
#
#  Test hypothesis that for two or more independent groups, all regression parameters
#  (the intercepts and slopes) are equal
#  By default the Theil--Sen estimator is used
#
#  Strategy: Use bootstrap estimate of standard errors followed by
#  Johansen MANOVA type test statistic.
#
#  x and y are assumed to have list mode having length J equal to the number of groups
#  For example, x[[1]] and y[[1]] contain the data for group 1.
#
#   xout=T will eliminate leverage points using the function outfun, which defaults to the MVE method.
#
#  OUTPUT:
#   n is sample size after missing values are removed
#   nv.keep is sample size after leverage points are removed.
#
if(pr){
if(!xout)print("Might want to consider xout=T to  remove leverage points")
}
if(SEED)set.seed(2)
if(!is.list(x))stop("Argument x should have list mode")
J=length(x) # number of groups
x=lapply(x,as.matrix)
pchk=lapply(x,ncol)
temp=matl(pchk)
if(var(as.vector(temp))!=0)stop("Something is wrong. Number of covariates differs among the groups being compared")
nv=NULL
p=ncol(x[[1]])
p1=p+1
for(j in 1:J){
xy=elimna(cbind(x[[j]],y[[j]]))
x[[j]]=xy[,1:p]
y[[j]]=xy[,p1]
x[[j]]=as.matrix(x[[j]])
nv=c(nv,nrow(x[[j]]))
}
nv.keep=nv
if(xout){
temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
for(j in 1:J){
x[[j]]=x[[j]][temp[[j]]$keep,]
y[[j]]=y[[j]][temp[[j]]$keep]
}}
x=lapply(x,as.matrix)
K=p1
est=matrix(NA,nrow=J,ncol=p1)
grpnum=NULL
for(j in 1:J)grpnum[j]=paste("Group",j)
vlabs="Intercept"
for(j in 2:p1)vlabs[j]=paste("Slope",j-1)
dimnames(est)=list(grpnum,vlabs)
ecov=list()
ecovinv=list()
W=rep(0,p1)
gmean=rep(0,p1)
for(j in 1:J){
est[j,]=regfun(x[[j]],y[[j]],xout=FALSE,...)$coef
nv.keep[j]=nrow(x[[j]])
vals=matrix(NA,nrow=nboot,ncol=p1)
data<-matrix(sample(length(y[[j]]),size=length(y[[j]])*nboot,replace=TRUE),ncol=nboot)
data=listm(data)
bvec<-lapply(data,regbootMC,x[[j]],y[[j]],regfun,...)
# bvec is a p+1 by nboot matrix.
vals=t(matl(bvec))
ecov[[j]]=var(vals)
ecovinv[[j]]=solve(ecov[[j]])  #W_j
gmean=gmean+ecovinv[[j]]%*%est[j,]
W=W+ecovinv[[j]]
}
estall=solve(W)%*%gmean
F=0
for(k in 1:K){
for(m in 1:K){
for(j in 1:J){
F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m])
}}}
pvalad=NULL
# if xout=F or AD=T, compute corrected critical value, stemming from Johansen
df=K*(J-1)
if(!xout || AD){
iden=diag(p1)
Aw=0
for(j in 1:J){
temp=iden-solve(W)%*%ecovinv[[j]]
tempsq=temp%*%temp
Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1)
}
Aw=Aw/2
alval<-c(1:999)/1000
for(i in 1:999){
irem<-i
crit=qchisq(alval[i],df)
critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
if(F<critad)break
}
pvalad=1-irem/1000
}
#
pval=1-pchisq(F,df)
crit=qchisq(1-alpha,df)
critad=NULL
if(!xout || AD)critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
est=data.frame(est)
hlabs=NULL
vlabs="Intercept"
for(j in 1:J)hlabs[j]=paste("Group",j)
for(j in 2:p1)vlabs[j]=paste("Slope",j-1)
dimnames(est)<-list(hlabs,vlabs)
list(n=nv,n.keep=nv.keep,test.stat=F,crit.value=crit,adjusted.crit=critad,p.value=pval,adjusted.p.value=pvalad,est=est)
}
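#
#  Illustrative use (a sketch, not part of the original file; commented out):
#  set.seed(5)
#  x=list(rnorm(40),rnorm(40))
#  y=list(x[[1]]+rnorm(40),1+x[[2]]+rnorm(40))
#  reg1way(x,y,xout=TRUE)  # test equal intercepts and slopes for the two groups
#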


ancGpar<-function(x1,y1,x2,y2,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,eout=FALSE,outfun=outpro,STAND=TRUE,plotit=TRUE,xlab="X",ylab="Y",ISO=FALSE,...){
#
#  Test hypothesis that for two independent groups, all regression parameters are equal
#  By default the Theil--Sen estimator is used
#
#  Strategy: Use bootstrap estimate of standard errors followed by
#  Johansen type test statistic.
#
x1=as.matrix(x1)
p=ncol(x1)
p1=p+1
xy=elimna(cbind(x1,y1))
x1=xy[,1:p]
y1=xy[,p1]
x2=as.matrix(x2)
p=ncol(x2)
p1=p+1
xy=elimna(cbind(x2,y2))
x2=xy[,1:p]
y2=xy[,p1]
if(plotit){
xx1=x1
yy1=y1
xx2=x2
yy2=y2
if(ncol(as.matrix(x1))==1){
if(eout){
flag=outfun(cbind(x1,y1),plotit=FALSE,...)$keep
xx1=x1[flag]
yy1=y1[flag]
flag=outfun(cbind(x2,y2),plotit=FALSE,...)$keep
xx2=x2[flag]
yy2=y2[flag]
}
if(xout){
flag=outfun(xx1,plotit=FALSE,...)$keep
xx1=x1[flag]
yy1=y1[flag]
flag=outfun(xx2,plotit=FALSE,...)$keep
xx2=x2[flag]
yy2=y2[flag]
}
plot(c(xx1,xx2),c(yy1,yy2),type="n",xlab=xlab,ylab=ylab)
points(xx1,yy1)
points(xx2,yy2,pch="+")
abline(regfun(xx1,yy1,...)$coef)
abline(regfun(xx2,yy2,...)$coef,lty=2)
}}
x=list()
y=list()
x[[1]]=x1
x[[2]]=x2
y[[1]]=y1
y[[2]]=y2
if(!ISO)output=reg1way(x,y,regfun=regfun,nboot=nboot,xout=xout,outfun=outfun,SEED=SEED,STAND=STAND,...)
if(ISO)output=reg1wayISO(x,y,regfun=regfun,nboot=nboot,xout=xout,outfun=outfun,SEED=SEED,STAND=STAND,...)
output
}


ancts<-function(x1,y1,x2,y2,pts=NULL,regfun=tsreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,xlab="X",ylab="Y",...){
#
# Compare the regression lines of two independent groups at specified design points
# using a robust regression estimator. 
# By default, use the Theil--Sen estimator
#
#  Assume data are in x1 y1 x2 and y2
#
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#
if(identical(outfun,boxplot))stop('Use outfun=outbox')
if(SEED)set.seed(2)
FLAG=pts
xy=elimna(cbind(x1,y1))
if(ncol(xy)>2)stop("Only one covariate is allowed. Try using ancpar")
x1=xy[,1]
y1=xy[,2]
xy=elimna(cbind(x2,y2))
if(ncol(xy)>2)stop("Only one covariate is allowed")
x2=xy[,1]
y2=xy[,2]
if(is.null(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,9)
dimnames(mat)<-list(NULL,c("X","Est1","Est2","DIF","TEST","se","ci.low","ci.hi","p.value"))
pts=x1[isub]
mat[,1]=pts
sqsd1=regYvar(x1,y1,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=xout,outfun=outfun,...)
sqsd2=regYvar(x2,y2,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=xout,outfun=outfun,...)
est1=regYhat(x1,y1,xr=pts,xout=xout,outfun=outfun,regfun=regfun,...)
est2=regYhat(x2,y2,xr=pts,xout=xout,outfun=outfun,regfun=regfun,...)
mat[,2]=est1
mat[,3]=est2
est=est1-est2
mat[,4]=est
sd=sqrt(sqsd1+sqsd2)
mat[,6]=sd
tests=(est1-est2)/sd
mat[,5]=tests
pv=2*(1-pnorm(abs(tests)))
mat[,9]=pv
crit<-smmcrit(Inf,5)
mat[,7]=est-crit*sd
mat[,8]=est+crit*sd
}
if(!is.null(FLAG)){
n1=NA
n2=NA
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
mat<-matrix(NA,length(pts),9)
dimnames(mat)<-list(NULL,c("X","Est1","Est2","DIF","TEST","se","ci.low","ci.hi","p.value"))
mat[,1]<-pts
sqsd1=regYvar(x1,y1,regfun=regfun,pts=pts,nboot=nboot,SEED=FALSE,xout=xout,outfun=outfun,...)
sqsd2=regYvar(x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=FALSE,xout=xout,outfun=outfun,...)
est1=regYhat(x1,y1,regfun=regfun,xr=pts,xout=xout,outfun=outfun,...)
est2=regYhat(x2,y2,regfun=regfun,xr=pts,xout=xout,outfun=outfun,...)
mat[,2]=est1
mat[,3]=est2
est=est1-est2
mat[,4]=est
sd=sqrt(sqsd1+sqsd2)
mat[,6]=sd
tests=(est1-est2)/sd
mat[,5]=tests
pv=2*(1-pnorm(abs(tests)))
mat[,9]=pv
crit<-smmcrit(Inf,length(pts))
mat[,7]=est-crit*sd
mat[,8]=est+crit*sd
}
if(plotit){
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
plot(c(x1,x2),c(y1,y2),type="n",xlab=xlab,ylab=ylab)
points(x1,y1,pch="o")
points(x2,y2,pch="+")
abline(regfun(x1,y1)$coef)
abline(regfun(x2,y2)$coef,lty=2)
}
list(output=mat)
}

ancJN<-ancts

block.diag<-function(mat){
#
# mat is assumed to have list mode with
# mat[[1]]...mat[[p]] each having n-by-n matrices
#
# Create a np-by-np block diagonal matrix
#
# So p is the number of blocks
#
if(!is.list(mat))stop("mat should have list mode")
np<-length(mat)*ncol(mat[[1]])
m<-matrix(0,np,np)
n=nrow(mat[[1]])
p=length(mat)
ilow<-1-n
iup<-0
for(i in 1:p){
ilow<-ilow+n
iup<-iup+n
m[ilow:iup,ilow:iup]<-mat[[i]]
}
m
}
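#
#  Illustrative use (a sketch, not part of the original file; commented out):
#  block.diag(list(diag(2),matrix(1,2,2)))  # a 4-by-4 block diagonal matrix
#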

reg1wayMC<-function(x,y,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro,
STAND=TRUE,alpha=.05,pr=TRUE,AD=FALSE,...){
#
#  Test hypothesis that for two or more independent groups, all regression parameters are equal
#  By default the Theil--Sen estimator is used
#
#  Strategy: Use bootstrap estimate of standard errors followed by
#  Johansen MANOVA type test statistic
#
#  x and y are assumed to have list mode having length J equal to the number of groups
#  For example, x[[1]] and y[[1]] contain the data for group 1.
#
#   xout=T will eliminate leverage points using the function outfun
#
#  OUTPUT:
#   n is sample size after missing values are removed
#   nv.keep is sample size after leverage points are removed.
#
library(parallel)
if(pr){
if(!xout)print("Might want to consider xout=T to  remove leverage points")
}
if(SEED)set.seed(2)
if(!is.list(x))stop("Argument x should have list mode")
if(pr){
if(xout)print("xout=T, so an adjusted critical is not computed and apparently not needed")
}
J=length(x) # number of groups
x=lapply(x,as.matrix)
pchk=lapply(x,ncol)
temp=matl(pchk)
if(var(as.vector(temp))!=0)stop("Something is wrong. Number of covariates differs among the groups being compared")
nv=NULL
nv.keep=NULL
nv.all=NULL
p=ncol(x[[1]])
p1=p+1
for(j in 1:J){
xy=elimna(cbind(x[[j]],y[[j]]))
x[[j]]=xy[,1:p]
y[[j]]=xy[,p1]
x[[j]]=as.matrix(x[[j]])
nv.all[j]=c(nv,nrow(x[[j]]))
}
if(xout){
temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
for(j in 1:J){
x[[j]]=x[[j]][temp[[j]]$keep,]
y[[j]]=y[[j]][temp[[j]]$keep]
}}
x=lapply(x,as.matrix)
p1=ncol(x[[1]])+1
K=p1
est=matrix(NA,nrow=J,ncol=p1)
hlabs=NULL
vlabs="Intercept"
for(j in 1:J)hlabs[j]=paste("Group",j)
for(j in 2:p1)vlabs[j]=paste("Slope",j-1)
dimnames(est)<-list(hlabs,vlabs)
nv=NA
ecov=list()
ecovinv=list()
W=rep(0,p1)
gmean=rep(0,p1)
for(j in 1:J){
est[j,]=regfun(x[[j]],y[[j]])$coef
nv.keep[j]=nrow(x[[j]])
nv[j]=nv.keep[j]
vals=matrix(NA,nrow=nboot,ncol=p1)
data<-matrix(sample(nv[j],size=nv[j]*nboot,replace=TRUE),ncol=nboot)
data=listm(data)
bvec<-mclapply(data,regbootMC,x[[j]],y[[j]],regfun,...)
vals=t(matl(bvec))
ecov[[j]]=var(vals)
ecovinv[[j]]=solve(ecov[[j]])  #W_j
gmean=gmean+ecovinv[[j]]%*%est[j,]
W=W+ecovinv[[j]]
}
estall=solve(W)%*%gmean
F=0
for(k in 1:K){
for(m in 1:K){
for(j in 1:J){
F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m])
}}}
pvalad=NULL
# if xout=F or AD=T, compute corrected critical value, stemming from Johansen
df=K*(J-1)
if(!xout || AD){
iden=diag(p1)
Aw=0
for(j in 1:J){
temp=iden-solve(W)%*%ecovinv[[j]]
tempsq=temp%*%temp
Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1)
}
Aw=Aw/2
alval<-c(1:999)/1000
for(i in 1:999){
irem<-i
crit=qchisq(alval[i],df)
critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
if(F<critad)break
}
pvalad=1-irem/1000
}
#
pval=1-pchisq(F,df)
crit=qchisq(1-alpha,df)
critad=NULL
if(!xout || AD)critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
est=data.frame(est)
list(n=nv.all,n.keep=nv.keep,test.stat=F,crit.value=crit,adjusted.crit=critad,p.value=pval,adjusted.p.value=pvalad,est=est)
}





CLASSanc<-function(x1,y1,x2,y2,xout=FALSE,outfun=out,...){
#
#  Perform classic ANCOVA
#  x1 and x2 contain the covariate
#  NOT RECOMMENDED. ONLY INCLUDED IN CASE
#  YOU WANT TO COMPARE THE RESULTS WITH A ROBUST METHOD
#
#  ONE COVARIATE ONLY
#
x1=as.matrix(x1)
if(ncol(x1)!=1)stop("This function allows one covariate only") 
if(xout){
flag=outfun(x1,plotit=FALSE,...)$keep
x1=x1[flag]
y1=y1[flag]
flag=outfun(x2,plotit=FALSE,...)$keep
x2=x2[flag]
y2=y2[flag]
}
x=c(x1,x2)
y=c(y1,y2)
g=c(rep(1,length(y1)),rep(2,length(y2)))
model=lm(y~as.factor(g)*x)
res1=summary.aov(model)
model=lm(y~as.factor(g)+x)
res2=summary.aov(model)
list(slope.test=res1,ancova=res2)
}

anctsmp<-function(x1,y1,x2,y2,regfun=tsreg,alpha=.05,pts=NULL,SEED=TRUE,nboot=100,xout=FALSE,outfun=out,...){
#
# Compare two independent  groups using a generalization of the ancts function that
#  allows more than one covariate.
#
# Design points are chosen based on depth of points in x1 if pts=NULL
#  Assume data are in x1 y1 x2 and y2
#
if(SEED)set.seed(2) # now cov.mve always returns same result
x1=as.matrix(x1)
p=ncol(x1)
p1=p+1
m1=elimna(cbind(x1,y1))
x1=m1[,1:p]
y1=m1[,p1]
x2=as.matrix(x2)
p=ncol(x2)
p1=p+1
m2=elimna(cbind(x2,y2))
x2=m2[,1:p]
y2=m2[,p1]
#
if(xout){
m<-cbind(x1,y1)
flag<-outfun(x1,plotit=FALSE,...)$keep
m<-m[flag,]
x1<-m[,1:p]
y1<-m[,p1]
m<-cbind(x2,y2)
flag<-outfun(x2,plotit=FALSE,...)$keep
m<-m[flag,]
x2<-m[,1:p]
y2<-m[,p1]
}
nv=c(length(y1),length(y2))
if(is.null(pts[1])){
x1<-as.matrix(x1)
pts<-ancdes(x1)
}
pts<-as.matrix(pts)
ntests=nrow(pts)
mat<-matrix(NA,ntests,8)
dimnames(mat)<-list(NULL,c("Est 1", "Est 2","DIF","TEST","se","ci.low","ci.hi","p.value"))
sqsd1=regYvar(x1,y1,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=xout,outfun=outfun,...)
sqsd2=regYvar(x2,y2,pts=pts,regfun=regfun,nboot=nboot,SEED=FALSE,xout=xout,outfun=outfun,...)
est1=regYhat(x1,y1,xr=pts,xout=xout,outfun=outfun,...)
est2=regYhat(x2,y2,xr=pts,xout=xout,outfun=outfun,...)
mat[,1]=est1
mat[,2]=est2
est=est1-est2
mat[,3]=est
sd=sqrt(sqsd1+sqsd2)
mat[,5]=sd
tests=(est1-est2)/sd
mat[,4]=tests
pv=2*(1-pnorm(abs(tests)))
mat[,8]=pv
crit=NULL
if(ntests==1)crit=qnorm(1-alpha/2)
if(length(pts)>1){
if(ntests<=28){
if(alpha==.05)crit<-smmcrit(Inf,ntests)
if(alpha==.01)crit<-smmcrit01(Inf,ntests)
}
if(ntests>28)crit=smmvalv2(dfvec=rep(Inf,nrow(pts)),alpha=alpha)
if(is.null(crit))crit=smmvalv2(dfvec=rep(Inf,nrow(pts)),alpha=alpha)
}
mat[,6]=est-crit*sd
mat[,7]=est+crit*sd
list(n=nv,points=pts,output=mat)
}

ancJNmcp=anctsmp

ancpar<-function(x1,y1,x2,y2,pts=NULL,regfun=tsreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,xlab="X",ylab="Y",...){
#
# Compare the regression lines of two independent groups at specified design points.
# By default, use the Theil--Sen estimator
#
#  Assume data are in x1 y1 x2 and y2
#
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#  For p>1 predictors, pts should be a matrix with p columns
#
x1=as.matrix(x1)
x2=as.matrix(x2)
if(ncol(x1)!=ncol(x2))stop("x1 and x2 have different number of columns")
if(ncol(x1)==1)output=ancts(x1,y1,x2,y2,pts=pts,regfun=regfun,fr1=fr1,fr2=fr2,alpha=alpha,
plotit=plotit,xout=xout,outfun=outfun,nboot=nboot,SEED=SEED,xlab=xlab,ylab=ylab,...)
if(ncol(x1)>1)output=anctsmp(x1,y1,x2,y2,regfun=regfun,alpha=alpha,pts=pts,SEED=SEED,xout=xout,outfun=outfun,nboot=nboot,...)
output
}


 ols.coef<-function(x,y,xout=FALSE){
 # In some cases, want the OLS estimate returned in $coef
 res=ols(x,y,xout=xout)$coef[,1]
 list(coef=res)
 }


reg2ciMC<-function(x,y,x1,y1,regfun=tsreg,nboot=599,alpha=.05,plotit=TRUE,SEED=TRUE,
xout=FALSE,outfun=outpro,pr=TRUE,...){
#
#   Compute a .95 confidence interval for the difference between the
#   the intercepts and slopes corresponding to two independent groups.
#   The default regression method is Theil-Sen.
#
#   Same as reg2ci, only takes advantage of a multi-core processor
#
#   The predictor values for the first group are
#   assumed to be in the n by p matrix x.
#   The predictors for the second group are in x1
#
#   The default number of bootstrap samples is nboot=599
#
#   regfun can be any R function that returns the coefficients in
#   the vector regfun$coef, the first element of which contains the
#   estimated intercept, the second element contains the estimate of
#   the first predictor, etc.
#
library(parallel)
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
x1<-as.matrix(x1)
xx1<-cbind(x1,y1)
xx1<-elimna(xx1)
x1<-xx1[,1:ncol(x1)]
x1<-as.matrix(x1)
y1<-xx1[,ncol(x1)+1]
x=as.matrix(x)
x1=as.matrix(x1)
if(xout){

flag1=outfun(x,...)$keep
flag2=outfun(x1,...)$keep
x=x[flag1,]
y=y[flag1]
x1=x1[flag2,]
y1=y1[flag2]
}
n=length(y)
n[2]=length(y1)
x<-as.matrix(x)
x1<-as.matrix(x1)
est1=regfun(x,y)$coef
est2=regfun(x1,y1)$coef
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#
data<-matrix(sample(length(y),size=length(y)*nboot,replace=T),nrow=nboot)
data=listm(t(data))
bvec<-mclapply(data,regbootMC,x,y,regfun,mc.preschedule=TRUE,xout=FALSE)
bvec=matl(bvec)
data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=T),nrow=nboot)
data=listm(t(data))
bvec1<-mclapply(data,regbootMC,x1,y1,regfun,mc.preschedule=TRUE,xout=FALSE)
bvec1=matl(bvec1)
bvec<-bvec-bvec1
p1<-ncol(x)+1
regci<-matrix(0,p1,6)
dimnames(regci)<-list(NULL,
c("Parameter","ci.lower","ci.upper","p.value","Group 1","Group 2"))
ilow<-round((alpha/2)*nboot)+1
ihi<-nboot-(ilow-1)
for(i in 1:p1){
temp<-sum(bvec[i,]<0)/nboot+sum(bvec[i,]==0)/(2*nboot)
regci[i,4]<-2*min(temp,1-temp)
bsort<-sort(bvec[i,])
regci[i,2]<-bsort[ilow]
regci[i,3]<-bsort[ihi]
regci[,1]<-c(0:ncol(x))
}
regci[,5]=est1
regci[,6]=est2
if(ncol(x)==1 && plotit){
plot(c(x,x1),c(y,y1),type="n",xlab="X",ylab="Y")
points(x,y)
points(x1,y1,pch="+")
abline(regfun(x,y)$coef)
abline(regfun(x1,y1)$coef,lty=2)
}
list(n=n,output=regci)
}

reg2difplot<-function(x1,y1,x2,y2,regfun=tsreg,pts=x1,xlab="VAR 1",ylab="VAR 2",zlab="Group 2 minus Group 1",xout=FALSE,outfun=out,ALL=TRUE,pts.out=FALSE,...){
#
# Fit a regression model to both groups, assuming there are two predictors.
# Get predicted Y values based on points in pts. By default, use
# pts=x1
#
#  x1 a matrix containing two predictors
#  x2 a matrix containing two predictors
#
#  Compute differences in predicted values and plot the results as a function of the points in pts
#  pts=x1 by default.
#  ALL=T, pts is taken to be all points in x1 and x2.
#
#  pts.out=T will remove leverage points from pts.
#
if(!is.matrix(x1))stop("x1 should be a matrix")
if(!is.matrix(x2))stop("x2 should be a matrix")
if(!is.matrix(pts))stop("pts should be a matrix")
if(ncol(x1)!=2)stop("x1 should be a matrix with two columns")
if(ncol(x2)!=2)stop("x2 should be a matrix with two columns")
if(ncol(pts)!=2)stop("pts should be a matrix with two columns")
if(ALL)pts=rbind(x1,x2)
if(pts.out){
flag=outfun(pts,plotit=FALSE,...)$keep
pts=pts[flag,]
}
e1=regYhat(x1,y1,xout=xout,regfun=regfun,outfun=outfun,xr=pts,...)
e2=regYhat(x2,y2,xout=xout,regfun=regfun,outfun=outfun,xr=pts,...)
library(scatterplot3d)
scatterplot3d(cbind(pts,e2-e1),xlab=xlab,ylab=ylab,zlab=zlab)
}

cbmhd<-function(x,y,alpha=.05,q=.25,plotit=FALSE,pop=0,fr=.8,rval=15,xlab="",ylab="",nboot=600,SEED=TRUE){
#
#  Compute a confidence interval for the sum of the qth and (1-q)th quantiles
#  of the distribution of D=X-Y, where X and Y are two independent random variables.
#  The Harrell-Davis estimator is used
#  If the distribution of X and Y are identical, then in particular the
#  distribution of D=X-Y is symmetric about zero.
#
#  plotit=TRUE causes a plot of the difference scores to be created
#  pop=0 adaptive kernel density estimate
#  pop=1 results in the expected frequency curve.
#  pop=2 kernel density estimate (Rosenblatt's shifted histogram)
#  pop=3 boxplot
#  pop=4 stem-and-leaf
#  pop=5 histogram
#
if(SEED)set.seed(2)
if(q>=.5)stop("q should be less than .5")
if(q<=0)stop("q should be greater than 0")
x<-x[!is.na(x)]
y<-y[!is.na(y)]
n1=length(x)
n2=length(y)
m<-outer(x,y,FUN="-")
q2=1-q
est1=hd(m,q)
est2=hd(m,q2)
data1<-matrix(sample(n1,size=n1*nboot,replace=TRUE),nrow=nboot)
data2<-matrix(sample(n2,size=n2*nboot,replace=TRUE),nrow=nboot)
bvec=NA
for(i in 1:nboot){
mb=outer(x[data1[i,]],y[data2[i,]],"-")
bvec[i]=hd(mb,q)+hd(mb,q2)
}
p=mean(bvec>0)+.5*mean(bvec==0)
p=2*min(c(p,1-p))
sbv=sort(bvec)
ilow<-round((alpha/2) * nboot)
ihi<-nboot - ilow
ilow<-ilow+1
ci=sbv[ilow]
ci[2]=sbv[ihi]
if(plotit){
if(pop==1 || pop==0){
if(length(x)*length(y)>2500){
print("Product of sample sizes exceeds 2500.")
print("Execution time might be high when using pop=0 or 1")
print("If this is case, might consider changing the argument pop")
print("pop=2 might be better")
}}
MM=as.vector(m)
if(pop==0)akerd(MM,xlab=xlab,ylab=ylab)
if(pop==1)rdplot(MM,fr=fr,xlab=xlab,ylab=ylab)
if(pop==2)kdplot(MM,rval=rval,xlab=xlab,ylab=ylab)
if(pop==3)boxplot(MM)
if(pop==4)stem(MM)
if(pop==5)hist(MM,xlab=xlab)
if(pop==6)skerd(MM)
}
list(q=q,Est1=est1,Est2=est2,sum=est1+est2,ci=ci,p.value=p)
}
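#
#  Illustrative use (a sketch, not part of the original file; commented out):
#  set.seed(6)
#  x=rnorm(30)
#  y=rnorm(30)
#  cbmhd(x,y)  # CI for the sum of the .25 and .75 quantiles of D=X-Y
#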

reg1wayISO<-function(x,y,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro,STAND=TRUE,alpha=.05,pr=TRUE,...){
#
#  Test hypothesis that for two or more independent groups, all slope parameters
#  are equal.
#  By default the Theil--Sen estimator is used
#
#  Strategy: Use bootstrap estimate of standard errors followed by
#  Johansen MANOVA type test statistic.
#
#  x and y are assumed to have list mode having length J equal to the number of groups
#  For example, x[[1]] and y[[1]] contain the data for group 1.
#
#   xout=T will eliminate leverage points using the function outfun
#
if(SEED)set.seed(2)
if(pr){
if(!xout)print("Might want to consider xout=T to  remove leverage points")
}
if(!is.list(x))stop("Argument x should have list mode")
J=length(x) # number of groups
x=lapply(x,as.matrix)
pchk=lapply(x,ncol)
temp=matl(pchk)
if(var(as.vector(temp))!=0)stop("Something is wrong. Number of covariates differs among the groups being compared")
nv=NULL
p=ncol(x[[1]])
p1=p+1
for(j in 1:J){
xy=elimna(cbind(x[[j]],y[[j]]))
x[[j]]=xy[,1:p]
y[[j]]=xy[,p1]
x[[j]]=as.matrix(x[[j]])
nv=c(nv,nrow(x[[j]]))
}
nv.keep=nv
if(xout){
temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
for(j in 1:J){
x[[j]]=x[[j]][temp[[j]]$keep,]
y[[j]]=y[[j]][temp[[j]]$keep]
}}
x=lapply(x,as.matrix)
K=p1
est=matrix(NA,nrow=J,ncol=p1)
nv.keep=NULL
ecov=list()
ecovinv=list()
W=rep(0,p1)
gmean=rep(0,p)
for(j in 1:J){
est[j,]=regfun(x[[j]],y[[j]],xout=FALSE,...)$coef
nv.keep[j]=nrow(x[[j]])
vals=matrix(NA,nrow=nboot,ncol=p1)
data<-matrix(sample(length(y[[j]]),size=length(y[[j]])*nboot,replace=TRUE),ncol=nboot)
data=listm(data)
bvec<-lapply(data,regbootMC,x[[j]],y[[j]],regfun,...)
# bvec is a p+1 by nboot matrix.
vals=t(matl(bvec))
ecov[[j]]=var(vals)
ecovinv[[j]]=solve(ecov[[j]])  #W_j
gmean=gmean+ecovinv[[j]][2:K,2:K]%*%est[j,2:K]
W=W+ecovinv[[j]]
}
estall=solve(W[2:K,2:K])%*%gmean
estall=c(0,estall)
F=0
for(k in 2:K){
for(m in 2:K){
for(j in 1:J){
F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m])
}}}
df=p*(J-1)
pvalad=NULL
AD=FALSE # An adjusted critical value appears to be unnecessary
if(AD){
iden=diag(p1)
Aw=0
for(j in 1:J){
temp=iden-solve(W)%*%ecovinv[[j]]
tempsq=temp%*%temp
Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1)
}
Aw=Aw/2
alval<-c(1:999)/1000
for(i in 1:999){
irem<-i
crit=qchisq(alval[i],df)
critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
if(F<critad)break
}
pvalad=1-irem/1000
}
#
pval=1-pchisq(F,df)
crit=qchisq(1-alpha,df)
critad=NULL
if(AD)critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
est=data.frame(est)
hlabs=NULL
vlabs="Intercept"
for(j in 1:J)hlabs[j]=paste("Group",j)
for(j in 2:p1)vlabs[j]=paste("Slope",j-1)
dimnames(est)<-list(hlabs,vlabs)
list(n=nv,n.keep=nv.keep,test.stat=F,crit.value=crit,p.value=pval,est=est)
}

reg1wayISOMC<-function(x,y,regfun=tsreg,nboot=100,SEED=TRUE,xout=FALSE,outfun=outpro,
STAND=TRUE,alpha=.05,pr=TRUE,...){
#
#  Test hypothesis that for two or more independent groups, all slope parameters are equal
#  By default the Theil--Sen estimator is used
#
#  Strategy: Use bootstrap estimate of standard errors followed by
#  Johansen MANOVA type test statistic
#
#  x and y are assumed to have list mode having length J equal to the number of groups
#  For example, x[[1]] and y[[1]] contain the data for group 1.
#
#   xout=T will eliminate leverage points using the function outfun
#
#  OUTPUT:
#   n is sample size after missing values are removed
#   nv.keep is sample size after leverage points are removed.
#
library(parallel)
if(SEED)set.seed(2)
if(pr){
if(!xout)print("Might want to consider xout=T to  remove leverage points")
}
if(!is.list(x))stop("Argument x should have list mode")
J=length(x) # number of groups
x=lapply(x,as.matrix)
pchk=lapply(x,ncol)
temp=matl(pchk)
if(var(as.vector(temp))!=0)stop("Something is wrong. Number of covariates differs among the groups being compared")
nv=NULL
nv.keep=NULL
nv.all=NULL
p=ncol(x[[1]])
p1=p+1
for(j in 1:J){
xy=elimna(cbind(x[[j]],y[[j]]))
x[[j]]=xy[,1:p]
y[[j]]=xy[,p1]
x[[j]]=as.matrix(x[[j]])
nv.all[j]=c(nv,nrow(x[[j]]))
}
if(xout){
temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
for(j in 1:J){
x[[j]]=x[[j]][temp[[j]]$keep,]
y[[j]]=y[[j]][temp[[j]]$keep]
}}
x=lapply(x,as.matrix)
p1=ncol(x[[1]])+1
K=p1
est=matrix(NA,nrow=J,ncol=p1)
hlabs=NULL
vlabs="Intercept"
for(j in 1:J)hlabs[j]=paste("Group",j)
for(j in 2:p1)vlabs[j]=paste("Slope",j-1)
dimnames(est)<-list(hlabs,vlabs)
nv=NA
ecov=list()
ecovinv=list()
W=rep(0,p1)
gmean=rep(0,p)
for(j in 1:J){
est[j,]=regfun(x[[j]],y[[j]])$coef
nv.keep[j]=nrow(x[[j]])
nv[j]=nv.keep[j]
vals=matrix(NA,nrow=nboot,ncol=p1)
data<-matrix(sample(nv[j],size=nv[j]*nboot,replace=TRUE),ncol=nboot)
data=listm(data)
bvec<-mclapply(data,regbootMC,x[[j]],y[[j]],regfun,...)
vals=t(matl(bvec))
ecov[[j]]=var(vals)
ecovinv[[j]]=solve(ecov[[j]])  #W_j
gmean=gmean+ecovinv[[j]][2:K,2:K]%*%est[j,2:K]
W=W+ecovinv[[j]]
}
estall=solve(W[2:K,2:K])%*%gmean
estall=c(0,estall)
F=0
for(k in 2:K){
for(m in 2:K){
for(j in 1:J){
F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m])
}}}
df=p*(J-1)
#
pval=1-pchisq(F,df)
crit=qchisq(1-alpha,df)
critad=NULL
#if(!xout || AD)critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
est=data.frame(est)
list(n=nv.all,n.keep=nv.keep,test.stat=F,crit.value=crit,p.value=pval,est=est)
}





difQpciMC<-function(x,y=NULL,q=seq(5,40,5)/100,xlab='Quantile',ylab='Group 1 minus Group 2',plotit=TRUE,
SEED=TRUE,alpha=.05,nboot=1000){
#
#  Plot that provides perspective on the degree to which a distribution is symmetric about zero.
#  This function plots the sum of q and 1-q quantiles. A 1-alpha confidence interval for the sum is indicated by a +
#  If the distributions are symmetric
#  the plot should be approximately a horizontal line. If in addition the median
#  of the difference scores is zero, the horizontal line will intersect the y-axis at zero.
#
#  Similar to difQplot, only plots fewer quantiles by default and returns p-values for
#  each quantile indicated by the argument q.
#
#  FWE is controlled via Hochberg's method, which was used to determine critical
#  p-values based on the argument
#  alpha.
#
#  Can alter the quantiles compared via the argument
#  q
#  q must be less than .5
#
x=as.matrix(x)
if(is.null(y))dif=x
if(ncol(x)>2)stop("Should be at most two groups")
if(ncol(x)==2)dif=x[,1]-x[,2]
if(!is.null(y))dif=x-y
dif=elimna(dif)
dif=as.matrix(dif)
nv=length(dif)
output=matrix(NA,ncol=8,nrow=length(q))
dimnames(output)=list(NULL,c('quantile','Est_q','Est_1.minus.q','SUM','ci.low','ci.up','p_crit','p-value'))
for(i in 1:length(q)){
test=DqdifMC(dif,q=q[i],plotit=FALSE,nboot=nboot,SEED=SEED)
output[i,1]=q[i]
output[i,2]=test$est.q
output[i,3]=test$est.1.minus.q
output[i,8]=test$p.value
output[i,5]=test$conf.interval[1]
output[i,6]=test$conf.interval[2]
}
temp=order(output[,8],decreasing=TRUE)
zvec=alpha/c(1:length(q))
output[temp,7]=zvec
output <- data.frame(output)
output$signif=rep('YES',nrow(output))
for(i in 1:nrow(output)){
if(output[temp[i],8]>output[temp[i],7])output$signif[temp[i]]='NO'
if(output[temp[i],8]<=output[temp[i],7])break
}
output[,4]=output[,2]+output[,3]
if(plotit){
plot(rep(q,3),c(output[,4],output[,5],output[,6]),type='n',xlab=xlab,ylab=ylab)
points(q,output[,6],pch='+')
points(q,output[,5],pch='+')
points(q,output[,4],pch='*')
}
list(n=nv,output=output)
}
tsregF<-function(x,y,xout=FALSE,outfun=out,iter=10,varfun=pbvar,
corfun=pbcor,plotit=FALSE,tol=.0001,...){
#
#  Compute Theil-Sen regression estimator
#
#  Use Gauss-Seidel algorithm
#  when there is more than one predictor
#
#
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
temp<-NA
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=plotit,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(ncol(x)==1){
temp1<-tsp1reg(x,y)
coef<-temp1$coef
res<-temp1$res
}
if(ncol(x)>1){
for(p in 1:ncol(x)){
temp[p]<-tsp1reg(x[,p],y)$coef[2]
}
res<-y-x%*%temp
alpha<-median(res)
r<-matrix(NA,ncol=ncol(x),nrow=nrow(x))
tempold<-temp
for(it in 1:iter){
for(p in 1:ncol(x)){
r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p]
temp[p]<-tsp1reg(x[,p],r[,p],plotit=FALSE)$coef[2]
}
if(max(abs(temp-tempold))<tol)break
alpha<-median(y-x%*%temp)
tempold<-temp
}
coef<-c(alpha,temp)
res<-y-x%*%temp-alpha
}
yhat<-y-res
stre=NULL
temp=varfun(y)
if(temp==0)print('Warning: When computing strength of association, measure of variation=0')
e.pow=NULL
if(temp>0){
e.pow<-varfun(yhat)/varfun(y)
if(!is.na(e.pow)){
if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2
e.pow=as.numeric(e.pow)
stre=sqrt(e.pow)
}}
res=NULL
list(coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow)
}


outproMC<-function(m,gval=NA,center=NA,plotit=T,op=T,MM=F,cop=3,
xlab="VAR 1",ylab="VAR 2",STAND=FALSE,tr=.2,q=.5,pr=TRUE,...){
#
# same as function outpro, only it takes advantage of multiple core
# processors
#
# Detect outliers using a modification of the
# Stahel-Donoho  projection method.
#
# Determine center of data cloud, for each point,
# connect it with center, project points onto this line
# and use distances between projected points to detect
# outliers. A boxplot method is used on the
# projected distances.
#
# plotit=T creates a scatterplot when working with 
# bivariate data.
#
# op=T 
# means the .5 depth contour is plotted 
# based on data with outliers removed.
# 
# op=F
# means .5 depth contour is plotted without removing outliers.
#
#  MM=F  uses the interquartile range when checking for outliers
#  MM=T  uses MAD.
#
#  If value for center is not specified,
#  there are four options for computing the center of the
#  cloud of points when computing projections:
#
#  cop=2 uses MCD center
#  cop=3 uses median of the marginal distributions.
#  cop=4 uses MVE center
#  cop=5 uses TBS
#  cop=6 uses rmba (Olive's median ball algorithm)
#  cop=7 uses the spatial (L1) median
#
#  args q and tr are not used by this function. They are included to deal
#  with situations where smoothers have optional arguments for q and tr
#
#  STAND=T means that marginal distributions are standardized before
#  checking for outliers

#  When using cop=2, 3 or 4, default critical value for outliers 
#  is square root of the .975 quantile of a 
#  chi-squared distribution with p degrees 
#  of freedom.
#
#  Donoho-Gasko (Tukey) median is marked with a cross, +.
#
library(parallel)
library(MASS)
m<-as.matrix(m)
if(pr){
if(!STAND){
if(ncol(m)>1)print('STAND=FALSE. If measures are on different scales, might want to use STAND=TRUE')
}}
if(ncol(m)==1){
dis<-(m-median(m))^2/mad(m)^2
dis<-sqrt(dis)
crit<-sqrt(qchisq(.975,1))
chk<-ifelse(dis>crit,1,0)
vec<-c(1:nrow(m))
outid<-vec[chk==1]
keep<-vec[chk==0]
}
if(ncol(m)>1){
if(STAND)m=standm(m,est=median,scat=mad)
if(is.na(gval) && cop==1)gval<-sqrt(qchisq(.95,ncol(m)))
if(is.na(gval) && cop!=1)gval<-sqrt(qchisq(.975,ncol(m)))
m<-elimna(m) # Remove missing values
if(cop==1 && is.na(center[1])){
if(ncol(m)>2)center<-dmean(m,tr=.5,cop=1)
if(ncol(m)==2){
tempd<-NA
for(i in 1:nrow(m))
tempd[i]<-depth(m[i,1],m[i,2],m)
mdep<-max(tempd)
flag<-(tempd==mdep)
if(sum(flag)==1)center<-m[flag,]
if(sum(flag)>1)center<-apply(m[flag,],2,mean)
}}
if(cop==2 && is.na(center[1])){
center<-cov.mcd(m)$center
}
if(cop==4 && is.na(center[1])){
center<-cov.mve(m)$center
}
if(cop==3 && is.na(center[1])){
center<-apply(m,2,median)
}
if(cop==5 && is.na(center[1])){
center<-tbs(m)$center
}
if(cop==6 && is.na(center[1])){
center<-rmba(m)$center
}
if(cop==7 && is.na(center[1])){
center<-spat(m)
}
flag<-rep(0, nrow(m))
outid <- NA
vec <- c(1:nrow(m))
cenmat=matrix(rep(center,nrow(m)),ncol=ncol(m),byrow=T)
Amat=m-cenmat
B=listm(t(Amat))  # so rows are now in B[[1]]...B[[n]]
dis=mclapply(B,outproMC.sub,Amat)
flag=mclapply(dis,outproMC.sub2,MM,gval)
flag=matl(flag)
flag=apply(flag,1,max)
}
if(sum(flag) == 0) outid <- NA
if(sum(flag) > 0)flag<-(flag==1)
outid <- vec[flag]
idv<-c(1:nrow(m))
keep<-idv[!flag]
if(ncol(m)==2){
if(plotit){
plot(m[,1],m[,2],type="n",xlab=xlab,ylab=ylab)
points(m[keep,1],m[keep,2],pch="*")
if(length(outid)>0)points(m[outid,1],m[outid,2],pch="o")
if(op){
tempd<-NA
keep<-keep[!is.na(keep)]
mm<-m[keep,]
for(i in 1:nrow(mm))tempd[i]<-depth(mm[i,1],mm[i,2],mm)
mdep<-max(tempd)
flag<-(tempd==mdep)
if(sum(flag)==1)center<-mm[flag,]
if(sum(flag)>1)center<-apply(mm[flag,],2,mean)
m<-mm
}
points(center[1],center[2],pch="+")
x<-m
temp<-fdepth(m,plotit=F)
flag<-(temp>=median(temp))
xx<-x[flag,]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}}
list(out.id=outid,keep=keep)
}



olsJ2<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,
STAND=TRUE,plotit=TRUE,xlab="X",ylab="Y",ISO=FALSE,...){
#
#  Test hypothesis that for two independent groups, all regression parameters are equal
#  Least squares regression is used.
#
#  Strategy: Use HC4 estimate of standard errors followed by
#  Johansen type test statistic.
#
#  ISO=TRUE, test slopes, ignoring intercept.
#
x1=as.matrix(x1)
p=ncol(x1)
p1=p+1
xy=elimna(cbind(x1,y1))
x1=xy[,1:p]
y1=xy[,p1]
x2=as.matrix(x2)
p=ncol(x2)
p1=p+1
xy=elimna(cbind(x2,y2))
x2=xy[,1:p]
y2=xy[,p1]
if(plotit){
xx1=x1
yy1=y1
xx2=x2
yy2=y2
if(ncol(as.matrix(x1))==1){
if(xout){
flag=outfun(xx1,plotit=FALSE,...)$keep
xx1=x1[flag]
yy1=y1[flag]
flag=outfun(xx2,plotit=FALSE,...)$keep
xx2=x2[flag]
yy2=y2[flag]
}
plot(c(xx1,xx2),c(yy1,yy2),type="n",xlab=xlab,ylab=ylab)
points(xx1,yy1)
points(xx2,yy2,pch="+")
abline(lsfit(xx1,yy1,...)$coef)
abline(lsfit(xx2,yy2,...)$coef,lty=2)
}}
x=list()
y=list()
x[[1]]=x1
x[[2]]=x2
y[[1]]=y1
y[[2]]=y2
if(!ISO)output=ols1way(x,y,xout=xout,outfun=outfun,STAND=STAND,...)
if(ISO)output=ols1wayISO(x,y,xout=xout,outfun=outfun,STAND=STAND,...)
output
}
ebarplot.med<-function(x,y=NULL,alpha=.05,nse=1, liw = uiw, aui=NULL, ali=aui,
err="y", tr=0,ylim=NULL, sfrac = 0.01, gap=0, add=FALSE,
col=par("col"), lwd=par("lwd"), slty=par("lty"), xlab="Group",
                    ylab=NULL, ...) {
# plots error bars using the data in
# x, which is assumed to be a matrix with J columns (J groups) or
# x has list mode.
# nse indicates how many standard errors to use when plotting.
#
# Designed specifically for medians
# Uses distribution-free confidence intervals
#
# Missing values are automatically removed.
#
if(!is.null(y)){
if(is.matrix(x))stop("When y is given, x should not be a matrix")
if(is.list(x))stop("When y is given, x should not be in list mode")
rem=x
x=list()
x[[1]]=rem
x[[2]]=y
}
if(is.matrix(x))x<-listm(x)
mval<-NA
if(!is.list(x) && is.null(y))stop("This function assumes there
 are  two or more groups")
aui=NA
ali=NA
for(j in 1:length(x)){
mval[j]<-median(x[[j]],na.rm=TRUE)
temp=sint(x[[j]],alpha=alpha,pr=FALSE)
ali[j]=temp[1]
aui[j]=temp[2]
}

plotCI(mval,y=NULL,aui=aui,ali=ali,
                    err="y", ylim=NULL, sfrac = 0.01, gap=0, add=FALSE,
                    col=par("col"), lwd=par("lwd"), slty=par("lty"), xlab=xlab,
                    ylab=ylab)
}
MULtsreg<-function(x,y,tr=.2,RMLTS=T){
# Multivariate Least Trimmed Squares Estimator
# Input:
#   x: data-matrix (n,p)
#   y: data-matrix (n,q)
#   tr: proportion of trimming
#   This function calls an R function written by Kristel Joossens
#
# Output:
#     If RMLTS=F, coef: matrix (p+1,q) of MLTS-regression coefficients
#     If RMLTS=T, coef: matrix (p+1,q) of RMLTS-regression coefficients
#
# Ref: Agullo,J., Croux, C., and Van Aelst, S. (2008)
#      The Multivariate Least Trimmed Squares Estimator,
#      Journal of Multivariate Analysis, 99, 311-338.
#
x=as.matrix(x)
xy=elimna(cbind(x,y))
xx=as.matrix(cbind(rep(1,nrow(xy)),xy[,1:ncol(x)]))
p1=ncol(x)+1
y=as.matrix(xy[,p1:ncol(xy)])
outp=mlts(xx,y,tr)
if(!RMLTS)coef=outp$beta
if(RMLTS)coef=outp$betaR
list(coef=coef)
}
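#
#  Illustrative use (a sketch, not part of the original file; commented out):
#  set.seed(7)
#  x=rnorm(60)
#  y=cbind(x+rnorm(60),2*x+rnorm(60))
#  MULtsreg(x,y)$coef  # 2-by-2 matrix: intercepts in row 1, slopes in row 2
#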
mlts<-function(x,y,gamma,ns=500,nc=10,delta=0.01)
{
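  # A brief summary of the algorithm as implemented below: draw ns random
  # subsets of p+q observations, refine each with nc concentration steps
  # (keeping the h observations with the smallest Mahalanobis residual
  # distances), retain the fit minimizing det(sigma), apply a consistency
  # correction, then reweight using the cutoff delta to obtain RMLTS.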
  d <- dim(x); n <- d[1]; p <- d[2]
  q <- ncol(y)
  h <- floor(n*(1-gamma))+1
  obj0 <- 1e10
  for (i in 1:ns)
  { sorted <- sort(runif(n),na.last = NA,index.return=TRUE)
    istart <- sorted$ix[1:(p+q)]
    xstart <- x[istart,]
    ystart <- y[istart,]
    bstart <- solve(t(xstart)%*%xstart,t(xstart)%*%ystart)
    sigmastart <- (t(ystart-xstart%*%bstart))%*%(ystart-xstart%*%bstart)/q
    for (j in 1:nc)
    { res  <-  y - x %*% bstart
      tres <- t(res)
      dist2 <- colMeans(solve(sigmastart,tres)*tres)
      sdist2 <- sort(dist2,na.last = NA,index.return = TRUE)
      idist2 <- sdist2$ix[1:h]
      xstart <- x[idist2,]
      ystart <- y[idist2,]
      bstart <- solve(t(xstart)%*%xstart,t(xstart)%*%ystart)
      sigmastart <- (t(ystart-xstart%*%bstart))%*%(ystart-xstart%*%bstart)/(h-p)
    }
    obj <- det(sigmastart)
    if (obj < obj0)
    { result.beta <- bstart
      result.sigma <- sigmastart
      obj0 <- obj
    }
  }
  cgamma <- (1-gamma)/pchisq(qchisq(1-gamma,q),q+2)
  result.sigma <- cgamma * result.sigma
  res <- y - x %*% result.beta
  tres<-t(res)
  result.dres <- colSums(solve(result.sigma,tres)*tres)
  result.dres <- sqrt(result.dres)

  qdelta <- sqrt(qchisq(1-delta,q))
  good  <- (result.dres <= qdelta)
  xgood <- x[good,]
  ygood <- y[good,]
  result.betaR <- solve(t(xgood)%*%xgood,t(xgood)%*%ygood)
  result.sigmaR <- (t(ygood-xgood%*%result.betaR)) %*%
    (ygood-xgood%*%result.betaR)/(sum(good)-p)
  cdelta <- (1-delta)/pchisq(qdelta^2,q+2)
  result.sigmaR<-cdelta*result.sigmaR
  resR<-y-x%*%result.betaR
  tresR<-t(resR)
  result.dresR <- colSums(solve(result.sigmaR,tresR)*tresR)
  result.dresR <- sqrt(result.dresR)
  list(beta=result.beta,sigma=result.sigma,dres=result.dres,
    betaR=result.betaR,sigmaR=result.sigmaR,dresR=result.dresR)
}

ancCR<-function(x1,y1,x2,y2){
v=optim(0,JNH_sub1,x1=x1,y1=y1,x2=x2,y2=y2,method='BFGS')$par             
v[2]=optim(0,JNH_sub2,x1=x1,y1=y1,x2=x2,y2=y2,method='BFGS')$par
a=min(v)
v=c(a,max(v))
v  # return the two solutions in ascending order


tsregNW<-function(x,y,xout=FALSE,outfun=out,iter=10,varfun=pbvar,
corfun=pbcor,plotit=FALSE,tol=.0001,...){
#
#  Compute Theil-Sen regression estimator
#
#  Use Gauss-Seidel algorithm
#  when there is more than one predictor
#
#
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
temp<-NA
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=plotit,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(ncol(x)==1){
temp1<-tsp1reg(x,y)
coef<-temp1$coef
res<-temp1$res
}
if(ncol(x)>1){
for(p in 1:ncol(x)){
temp[p]<-tsp1reg(x[,p],y)$coef[2]
}
res<-y-x%*%temp
alpha<-median(res)
r<-matrix(NA,ncol=ncol(x),nrow=nrow(x))
tempold<-temp
for(it in 1:iter){
for(p in 1:ncol(x)){
r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p]
temp[p]<-tsp1reg(x[,p],r[,p],plotit=FALSE)$coef[2]
}
if(max(abs(temp-tempold))<tol)break
alpha<-median(y-x%*%temp)
tempold<-temp
}
coef<-c(alpha,temp)
res<-y-x%*%temp-alpha
}
yhat<-y-res
stre=NULL
temp=varfun(y)
#if(temp==0)print('Warning: When computing strength of association, measure of variation=0')
e.pow=NULL
if(temp>0){
e.pow<-varfun(yhat)/varfun(y)
if(!is.na(e.pow)){
if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2
e.pow=as.numeric(e.pow)
stre=sqrt(e.pow)
}}
res=NULL
list(coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow)
}


reg2cimcp<-function(x,y,regfun=tsreg,nboot=599,alpha=0.05, 
SEED=TRUE,xout=FALSE,outfun=out,...){
#
# Like reg2ci, only x and y have list mode containing
# data for J>1 groups. All pairs of groups are compared via
# calls to reg2ci.
#
#  x list mode contain a matrix of predictors.
#  x[[1]] contains predictors for first group
#  y[[1]] dependent variable for first group.
#
#
if(!is.list(x))stop('x and y should have list mode')
J=length(x) # number of groups
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
res=reg2ci(x[[j]],y[[j]],x[[k]],y[[k]],regfun=regfun,nboot=nboot,alpha=alpha,
plotit=FALSE,xout=xout,outfun=outfun,WARN=FALSE,...)
print(paste('Group', j,'Group', k))
print(res)
}}}
}


epowv2<-function(x,y,pcor=FALSE,regfun=tsreg,corfun=pbcor,varfun=pbvar,xout=FALSE,outfun=outpro,plotit=FALSE,...){
#
# Estimate the explanatory correlation between x and y
#
# It is assumed that x is a vector or a matrix having one column only
xx<-elimna(cbind(x,y)) # Remove rows with missing values
p1=ncol(xx)
p=p1-1
x<-xx[,1:p]
y<-xx[,p1]
x<-as.matrix(x)
if(xout){
flag<-outfun(x,plotit=plotit,...)$keep
x=x[flag,]
y=y[flag]
}
coef<-regfun(x,y)$coef
yhat<-x %*% coef[2:p1] + coef[1]
stre=NULL
temp=varfun(y)
e.pow=NULL
if(temp>0){
e.pow<-varfun(yhat)/temp
if(e.pow>1)e.pow<-corfun(y,yhat)$cor^2
}
list(Strength.Assoc=sqrt(e.pow),Explanatory.Power=e.pow)
}
rmblo<-function(x,y){
#
# Remove only bad leverage points and return the
# remaining data
#
x=as.matrix(x)
p=ncol(x)
p1=p+1
xy=elimna(cbind(x,y))
x=xy[,1:p]
y=xy[,p1]
temp1=reglev(x,y,plotit=FALSE)
ad1=c(temp1$levpoints,temp1$regout)
flag1=duplicated(ad1)
if(sum(flag1)>0){
flag1=ad1[flag1]
x=as.matrix(x)
x1=x[-flag1,]
y1=y[-flag1]
xy=cbind(x1,y1)
}
list(x=xy[,1:p],y=xy[,p1])
}



ols1way<-function(x,y,xout=FALSE,outfun=outpro,STAND=FALSE,alpha=.05,pr=TRUE,BLO=FALSE,HC3=FALSE,...){
#
#  Test hypothesis that for two or more independent groups, all regression parameters
#  (the intercepts and slopes) are equal
#  using OLS  estimator.
#
#  (To compare slopes only, use ols1way2g)
#
#  Strategy: Use bootstrap estimate of standard errors followed by
#  Johansen MANOVA type test statistic.
#
#  x and y are assumed to have list mode having length J equal to the number of groups
#  For example, x[[1]] and y[[1]] contain the data for group 1.
#
#   xout=T will eliminate leverage points using the function outfun, 
#   which defaults to the MVE method.
#
#   BLO=TRUE, only bad leverage points are removed.
#
#  OUTPUT: 
#   n is sample size after missing values are removed
#   nv.keep is sample size after leverage points are removed.
#
if(!is.list(x))stop('Argument x should have list mode')
J=length(x) # number of groups
x=lapply(x,as.matrix)
pchk=lapply(x,ncol)
temp=matl(pchk)
if(var(as.vector(temp))!=0)stop('Something is wrong. Number of covariates differs among the groups being compared')
nv=NULL
p=ncol(x[[1]])
p1=p+1
for(j in 1:J){
xy=elimna(cbind(x[[j]],y[[j]]))
x[[j]]=xy[,1:p]
y[[j]]=xy[,p1]
x[[j]]=as.matrix(x[[j]])
nv=c(nv,nrow(x[[j]]))
}
nv.keep=nv
critrad=NULL
if(xout){
temp1=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
for(j in 1:J){
x[[j]]=x[[j]][temp1[[j]]$keep,]
y[[j]]=y[[j]][temp1[[j]]$keep]
}
if(BLO){
for(j in 1:J){
temp=reglev(x[[j]],y[[j]],plotit=FALSE)
ad1=c(temp1[[j]]$out.id,temp$regout)
flag1=duplicated(ad1)
if(sum(flag1)>0){
flag1=ad1[flag1]
x[[j]]=as.matrix(x[[j]])
x[[j]]=x[[j]][-flag1,]
y[[j]]=y[[j]][-flag1]
}}}}
x=lapply(x,as.matrix)
K=p1
est=matrix(NA,nrow=J,ncol=p1)
grpnum=NULL
for(j in 1:J)grpnum[j]=paste("Group",j)
vlabs="Intercept"
for(j in 2:p1)vlabs[j]=paste("Slope",j-1)
dimnames(est)=list(grpnum,vlabs)
ecov=list()
ecovinv=list()
W=rep(0,p1)
gmean=rep(0,p1)
for(j in 1:J){
est[j,]=ols(x[[j]],y[[j]],xout=FALSE,plotit=FALSE,...)$coef
nv.keep[j]=nrow(x[[j]])
ecov[[j]]=olshc4(x[[j]],y[[j]],HC3=HC3)$cov
ecovinv[[j]]=solve(ecov[[j]])  #W_j
gmean=gmean+ecovinv[[j]]%*%est[j,]
W=W+ecovinv[[j]]
}
estall=solve(W)%*%gmean
F=0
for(k in 1:K){
for(m in 1:K){
for(j in 1:J){
F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m])
}}}
pvalad=NULL
df=K*(J-1)
iden=diag(p1)
Aw=0
for(j in 1:J){
temp=iden-solve(W)%*%ecovinv[[j]]
tempsq=temp%*%temp
Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1)
}
Aw=Aw/2
crit=qchisq(alpha,df)
crit=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
alval<-c(1:999)/1000
for(i in 1:999){
irem<-i
crit=qchisq(alval[i],df)
critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
if(F<critad)break
}
pvalad=1-irem/1000
#
pval=1-pchisq(F,df)
crit=qchisq(1-alpha,df)
critad=NULL
critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
est=data.frame(est)
list(n=nv,n.keep=nv.keep,test.stat=F,crit.value=crit,adjusted.crit=critad,p.value=pval,adjusted.p.value=pvalad,est=est)
}
ols1wayISO<-function(x,y,xout=FALSE,outfun=outpro,STAND=FALSE,alpha=.05,pr=TRUE,BLO=FALSE,...){
#
#  Test hypothesis that for two or more independent groups, all slope parameters
#  are equal using OLS  estimator.
#
#  Use Johansen MANOVA type test statistic in conjunction with HC4 estimate of covariances.
#
#  x and y are assumed to have list mode having length J equal to the number of groups
#  For example, x[[1]] and y[[1]] contain the data for group 1.
#
#   xout=T will eliminate leverage points using the function outfun, 
#   which defaults to the MVE method.
#
#   BLO=TRUE, only bad leverage points are removed.
#
#  OUTPUT: 
#   n is sample size after missing values are removed
#   nv.keep is sample size after leverage points are removed.
#
if(!is.list(x))stop('Argument x should have list mode')
J=length(x) # number of groups
x=lapply(x,as.matrix)
pchk=lapply(x,ncol)
temp=matl(pchk)
if(var(as.vector(temp))!=0)stop('Something is wrong. Number of covariates differs among the groups being compared')
nv=NULL
p=ncol(x[[1]])
p1=p+1
for(j in 1:J){
xy=elimna(cbind(x[[j]],y[[j]]))
x[[j]]=xy[,1:p]
y[[j]]=xy[,p1]
x[[j]]=as.matrix(x[[j]])
nv=c(nv,nrow(x[[j]]))
}
nv.keep=nv
critrad=NULL
if(xout){
temp1=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
for(j in 1:J){
x[[j]]=x[[j]][temp1[[j]]$keep,]
y[[j]]=y[[j]][temp1[[j]]$keep]
}
if(BLO){
for(j in 1:J){
temp=reglev(x[[j]],y[[j]],plotit=FALSE)
ad1=c(temp1[[j]]$out.id,temp$regout)
flag1=duplicated(ad1)
if(sum(flag1)>0){
flag1=ad1[flag1]
x[[j]]=as.matrix(x[[j]])
x[[j]]=x[[j]][-flag1,]
y[[j]]=y[[j]][-flag1]
}}}}
x=lapply(x,as.matrix)
K=p1
est=matrix(NA,nrow=J,ncol=p1)
grpnum=NULL
for(j in 1:J)grpnum[j]=paste("Group",j)
vlabs="Intercept"
for(j in 2:p1)vlabs[j]=paste("Slope",j-1)
dimnames(est)=list(grpnum,vlabs)
ecov=list()
ecovinv=list()
W=rep(0,p1)
gmean=rep(0,p)
for(j in 1:J){
est[j,]=ols(x[[j]],y[[j]],xout=FALSE,plotit=FALSE,...)$coef
nv.keep[j]=nrow(x[[j]])
ecov[[j]]=olshc4(x[[j]],y[[j]])$cov
ecovinv[[j]]=solve(ecov[[j]])  #W_j
gmean=gmean+ecovinv[[j]][2:K,2:K]%*%est[j,2:K]
W=W+ecovinv[[j]]
}
estall=solve(W[2:K,2:K])%*%gmean
estall=c(0,estall)
F=0
for(k in 2:K){
for(m in 2:K){
for(j in 1:J){
F=F+ecovinv[[j]][k,m]*(est[j,k]-estall[k])*(est[j,m]-estall[m])
}}}
pvalad=NULL
df=p*(J-1)
# Adjust critical value:
iden=diag(p)
Aw=0
for(j in 1:J){
temp=iden-solve(W[2:K,2:K])%*%ecovinv[[j]][2:K,2:K]
tempsq=temp%*%temp
Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[j]-1)
}
Aw=Aw/2
crit=qchisq(alpha,df)
crit=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
alval<-c(1:999)/1000
for(i in 1:999){
irem<-i
crit=qchisq(alval[i],df)
critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
if(F<critad)break
}
pvalad=1-irem/1000
#
pval=1-pchisq(F,df)
crit=qchisq(1-alpha,df)  #UNADJUSTED CRITICAL VALUE
critad=NULL
critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
est=data.frame(est)
list(n=nv,n.keep=nv.keep,test.stat=F,crit.value=crit,adjusted.crit=critad,p.value=pval,adjusted.p.value=pvalad,est=est)
}
corbMC<-function(x,y,corfun=pbcor,nboot=599,SEED=TRUE,...){
#
#   Compute a .95 confidence interval for a correlation.
#   The default correlation is the percentage bend.
#
#   The function corfun is any R function that returns a
#   correlation coefficient in corfun$cor. The functions pbcor and
#   wincor follow this convention.
#
#   When using Pearson's correlation, and when n<250, use
#   lsfitci instead.
#
#   The default number of bootstrap samples is nboot=599
#
library(parallel)
m1=cbind(x,y)
m1<-elimna(m1)  # Eliminate rows with missing values
nval=nrow(m1)
x<-m1[,1]
y<-m1[,2]
est<-corfun(x,y,...)$cor
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#
#  If you use corfun=scor, set plotit=F
#
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
data=listm(t(data))
bvec<-mclapply(data,corbsub,x,y,corfun,...) 
bvec=matl(bvec)   # A 1 by nboot matrix.
ihi<-floor(.975*nboot+.5)
ilow<-floor(.025*nboot+.5)
bsort<-sort(bvec)
corci<-1
corci[1]<-bsort[ilow]
corci[2]<-bsort[ihi]
phat <- sum(bvec < 0)/nboot
sig <- 2 * min(phat, 1 - phat)
list(cor.ci=corci,p.value=sig,cor.est=est)
}
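#
#  Illustrative use (a sketch, not part of the original file; commented out):
#  set.seed(8)
#  x=rnorm(40)
#  y=x+rnorm(40)
#  corbMC(x,y)  # bootstrap CI and p-value for the percentage bend correlation
#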
corbsubMC<-function(isub,x,y,corfun,...){
isub=as.vector(isub)
corbsub<-corfun(x[isub],y[isub],...)$cor
corbsub
}
scorci<-function(x,y,nboot=599,SEED=TRUE,plotit=TRUE,STAND=FALSE,corfun=pcor,cop=3,...){
#
#   Compute a .95 confidence interval for the skipped correlation.
#   A multi-core processor is assumed to be available and the R
#   package multicore must be installed.
#
#   The default number of bootstrap samples is nboot=599
#
if(ncol(as.matrix(x))!=1)stop('x should be a single vector')
m1=cbind(x,y)
m1<-elimna(m1)  # Eliminate rows with missing values
nval=nrow(m1)
x<-m1[,1]
y<-m1[,2]
est<-scor(x,y,plotit=plotit,STAND=STAND,corfun=corfun,SEED=TRUE,cop=cop,...)$cor.values
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
data=listm(t(data))
bvec<-lapply(data,scorsubMC,x,y,STAND=STAND,corfun=corfun,cop=cop,...) 
bvec=matl(bvec)   # A 1 by nboot matrix.
bvec=as.vector(bvec)
ihi<-floor(.975*nboot+.5)
ilow<-floor(.025*nboot+.5)
bsort<-sort(bvec)
#print(bsort)
corci<-1
corci[1]<-bsort[ilow]
corci[2]<-bsort[ihi]
phat <- sum(bvec < 0)/nboot
sig <- 2 * min(phat, 1 - phat)
list(cor.ci=corci,p.value=sig,cor.est=est)
}
scorsubMC<-function(isub,x,y,pr=FALSE,STAND=FALSE,corfun=corfun,cop=cop,...){
isub=as.vector(isub)
corbsub<-scor(x[isub],y[isub],plotit=FALSE,pr=FALSE,STAND=STAND,corfun=corfun,cop=cop,SEED=FALSE,...)$cor.values
corbsub
}
normTmm<-function(x,SEED=TRUE,nboot=2000){
#
# Test that the tails of the distribution of x
# have more outliers than expected under normality
#
if(SEED)set.seed(45)
no=out(x,SEED=FALSE)$n.out
val=NA
x=elimna(x)
n=length(x)
for(i in 1:nboot)val[i]=out(rnorm(n),SEED=FALSE)$n.out
list(n.out=no,p.value=mean(val>=no))
}
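#
#  Illustrative use (a sketch, not part of the original file; commented out):
#  normTmm(rt(50,df=2))  # heavy-tailed data typically yield a small p-value
#  normTmm(rnorm(50))    # normal data typically yield a large p-value
#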

rplot<-function(x,y,est=tmean,scat=TRUE,fr=NA,plotit=TRUE,pyhat=FALSE,efr=.5,
theta=50,phi=25,scale=FALSE,expand=.5,SEED=TRUE,varfun=pbvar,outfun=outpro,
nmin=0,xout=FALSE,out=FALSE,eout=FALSE,xlab='X',ylab='Y',zscale=FALSE,
zlab=' ',pr=TRUE,duplicate='error',ticktype='simple',LP=TRUE,...){
# duplicate='error'
# In some situations where duplicate values occur, when plotting with
# two predictors, it is necessary to set duplicate='strip'
#
# LP=TRUE, the plot of the smooth is further smoothed via lplot (lowess)
# To get a plot as done with the old version, set
# LP=FALSE
# 
#  zscale=TRUE will standardize the dependent variable when plotting with 2 independent variables.
# 
# efr is the span when computing explanatory strength of association
#
x<-as.matrix(x)
p=ncol(x)
xx<-cbind(x,y)
xx<-elimna(xx)
if(eout){
flag=outfun(xx,...)$keep
xx=xx[flag,]
}
if(xout){
flag=outfun(xx[,1:p],...)$keep
xx=xx[flag,]
}
x<-xx[,1:p]
x<-as.matrix(x)
p1=ncol(x)+1
y<-xx[,p1]
if(ncol(x)==1){
if(is.na(fr))fr<-.8
val<-rungen(x,y,est=est,scat=scat,fr=fr,plotit=plotit,pyhat=TRUE,
xlab=xlab,ylab=ylab,LP=LP,...)
val2<-rungen(x,y,est=est,fr=efr,plotit=FALSE,pyhat=TRUE,LP=FALSE,...)$output
val<-val$output
}
if(ncol(x)>1){
if(ncol(x)==2 && !scale){
if(pr){print('scale=FALSE is specified.')
print('If there is dependence, might want to use scale=T')
}}
if(is.na(fr))fr<-1
val<-rung3d(x,y,est=est,fr=fr,plotit=plotit,pyhat=TRUE,SEED=SEED,nmin=nmin,LP=LP,
scale=scale,phi=phi,theta=theta,expand=expand,zscale=zscale,pr=FALSE,
duplicate='error',xlab=xlab,ylab=ylab,zlab=zlab,ticktype=ticktype,...)
}
#E.power=rplotCV(x,y,fr=fr,varfun=varfun,est=est)$VAR.Y.HAT/varfun(y)
E.power=varfun(val)/varfun(y)
if(E.power>1)E.power=.99
stra=sqrt(E.power)
# Best correction at the moment. Not sure when or if needed. 
# Maybe a correlation option is better, but need to check this.
if(!pyhat)val <- NULL
list(Strength.Assoc=stra,Explanatory.Power = E.power, yhat = val)
}

Rfit<-function(x,y,xout=FALSE,outfun=outpro,...){
#
#  Fit regression line using rank-based method based
#  Jaeckel's dispersion function
#  via the R package Rfit
#
library(Rfit)
if(xout){
x<-as.matrix(x)
p<-ncol(x)
p1<-p+1
m<-cbind(x,y)
flag<-outfun(x,plotit=FALSE,...)$keep
m<-m[flag,]
x<-m[,1:p]
y<-m[,p1]
}
fit=rfit(y~x)
output=summary(fit)
list(summary=output[1]$coefficients,coef=output[1]$coefficients[,1],Drop_test=output[2]$dropstat,
 Drop_test_p.value=output[3]$droppval,Mult_R_squared=output[4]$R2)
}       

regunstack<-function(x,grp,xcols,ycol){
#
#  x is assumed to be a matrix or a data frame
#
# sort data in x into group indicated by col grp of x,
# Designed for a one-way ANOVA where goal is to compare slopes
# corresponding to two or more groups.
#
# returns the independent variables in x having list mode
# x[[1]] would be a matrix for group 1, x[[2]] a matrix for group 2, etc
# y[[1]] is the dependent variable for group 1, etc. 
#
# xcols indicates the columns of x containing independent variables
# ycol  indicates the column of x containing  dependent variables
#
x=elimna(x)
val=sort(unique(x[,grp]))
xs=list()
ys=list()
for(i in 1:length(val)){
flag=(x[,grp]==val[i])
xs[[i]]=x[flag,xcols]
ys[[i]]=x[flag,ycol]
}
list(num.grps=length(val),x=xs,y=ys)
}
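#
#  Illustrative use (a sketch, not part of the original file; commented out,
#  with m a hypothetical matrix: group indicator in column 3, predictors in
#  columns 1 and 2, outcome in column 4):
#  dat=regunstack(m,grp=3,xcols=1:2,ycol=4)
#  reg1way(dat$x,dat$y)  # compare regression parameters across the groups
#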




ols1way2g<-function(x,y,grp=c(1,2),iv=1,xout=FALSE,outfun=outpro,STAND=FALSE,alpha=.05,pr=TRUE,BLO=FALSE,...){
#
#  Test the hypothesis that two specified independent groups (indicated by the
#  argument grp) have equal slopes for the predictor indicated by the argument
#  iv, using the OLS estimator.
#
#  (ols1way tests the hypothesis that all parameters are equal, not just slopes.)
#
#  Use Johansen MANOVA type test statistic in conjunction with HC4 estimate of covariances.
#
#  x and y are assumed to have list mode having length J equal to the number of groups
#  For example, x[[1]] and y[[1]] contain the data for group 1.
#
#   xout=T will eliminate leverage points using the function outfun, 
#   which defaults to the MVE method.
#
#   BLO=TRUE, only bad leverage points are removed.
#
#  OUTPUT: 
#   n is sample size after missing values are removed
#   nv.keep is sample size after leverage points are removed.
#
if(!is.list(x))stop('Argument x should have list mode')
iv1=iv+1
J=length(x) # number of groups
x=lapply(x,as.matrix)
pchk=lapply(x,ncol)
temp=matl(pchk)
if(var(as.vector(temp))!=0)stop('Something is wrong. Number of covariates differs among the groups being compared')
nv=NULL
p=ncol(x[[1]])
p1=p+1
for(j in 1:J){
xy=elimna(cbind(x[[j]],y[[j]]))
x[[j]]=xy[,1:p]
y[[j]]=xy[,p1]
x[[j]]=as.matrix(x[[j]])
nv=c(nv,nrow(x[[j]]))
}
nv.keep=nv
critrad=NULL
if(xout){
temp1=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
for(j in 1:J){
x[[j]]=x[[j]][temp1[[j]]$keep,]
y[[j]]=y[[j]][temp1[[j]]$keep]
}
if(BLO){
for(j in 1:J){
temp=reglev(x[[j]],y[[j]],plotit=FALSE)
ad1=c(temp1[[j]]$out.id,temp$regout)
flag1=duplicated(ad1)
if(sum(flag1)>0){
flag1=ad1[flag1]
x[[j]]=as.matrix(x[[j]])
x[[j]]=x[[j]][-flag1,]
y[[j]]=y[[j]][-flag1]
}}}}
x=lapply(x,as.matrix)
K=p1
est=matrix(NA,nrow=J,ncol=p1)
grpnum=NULL
for(j in 1:J)grpnum[j]=paste("Group",j)
vlabs="Intercept"
for(j in 2:p1)vlabs[j]=paste("Slope",j-1)
dimnames(est)=list(grpnum,vlabs)
ecov=list()
ecovinv=list()
W=rep(0,p1)
gmean=rep(0,K)
for(j in 1:J){
est[j,]=ols(x[[j]],y[[j]],xout=FALSE,plotit=FALSE,...)$coef
nv.keep[j]=nrow(x[[j]])
ecov[[j]]=olshc4(x[[j]],y[[j]])$cov
ecovinv[[j]]=solve(ecov[[j]])  #W_j
gmean=gmean+ecovinv[[j]]%*%est[j,]
W=W+ecovinv[[j]]
}
estall=solve(W)%*%gmean
F=0
for(j in 1:2){
F=F+ecovinv[[grp[j]]][iv1,iv1]*(est[grp[j],iv1]-estall[iv1])*(est[grp[j],iv1]-estall[iv1])
}
pvalad=NULL
df=1
# Adjust critical value:
iden=1
Aw=0
for(j in 1:J){
temp=iden-solve(W[iv1,iv1])%*%ecovinv[[grp[j]]][iv1,iv1]
tempsq=temp%*%temp
Aw=Aw+(sum(diag(tempsq))+(sum(diag(temp)))^2)/(nv[grp[j]]-1)
}
Aw=Aw/2
crit=qchisq(alpha,df)
crit=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
alval<-c(1:999)/1000
for(i in 1:999){
irem<-i
crit=qchisq(alval[i],df)
critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
if(F<critad)break
pvalad=1-irem/1000
}
#
pval=1-pchisq(F,df)
if(is.null(pvalad))pvalad=pval
crit=qchisq(1-alpha,df)  #UNADJUSTED CRITICAL VALUE
critad=NULL
critad=crit+(crit/(2*df))*(Aw+3*Aw*crit/(df+2))
est=data.frame(est)
list(n=nv,n.keep=nv.keep,test.stat=F,crit.value=crit,
adjusted.crit=critad,p.value=pval,adjusted.p.value=pvalad,est=est)
}
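#
#  Example usage (illustrative sketch with simulated data, commented out):
#
#  set.seed(1)
#  x=list(); y=list()
#  for(j in 1:2){x[[j]]=rnorm(30); y[[j]]=x[[j]]+rnorm(30)}
#  ols1way2g(x,y)   # compare the slopes of groups 1 and 2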

olsW2g<-function(x,y,grp=c(1,2),iv=1,xout=FALSE,outfun=outpro,STAND=FALSE,alpha=.05,pr=TRUE,BLO=FALSE,HC3=FALSE,...){
#
#  For J independent groups, test the hypothesis that the two groups
#  indicated by the argument grp have equal slopes for the predictor
#  indicated by the argument iv, using the OLS estimator.
#
#  Use a Welch-type test statistic in conjunction with the HC4 estimate
#  of the covariances.
#
#  x and y are assumed to have list mode with length J equal to the number of groups.
#  For example, x[[1]] and y[[1]] contain the data for group 1.
#
#   xout=T will eliminate leverage points using the function outfun,
#   which defaults to the projection method (outpro).
#
#   BLO=TRUE: only bad leverage points are removed.
#
#  OUTPUT:
#   n is the sample size after missing values are removed
#   n.keep is the sample size after leverage points are removed
#
if(!is.list(x))stop('Argument x should have list mode')
iv1=iv+1
J=length(x) # number of groups
x=lapply(x,as.matrix)
pchk=lapply(x,ncol)
temp=matl(pchk)
if(var(as.vector(temp))!=0)stop('Something is wrong. Number of covariates differs among the groups being compared')
nv=NULL
p=ncol(x[[1]])
p1=p+1
for(j in 1:J){
xy=elimna(cbind(x[[j]],y[[j]]))
x[[j]]=xy[,1:p]
y[[j]]=xy[,p1]
x[[j]]=as.matrix(x[[j]])
nv=c(nv,nrow(x[[j]]))
}
nv.keep=nv
critrad=NULL
if(xout){
temp1=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
for(j in 1:J){
x[[j]]=x[[j]][temp1[[j]]$keep,]
y[[j]]=y[[j]][temp1[[j]]$keep]
}
if(BLO){
for(j in 1:J){
temp=reglev(x[[j]],y[[j]],plotit=FALSE)
ad1=c(temp1[[j]]$out.id,temp$regout)
flag1=duplicated(ad1)
if(sum(flag1)>0){
flag1=ad1[flag1]
x[[j]]=as.matrix(x[[j]])
x[[j]]=x[[j]][-flag1,]
y[[j]]=y[[j]][-flag1]
}}}}
x=lapply(x,as.matrix)
K=p1
est=matrix(NA,nrow=J,ncol=p1)
grpnum=NULL
for(j in 1:J)grpnum[j]=paste("Group",j)
vlabs="Intercept"
for(j in 2:p1)vlabs[j]=paste("Slope",j-1)
dimnames(est)=list(grpnum,vlabs)
ecov=list()
ecovinv=list()
W=rep(0,p1)
for(j in 1:J){
est[j,]=ols(x[[j]],y[[j]],xout=FALSE,plotit=FALSE,...)$coef
nv.keep[j]=nrow(x[[j]])
ecov[[j]]=olshc4(x[[j]],y[[j]],HC3=HC3)$cov
}
q1=ecov[[grp[1]]][iv1,iv1]
q2=ecov[[grp[2]]][iv1,iv1]
F=(est[grp[1],iv1]-est[grp[2],iv1])/sqrt(q1+q2)
df=(q1+q2)^2/(q1^2/(nv[grp[1]]-1)+q2^2/(nv[grp[2]]-1))
pv=2*(1-pt(abs(F),df))
crit=qt(1-alpha/2,df)
ci=est[grp[1],iv1]-est[grp[2],iv1]-crit*sqrt(q1+q2)
ci[2]=est[grp[1],iv1]-est[grp[2],iv1]+crit*sqrt(q1+q2)
list(n=nv,n.keep=nv.keep,test.stat=F,conf.interval=ci,
est=c(est[grp[1],iv1],est[grp[2],iv1]),est.dif=est[grp[1],iv1]-est[grp[2],iv1],p.value=pv)
}
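#
#  Example usage (illustrative sketch with simulated data, commented out):
#
#  set.seed(1)
#  x=list(); y=list()
#  for(j in 1:2){x[[j]]=rnorm(40); y[[j]]=x[[j]]+rnorm(40)}
#  olsW2g(x,y)   # Welch-type test and a confidence interval for the difference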

cov.roc<-function(x){
library(robust)
temp<-covRob(x,estim='M')
val<-temp
list(center=val[3]$center,cov=val[2]$cov)
}
reg1mcp<-function(x,y,regfun=tsreg,SEED=TRUE,nboot=100,xout=FALSE,outfun=outpro,STAND=FALSE,alpha=.05,pr=TRUE,MC=FALSE,...){
#
#  Perform all pairwise comparisons of intercepts among J independent groups
#  Do the same for the first slope, followed by the 2nd slope, etc.
#
#  Control FWE via Hochberg's method for each set of
#  (J^2-J)/2 comparisons. That is, control FWE for the intercepts;
#  do the same for the first slope, etc.
#
#  x and y are assumed to have list mode with
#  length J equal to the number of groups.
#  For example, x[[1]] and y[[1]] contain the data for group 1.
#
#   xout=T will eliminate leverage points using the function outfun, 
#   which defaults to the projection method.
#
#  OUTPUT: 
#   n is sample size after missing values are removed
#   nv.keep is sample size after leverage points are removed.
#   output contains all pairwise comparisons  
#   For each parameter, FWE is controlled using Hochberg's method
#   So by default, for the intercepts, 
#   all pairwise comparisons are performed with FWE=.05
#   For the first slope, all pairwise comparisons are performed 
#   with FWE=.05, etc.
#
if(SEED)set.seed(2)
if(!is.list(x))stop('Argument x should have list mode')
if(!is.list(y))stop('Argument y should have list mode')
J=length(x) # number of groups
x=lapply(x,as.matrix)
pchk=lapply(x,ncol)
temp=matl(pchk)
if(var(as.vector(temp))!=0)stop('Something is wrong. Number of covariates differs among the groups being compared')
nv=NULL
p=ncol(x[[1]])
p1=p+1
for(j in 1:J){
xy=elimna(cbind(x[[j]],y[[j]]))
x[[j]]=xy[,1:p]
y[[j]]=xy[,p1]
x[[j]]=as.matrix(x[[j]])
nv=c(nv,nrow(x[[j]]))
}
nv.keep=nv
critrad=NULL
if(xout){
temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
for(j in 1:J){
x[[j]]=x[[j]][temp[[j]]$keep,]
y[[j]]=y[[j]][temp[[j]]$keep]
nv.keep[j]=length(y[[j]])
}}
tot=(J^2-J)/2
dvec<-alpha/c(1:tot)
outl=list()
nr=tot*p1
outp=matrix(NA,ncol=5,nrow=nr)
x=lapply(x,as.matrix)
rlab=rep('Intercept',tot)
xx=list()
yy=list()
iall=0
ivp=c(1,tot)-tot
for(ip in 1:p){
#iv=ip-1
rlab=c(rlab,rep(paste('slope',ip),tot))
}
i=0
sk=1+tot*p
st=seq(1,sk,tot)
st=st-1
for(j in 1:J){
for(k in 1:J){
if(j < k){
i=i+1
st=st+1
xx[[1]]=x[[j]][,1:p]
xx[[2]]=x[[k]][,1:p]
yy[[1]]=y[[j]]
yy[[2]]=y[[k]]
# The MC option currently has no effect here; reg2ci is called either way.
temp=reg2ci(xx[[1]],yy[[1]],xx[[2]],yy[[2]],regfun=regfun)$output
iall=iall+1
outp[iall,1]=j
outp[iall,2]=k
outp[st,3]=temp[,4]
}}}
for(i in 1:p1){
ivp=ivp+tot
temp2<-order(0-outp[ivp[1]:ivp[2],3])
icc=c(ivp[1]:ivp[2])
icc[temp2]=dvec
outp[ivp[1]:ivp[2],4]=icc
}
flag=(outp[,3]<=outp[,4])                                   
outp[,5]=rep(0,nr)
outp[flag,5]=1   
v=outp[1:tot,1]
vall=rep(v,p1)
outp[,1]=vall
v=outp[1:tot,2]
vall=rep(v,p1)
outp[,2]=vall
dimnames(outp)=list(rlab,c('Group','Group','p.value','p.crit','Sig'))
list(n=nv,n.keep=nv.keep,output=outp)
}
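#
#  Example usage (illustrative sketch with simulated data, commented out):
#
#  set.seed(1)
#  x=list(); y=list()
#  for(j in 1:3){x[[j]]=rnorm(30); y[[j]]=x[[j]]+rnorm(30)}
#  reg1mcp(x,y)$output   # pairwise comparisons of intercepts, then slopes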



qcor<-function(x,y,q=.5,xout=FALSE,outfun=outpro,plotit=FALSE,...){
#
# Compute a measure of the strength of the association
# based on the quantile regression lines
#
X=cbind(x,y)
X=elimna(X)
x<-as.matrix(x)
p=ncol(x)
x=X[,1:p]
p1=p+1
y=X[,p1]
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=plotit,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
X=cbind(x,y)
}
est=qreg(x,y,q=q)$coef
top=qreg.sub(X,est,qval=q)
null=hd(y,q)
v=c(null,rep(0,p))
bot=qreg.sub(X,v,qval=q)
est=sqrt(1-top/bot)
list(cor.q=est)
}
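#
#  Example usage (illustrative sketch with simulated data, commented out):
#
#  set.seed(1)
#  x=rnorm(50); y=x+rnorm(50)
#  qcor(x,y)         # strength of association for the median regression line
#  qcor(x,y,q=.75)   # same for the .75 quantile regression line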

scorciMC<-function(x,y,nboot=599,SEED=TRUE,plotit=TRUE,STAND=FALSE,corfun=pcor,cop=3,...){
#
#   Compute a .95 confidence interval for the skipped correlation.
#   A multi-core processor is assumed to be available; bootstrap samples
#   are processed in parallel via mclapply in the R package parallel.
#
#   The default number of bootstrap samples is nboot=599
#
library(parallel)
if(ncol(as.matrix(x))!=1)stop('x should be a single vector')
m1=cbind(x,y)
m1<-elimna(m1)  # Eliminate rows with missing values
nval=nrow(m1)
x<-m1[,1]
y<-m1[,2]
est<-scor(x,y,plotit=plotit,STAND=STAND,corfun=corfun,cop=cop,...)$cor.values
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
#
data<-matrix(sample(length(y),size=length(y)*nboot,replace=TRUE),nrow=nboot)
data=listm(t(data))
bvec<-mclapply(data,scorsubMC,x,y,STAND=STAND,corfun=corfun,cop=cop,...) 
bvec=matl(bvec)   # A 1 by nboot matrix.
bvec=as.vector(bvec)
ihi<-floor(.975*nboot+.5)
ilow<-floor(.025*nboot+.5)
bsort<-sort(bvec)
corci<-1
corci[1]<-bsort[ilow]
corci[2]<-bsort[ihi]
phat <- sum(bvec < 0)/nboot
sig <- 2 * min(phat, 1 - phat)
list(cor.ci=corci,p.value=sig,cor.est=est)
}
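#
#  Example usage (illustrative sketch with simulated data, commented out;
#  a multicore system is assumed, since mclapply is used):
#
#  set.seed(1)
#  x=rnorm(60); y=x+rnorm(60)
#  scorciMC(x,y,plotit=FALSE)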

olsLmcp<-function(x,y,xout=TRUE,outfun=outpro,STAND=FALSE,alpha=.05,pr=TRUE,BLO=FALSE,HC3=FALSE,...){
#
#  All pairwise comparisons of regression models among J independent groups.
#  That is, for groups j and k, all j<k, test H_0: all corresponding
#  parameters are equal.
#
#  Strategy: Use the HC4 estimate of the standard errors followed by a
#  Johansen-type test statistic.
#
#  Hochberg's method is used to control FWE.
#
#  x and y are assumed to have list mode having length J equal to the number of groups
#  For example, x[[1]] and y[[1]] contain the data for group 1.
#
#   xout=T will eliminate leverage points using the function outfun, 
#   which defaults to the projection method.
#
#  OUTPUT:
#   n is sample size after missing values are removed
#   nv.keep is sample size after leverage points are removed.
#   output contains all pairwise comparisons.
#   For each parameter, FWE is controlled using Hochberg's method
#
if(!is.list(x))stop('Argument x should have list mode')
if(!is.list(y))stop('Argument y should have list mode')
J=length(x) # number of groups
x=lapply(x,as.matrix)
pchk=lapply(x,ncol)
temp=matl(pchk)
if(var(as.vector(temp))!=0)stop('Something is wrong. 
Number of covariates differs among the groups being compared')
nv=NULL
p=ncol(x[[1]])
p1=p+1
for(j in 1:J){
xy=elimna(cbind(x[[j]],y[[j]]))
x[[j]]=xy[,1:p]
y[[j]]=xy[,p1]
x[[j]]=as.matrix(x[[j]])
nv=c(nv,nrow(x[[j]]))
}
nv.keep=nv
critrad=NULL
if(xout){
temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
for(j in 1:J){
x[[j]]=x[[j]][temp[[j]]$keep,]
y[[j]]=y[[j]][temp[[j]]$keep]
nv.keep[j]=length(y[[j]])
}}
tot=(J^2-J)/2
dvec<-alpha/c(1:tot)
outl=list()
nr=tot*p1
outp=matrix(NA,ncol=5,nrow=tot)
x=lapply(x,as.matrix)
xx=list()
yy=list()
iall=0
ivp=c(1,tot)-tot
i=0
for(j in 1:J){
for(k in 1:J){
if(j < k){
i=i+1
xx[[1]]=x[[j]]
xx[[2]]=x[[k]]
yy[[1]]=y[[j]]
yy[[2]]=y[[k]]
all=ols1way(xx,yy,HC3=HC3)
temp=all$adjusted.p.value
if(is.null(temp))temp=all$p.value
outp[i,1]=j
outp[i,2]=k
outp[i,3]=temp
}}}
# Hochberg: the largest p-value is compared with alpha, the next largest with alpha/2, etc.
temp2<-order(0-outp[,3])
icc=c(1:tot)
icc[temp2]=dvec
outp[,4]=icc
flag=(outp[,3]<=outp[,4])
outp[,5]=rep(0,tot)
outp[flag,5]=1
dimnames(outp)=list(NULL,c('Group','Group','p.value','p.crit','sig'))
list(n=nv,n.keep=nv.keep,output=outp)
}
olsJmcp=olsLmcp
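#
#  Example usage of olsLmcp (illustrative sketch with simulated data,
#  commented out; three groups, two predictors each):
#
#  set.seed(1)
#  x=list(); y=list()
#  for(j in 1:3){x[[j]]=matrix(rnorm(60),ncol=2); y[[j]]=x[[j]][,1]+rnorm(30)}
#  olsLmcp(x,y)$output   # one global test per pair of groups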



olsWmcp<-function(x,y,xout=TRUE,outfun=outpro,STAND=FALSE,alpha=.05,pr=TRUE,BLO=FALSE,HC3=FALSE,...){
#
#  All pairwise comparisons of the intercepts, followed by all pairwise
#  comparisons of the first slope, etc.,
#  using the OLS estimator.
#
#  Strategy: Use the HC4 (or HC3, if HC3=TRUE) estimate of the standard
#  errors, via olsW2g, followed by a Welch-type test statistic.
#
#  x and y are assumed to have list mode having length J equal to the number of groups
#  For example, x[[1]] and y[[1]] contain the data for group 1.
#
#   xout=T will eliminate leverage points using the function outfun, 
#   which defaults to the projection method.
#
#  OUTPUT:
#   n is sample size after missing values are removed
#   nv.keep is sample size after leverage points are removed.
#   output contains all pairwise comparisons.
#   For each parameter, FWE is controlled using Hochberg's method
#   So by default, for the intercepts, 
#   all pairwise comparisons are performed with FWE=.05
#   For the first slope, all pairwise comparisons 
#   are performed with FWE=.05, etc.
#
if(!is.list(x))stop('Argument x should have list mode')
if(!is.list(y))stop('Argument y should have list mode')
J=length(x) # number of groups
x=lapply(x,as.matrix)
pchk=lapply(x,ncol)
temp=matl(pchk)
if(var(as.vector(temp))!=0)stop('Something is wrong. 
Number of covariates differs among the groups being compared')
nv=NULL
p=ncol(x[[1]])
p1=p+1
for(j in 1:J){
xy=elimna(cbind(x[[j]],y[[j]]))
x[[j]]=xy[,1:p]
y[[j]]=xy[,p1]
x[[j]]=as.matrix(x[[j]])
nv=c(nv,nrow(x[[j]]))
}
nv.keep=nv
critrad=NULL
if(xout){
temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
for(j in 1:J){
x[[j]]=x[[j]][temp[[j]]$keep,]
y[[j]]=y[[j]][temp[[j]]$keep]
}}

tot=(J^2-J)/2
dvec<-alpha/c(1:tot)
outl=list()
nr=tot*p1
outp=matrix(NA,ncol=7,nrow=nr)
x=lapply(x,as.matrix)
rlab=rep('Intercept',tot)
xx=list()
yy=list()
iall=0
ivp=c(1,tot)-tot
for(ip in 1:p1){
iv=ip-1
i=0
if(iv>0)rlab=c(rlab,rep(paste('slope',iv),tot))
for(j in 1:J){
for(k in 1:J){
if(j < k){
i=i+1
xx[[1]]=x[[j]][,1:p]
xx[[2]]=x[[k]][,1:p]
yy[[1]]=y[[j]]
yy[[2]]=y[[k]]
all=olsW2g(xx,yy,iv=iv,BLO=BLO,HC3=HC3)
temp=all$p.value
iall=iall+1
outp[iall,1]=j
outp[iall,2]=k
outp[iall,3]=all$conf.interval[1]
outp[iall,4]=all$conf.interval[2]
outp[iall,5]=temp
}}}
ivp=ivp+tot
temp2<-order(0-outp[ivp[1]:ivp[2],5])
icc=c(ivp[1]:ivp[2])
icc[temp2]=dvec
outp[ivp[1]:ivp[2],6]=icc
}
flag=(outp[,5]<=outp[,6])
outp[,7]=rep(0,nr)
outp[flag,7]=1
dimnames(outp)=list(rlab,c('Group','Group','ci.low','ci.up','p.value','p.crit','sig'))
list(n=nv,n.keep=nv.keep,output=outp)
}



anctsmcp<-function(x,y,regfun=tsreg,nboot=599,alpha=0.05,pts=NULL, 
SEED=TRUE,xout=FALSE,outfun=out,fr1=1,fr2=1,...){
#
# Like reg2ci, only x and y have list mode containing
# data for J>1 groups. All pairs of groups are compared via a
# call to ancts.
#
#  Each element of x contains a matrix of predictors:
#  x[[1]] contains the predictors for the first group,
#  y[[1]] the dependent variable for the first group, etc.
#
#
if(!is.list(x))stop('x and y should have list mode')
J=length(x) # number of groups
jcom<-0
for (j in 1:J){
for (k in 1:J){
if (j < k){
res=ancts(x[[j]],y[[j]],x[[k]],y[[k]],regfun=regfun,pts=pts,
nboot=nboot,alpha=alpha,fr1=fr1,fr2=fr2,
plotit=FALSE,xout=xout,outfun=outfun,WARN=FALSE,...)
print(paste('Group', j,'Group', k))
print(res)
}}}
}

chregF<-function(x,y,bend=1.345,SEED=FALSE,xout=FALSE,outfun=out,...){
#
# Compute Coakley Hettmansperger robust regression estimators
# JASA, 1993, 88, 872-880
#
# x is a n by p matrix containing the predictor values.
#
# No missing values are allowed
#
#  Comments in this function follow the notation used
#  by Coakley and Hettmansperger
#
library(MASS)
# with old version of R, need library(lqs) when using ltsreg
# as the initial estimate.
#
if(SEED)set.seed(12) # Set seed so that results are always duplicated.
x<-as.matrix(x)
p<-ncol(x)
m<-elimna(cbind(x,y))
x<-m[,1:p]
p1<-p+1
y<-m[,p1]
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=FALSE,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
x<-as.matrix(x)
cutoff<-bend
mve<-vector('list')
if(ncol(x)==1){
mve$center<-median(x)
mve$cov<-mad(x)^2
}
if(ncol(x)>=2)mve<-cov.mve(x)  # compute minimum volume ellipsoid measures of
                 # location and scale and store in mve.
reg0<-ltsgreg(x,y) # compute initial regression est using least trimmed
                 # squares.
# Next, compute the rob-md2(i) values and store in rob
rob<-1  # Initialize vector rob
mx<-mve$center
rob<-mahalanobis(x,mx,mve$cov)
k21<-qchisq(.95,p)
c62<-k21/rob
vecone<-c(rep(1,length(y))) # Initialize vector vecone to 1
c30<-pmin(vecone,c62)  # mallows weights put in c30
k81<-median(abs(reg0$residuals)) # median of absolute residuals
k72<-1.4826*(1+(5/(length(y)-p-1)))*k81 # lms scale
c60<-reg0$residuals/(k72*c30) # standardized residuals
#  compute psi and store in c27
cvec<-c(rep(cutoff,length(y))) # Initialize vector cvec to cutoff
c27<-pmin(cvec,c60)
c27<-pmax(-1*cutoff,c27)  #c27 contains psi values
#
# compute B matrix and put in c66.
#  Also, transform B so that i th diag elem = 0 if c27[i] is
#  between -cutoff and cutoff, 1 otherwise.
#
c66<-ifelse(abs(c27)<=bend,1,0) # Have derivative of psi in c66
m1<-cbind(1,x)  # X matrix with col of 1's added
m2<-t(m1)   #X transpose
m5<-diag(c30) # matrix W, diagonal contains weights
m4<-diag(c66) # B matrix
m6<-m4%*%m1   # BX
m7<-m2%*%m6   # X'BX (nD=X'BX)
m8<-solve(m7)  # m8 = (X'BX) inverse
m9<-m8%*%m2 # m9 = (X'BX) inverse X'
m9<-m9%*%m5 # m9 = (X'BX) inverse X'W
m10<-m9%*%c27
c20<-m10*k72
c21<-reg0$coef+c20 #update initial estimate of parameters.
res<-y-m1%*%c21
list(coef=t(c21),residuals=res)
}
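#
#  Example usage (illustrative sketch with simulated data, commented out;
#  requires the R package MASS):
#
#  set.seed(1)
#  x=matrix(rnorm(100),ncol=2)
#  y=x[,1]+rnorm(50)
#  chregF(x,y)$coef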

DregGOLS<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,SEED=TRUE,nboot=200,...){
#
#  Global test that two dependent (time 1 and time 2)
#  OLS regression lines are identical
#  
#  Use a variation of Hotelling's test coupled with a bootstrap 
#  estimate of the relevant covariance matrix associated with the differences
#  in the estimates of the parameters.
#
if(SEED)set.seed(2)
X=elimna(cbind(x1,y1,x2,y2))
x1=as.matrix(x1)
x2=as.matrix(x2)
p=ncol(x1)
p1=p+1
p2=p+2
p3=p1+p
p4=p3+1
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
n=length(y1)
if(xout){
flag1=outfun(x1)$out.id
flag2=outfun(x2)$out.id
flag=unique(c(flag1,flag2))
if(length(flag)>0)X=X[-flag,]
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
}
nk=length(y1)
x1=as.matrix(x1)
x2=as.matrix(x2)
data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
bvec1<-apply(data,1,regboot,x1,y1,regfun=lsfit,...)
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
bvec2<-apply(data,1,regboot,x2,y2,regfun=lsfit,...)
dif=t(bvec1-bvec2)
S=cov(dif)
est1=lsfit(x1,y1)$coef
est2=lsfit(x2,y2)$coef
est=est1-est2
k <- (nk-p1)/((nk - 1)*p1)
stat <- k * crossprod(est, solve(S, est))[1, ]
pvalue <- 1 - pf(stat, p1, nk - p1)
list(test.statistic = stat, degrees_of_freedom = c(p1, nk - p1), p.value =
pvalue,est.1=est1,est.2=est2,estimate.dif = est)
}
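#
#  Example usage (illustrative sketch with simulated dependent data,
#  commented out):
#
#  set.seed(1)
#  x1=rnorm(40); y1=x1+rnorm(40)            # time 1
#  x2=x1+rnorm(40,sd=.5); y2=x2+rnorm(40)   # time 2, dependent on time 1
#  DregGOLS(x1,y1,x2,y2)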

difregOLS<-function(x1,y1,x2,y2,regfun=lsfit,xout=FALSE,outfun=outpro,nboot=200,
alpha=.05,SEED=TRUE,plotit=FALSE,xlab='X',ylab='Y',...){
#
# OLS regression data from two different times i.e., two dependent groups
#
#  compute confidence interval for the difference between intercepts
#  and the slopes
#
if(SEED)set.seed(2)
X=elimna(cbind(x1,y1,x2,y2))
x1=as.matrix(x1)
x2=as.matrix(x2)
p=ncol(x1)
p1=p+1
p2=p+2
p3=p1+p
p4=p3+1
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
n=length(y1)
if(xout){
flag1=outfun(x1)$out.id
flag2=outfun(x2)$out.id
flag=unique(c(flag1,flag2))
if(length(flag)>0)X=X[-flag,]
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
}
nk=length(y1)
x1=as.matrix(x1)
x2=as.matrix(x2)
data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
bvec1<-apply(data,1,regboot,x1,y1,regfun,...)
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
bvec2<-apply(data,1,regboot,x2,y2,regfun,...)
dif=t(bvec1)-t(bvec2)
est1=lsfit(x1,y1)$coef
est2=lsfit(x2,y2)$coef
estdif=est1-est2
se=apply(dif,2,sd)
pvec=NA
test=NA
test=estdif/se
df=nk-1
pvec=2*(1-pt(abs(test),df))
if(plotit){
reg2plot(x1,y1,x2,y2,xlab=xlab,ylab=ylab)
}
lvec='Intercept'
for(j in 2:p1)lvec=c(lvec,paste('slope',j-1))
pvec=array(pvec,dimnames=lvec)
list(n=n,n.keep=nk,est.dif=estdif,est.1=est1,est.2=est2,
test.stat=test,standard.error=se,p.values=pvec)
}
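#
#  Example usage (illustrative sketch with simulated dependent data,
#  commented out):
#
#  set.seed(1)
#  x1=rnorm(40); y1=x1+rnorm(40)
#  x2=x1+rnorm(40,sd=.5); y2=x2+rnorm(40)
#  difregOLS(x1,y1,x2,y2)   # p-values for the intercept and slope differences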

Dancols<-function(x1,y1,x2,y2,pts=NULL,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,xlab='X',ylab='Y',CR=FALSE,...){
#
# Compare the OLS regression lines of two dependent (within) groups
# at specified design points
#
#  Assume data are in x1 y1 x2 and y2
#
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#  If not specified, points are chosen for you.
#
#  CR=TRUE: determine interval outside of which the lines cross.
#  (Analog of Johnson--Neyman method)
#  
#   OUTPUT:
#  cross.interval indicates the interval outside of which the lines have crossed.
#  cr.quant.grp1 gives the quantiles of group 1 corresponding
#  to the ends of the interval returned in cross.interval
#
x1=as.matrix(x1)
x2=as.matrix(x2)
if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns')
if(SEED)set.seed(2)
FLAG=pts
X=elimna(cbind(x1,y1,x2,y2))
if(ncol(X)>4)stop('Only one covariate is allowed')
x1=as.matrix(x1)
x2=as.matrix(x2)
p=ncol(x1)
p1=p+1
p2=p+2
p3=p1+p
p4=p3+1
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
n=length(y1)
if(xout){
flag1=outfun(x1,SEED=SEED,...)$out.id
flag2=outfun(x2,SEED=SEED,...)$out.id
flag=unique(c(flag1,flag2))
if(length(flag)>0)X=X[-flag,]
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
}
n.keep=length(y1)
if(is.null(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
pts=x1[isub]
pts=unique(pts)
npt=nrow(as.matrix(pts))
mat<-matrix(NA,npt,9)
dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value'))
mat[,1]=pts
sqsd=difregYvar(x1,y1,x2,y2,regfun=lsfit,pts=pts,nboot=nboot,SEED=SEED)
est1=regYhat(x1,y1,xr=pts,regfun=lsfit) #Note: if xout=T, leverage points already removed
est2=regYhat(x2,y2,xr=pts,regfun=lsfit)
mat[,2]=est1
mat[,3]=est2
est=est1-est2
mat[,4]=est
sd=sqrt(sqsd)
mat[,6]=sd
tests=(est1-est2)/sd
mat[,5]=tests
df=length(y1)-1
pv=2*(1-pt(abs(tests),df))
mat[,9]=pv
crit<-smmcrit(df,5)
mat[,7]=est-crit*sd
mat[,8]=est+crit*sd
}
if(!is.null(FLAG)){
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
pts=unique(pts)
mat<-matrix(NA,length(pts),9)
dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value'))
mat[,1]<-pts
sqsd=difregYvar(x1,y1,x2,y2,regfun=lsfit,pts=pts,nboot=nboot,SEED=SEED)
est1=regYhat(x1,y1,xr=pts,regfun=lsfit,...)
est2=regYhat(x2,y2,xr=pts,regfun=lsfit,...)
mat[,2]=est1
mat[,3]=est2
est=est1-est2
mat[,4]=est
sd=sqrt(sqsd)
mat[,6]=sd
tests=(est1-est2)/sd
mat[,5]=tests
df=length(y1)-1
pv=2*(1-pt(abs(tests),df))
mat[,9]=pv
crit<-smmcrit(df,length(pts))
mat[,7]=est-crit*sd
mat[,8]=est+crit*sd
}
if(plotit){
plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab)
points(x1,y1,pch='o')
points(x2,y2,pch='+')
abline(lsfit(x1,y1)$coef)
abline(lsfit(x2,y2)$coef,lty=2)
}
int=NULL
crq=NULL
crq2=NULL
if(CR){
if(ncol(as.matrix(x1))>1)stop('CR=T only allowed with one covariate')
int=DancCR(x1,y1,x2,y2)
crq=mean(x1<=int[1])
crq[2]=mean(x1<=int[2])
crq2=mean(x2<=int[1])
crq2[2]=mean(x2<=int[2])
}

list(n=n,n.keep=n.keep,output=mat,cross.interval=int,cr.quant.grp1=crq,
cr.quant.grp2=crq2)
}
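#
#  Example usage (illustrative sketch with simulated dependent data,
#  commented out):
#
#  set.seed(1)
#  x1=rnorm(80); y1=x1+rnorm(80)
#  x2=x1+rnorm(80,sd=.5); y2=.5*x2+rnorm(80)
#  Dancols(x1,y1,x2,y2,plotit=FALSE)$output
#  Dancols(x1,y1,x2,y2,plotit=FALSE,CR=TRUE)  # also estimate the crossing interval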
Dancols_sub1<-function(pts,x1,y1,x2,y2){
#
# Used by DancCR: absolute value of the lower confidence limit
# at the design point pts
#
ci=abs(Dancols_sub(x1,y1,x2,y2,pts=pts)$output[1,7])  
ci
}
Dancols_sub2<-function(pts,x1,y1,x2,y2){
#
# Used by DancCR: absolute value of the upper confidence limit
# at the design point pts
#
ci=abs(Dancols_sub(x1,y1,x2,y2,pts=pts)$output[1,8])  
ci
}
Dancols_sub<-function(x1,y1,x2,y2,pts=NULL,fr1=1,fr2=1,alpha=.05,plotit=FALSE,xout=FALSE,outfun=out,nboot=100,SEED=TRUE,xlab='X',ylab='Y',...){
#
# Compare the OLS regression lines of two dependent (within) groups
# at specified design points
#
#  Assume data are in x1 y1 x2 and y2
#
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#  If not specified, points are chosen for you.
#
x1=as.matrix(x1)
x2=as.matrix(x2)
if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns')
if(SEED)set.seed(2)
FLAG=pts
X=elimna(cbind(x1,y1,x2,y2))
if(ncol(X)>4)stop('Only one covariate is allowed')
x1=as.matrix(x1)
x2=as.matrix(x2)
p=ncol(x1)
p1=p+1
p2=p+2
p3=p1+p
p4=p3+1
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
n=length(y1)
if(xout){
flag1=outfun(x1,SEED=SEED,...)$out.id
flag2=outfun(x2,SEED=SEED,...)$out.id
flag=unique(c(flag1,flag2))
if(length(flag)>0)X=X[-flag,]
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
}
n.keep=length(y1)
if(is.null(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,9)
dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value'))
pts=x1[isub]
mat[,1]=pts
sqsd=difregYvar(x1,y1,x2,y2,regfun=lsfit,pts=pts,nboot=nboot,SEED=SEED)
est1=regYhat(x1,y1,xr=pts,regfun=lsfit) #Note: if xout=T, leverage points already removed
est2=regYhat(x2,y2,xr=pts,regfun=lsfit)
mat[,2]=est1
mat[,3]=est2
est=est1-est2
mat[,4]=est
sd=sqrt(sqsd)
mat[,6]=sd
tests=(est1-est2)/sd
mat[,5]=tests
df=length(y1)-1
pv=2*(1-pt(abs(tests),df))
mat[,9]=pv
crit<-smmcrit(df,5)
mat[,7]=est-crit*sd
mat[,8]=est+crit*sd
}
if(!is.null(FLAG)){
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
mat<-matrix(NA,length(pts),9)
dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value'))
mat[,1]<-pts
sqsd=difregYvar(x1,y1,x2,y2,regfun=lsfit,pts=pts,nboot=nboot,SEED=SEED)
est1=regYhat(x1,y1,xr=pts,regfun=lsfit,...)
est2=regYhat(x2,y2,xr=pts,regfun=lsfit,...)
mat[,2]=est1
mat[,3]=est2
est=est1-est2
mat[,4]=est
sd=sqrt(sqsd)
mat[,6]=sd
tests=(est1-est2)/sd
mat[,5]=tests
df=length(y1)-1
pv=2*(1-pt(abs(tests),df))
mat[,9]=pv
crit<-smmcrit(df,length(pts))
mat[,7]=est-crit*sd
mat[,8]=est+crit*sd
}
if(plotit){
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab)
points(x1,y1,pch='o')
points(x2,y2,pch='+')
abline(lsfit(x1,y1)$coef)
abline(lsfit(x2,y2)$coef,lty=2)
}
list(n=n,n.keep=n.keep,output=mat)
}
DancCR<-function(x1,y1,x2,y2){
#
# Estimate the interval outside of which two dependent OLS regression lines
# have crossed, by locating where the lower and upper confidence limits equal zero
#
v=optim(0,Dancols_sub1,x1=x1,y1=y1,x2=x2,y2=y2,method='BFGS')$par
v[2]=optim(0,Dancols_sub2,x1=x1,y1=y1,x2=x2,y2=y2,method='BFGS')$par
c(min(v),max(v))
}

difregYvar<-function(x1,y1,x2,y2,regfun=tsreg,pts=NULL,
nboot=100,xout=FALSE,outfun=out,SEED=TRUE,...){
#
#  Estimate the standard error of the difference between the predicted values
#  of Y for two dependent groups, at the points in pts, using the regression
#  estimator indicated by the argument regfun.
#  regfun defaults to tsreg, the Theil--Sen estimator.
#  pts defaults to all unique points among x1 and x2.
#
X=elimna(cbind(x1,y1,x2,y2))
x1=as.matrix(x1)
x2=as.matrix(x2)
p=ncol(x1)
p1=p+1
p2=p+2
p3=p1+p
p4=p3+1
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
x1<-as.matrix(x1)
x2=as.matrix(x2)
if(is.null(pts)){
pts=rbind(x1,x2)
pts=unique(pts)
}
pts=as.matrix(pts)
nvpts=nrow(pts)
bvec1=matrix(NA,nrow=nboot,ncol=nvpts)
bvec2=matrix(NA,nrow=nboot,ncol=nvpts)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot){
bvec1[ib,]=regYsub(x1[data[ib,],],y1[data[ib,]],pts,p1=p1,regfun=regfun,...)
bvec2[ib,]=regYsub(x2[data[ib,],],y2[data[ib,]],pts,p1=p1,regfun=regfun,...)
}
bvec=bvec1-bvec2
sqsd=apply(bvec,2,var)
sqsd
}

difreg<-function(x1,y1,x2,y2,regfun=tsreg,xout=FALSE,outfun=outpro,nboot=599,
alpha=.05,SEED=TRUE,plotit=FALSE,xlab='X',ylab='Y',pr=TRUE,...){
#
# regression data from two different times i.e., two dependent groups
#
#  compute confidence interval for the difference in the slopes
#
if(SEED)set.seed(2)
X=elimna(cbind(x1,y1,x2,y2))
x1=as.matrix(x1)
x2=as.matrix(x2)
p=ncol(x1)
p1=p+1
p2=p+2
p3=p1+p
p4=p3+1
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
n=length(y1)
if(xout){
flag1=outfun(x1,...)$out.id
flag2=outfun(x2,...)$out.id
flag=unique(c(flag1,flag2))
X=X[-flag,]
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
}
flagF=identical(regfun,tsreg)
if(flagF){
if(pr){
if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg')
pr=FALSE
}
if(pr){
if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg')
}}
nk=length(y1)
x1=as.matrix(x1)
x2=as.matrix(x2)
data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
data=listm(t(data))
bvec1<-lapply(data,regboot,x1,y1,regfun,xout=FALSE,...)
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
bvec2<-lapply(data,regboot,x2,y2,regfun,xout=FALSE,...)
bvec1=matl(bvec1)
bvec2=matl(bvec2)
dif=t(bvec1)-t(bvec2)
dif.sort=apply(dif,2,sort)
pvec=NA
for(i in 1:p1){
pvec[i]<-(sum(dif[,i]<0)+.5*sum(dif[,i]==0))/nboot
if(pvec[i]>.5)pvec[i]<-1-pvec[i]
}
pvec<-2*pvec
if(plotit){
reg2plot(x1,y1,x2,y2,xlab=xlab,ylab=ylab,regfun=regfun,...)
}
lvec='Intercept'
for(j in 2:p1)lvec=c(lvec,paste('slope',j-1))
pvec=array(pvec,dimnames=lvec)
est1=regfun(x1,y1,xout=FALSE,...)$coef                  
est2=regfun(x2,y2,xout=FALSE,...)$coef
list(n=n,n.keep=nk,param=lvec,p.values=pvec,est.grp1=est1,est.grp2=est2)
}
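#
#  Example usage (illustrative sketch with simulated dependent data,
#  commented out):
#
#  set.seed(1)
#  x1=rnorm(40); y1=x1+rnorm(40)
#  x2=x1+rnorm(40,sd=.5); y2=x2+rnorm(40)
#  difreg(x1,y1,x2,y2)                  # Theil--Sen by default
#  difreg(x1,y1,x2,y2,regfun=tshdreg)   # if tied y values are a concern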

Dancts<-function(x1,y1,x2,y2,pts=NULL,regfun=tsreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,outfun=out,BLO=FALSE,nboot=100,SEED=TRUE,xlab='X',ylab='Y',pr=TRUE,...){
#
# Compare the regression lines of two dependent  groups using 
# the robust regression indicated by the argument
# regfun. Default is modified Theil--Sen estimator
# 
#  Comparisons are done at specified design points
#  This is a robust Johnson-Neyman method for dependent groups.
#
#  For OLS, use Dancols
#  Assume data are in x1 y1 x2 and y2
#
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#
x1=as.matrix(x1)
x2=as.matrix(x2)
if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns')
if(SEED)set.seed(2)
FLAG=pts
X=elimna(cbind(x1,y1,x2,y2))
if(ncol(X)>4)stop('Only one covariate is allowed')
x1=as.matrix(x1)
x2=as.matrix(x2)
p=ncol(x1)
p1=p+1
p2=p+2
p3=p1+p
p4=p3+1
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
if(xout){
flag1=outfun(x1)$out.id
flag2=outfun(x2)$out.id
flag=unique(c(flag1,flag2))
if(length(flag)>0)X=X[-flag,]
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
}
flagF=identical(regfun,tsreg)
if(flagF){
if(pr){
if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg')
pr=FALSE
}
if(pr){
if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg')
}}
if(is.null(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,9)
dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value'))
pts=x1[isub]
mat[,1]=pts
sqsd=difregYvar(x1,y1,x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED)
est1=regYhat(x1,y1,xr=pts,regfun=regfun) #Note: if xout=T, leverage points already removed
est2=regYhat(x2,y2,xr=pts,regfun=regfun)
mat[,2]=est1
mat[,3]=est2
est=est1-est2
mat[,4]=est
sd=sqrt(sqsd)
mat[,6]=sd
tests=(est1-est2)/sd
mat[,5]=tests
pv=2*(1-pnorm(abs(tests)))
mat[,9]=pv
crit<-smmcrit(Inf,5)
mat[,7]=est-crit*sd
mat[,8]=est+crit*sd
}
if(!is.null(FLAG)){
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
mat<-matrix(NA,length(pts),9)
dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','TEST','se','ci.low','ci.hi','p.value'))
mat[,1]<-pts
sqsd=difregYvar(x1,y1,x2,y2,regfun=regfun,pts=pts,nboot=nboot,SEED=SEED)
est1=regYhat(x1,y1,xr=pts,regfun=regfun)
est2=regYhat(x2,y2,xr=pts,regfun=regfun)
mat[,2]=est1
mat[,3]=est2
est=est1-est2
mat[,4]=est
sd=sqrt(sqsd)
mat[,6]=sd
tests=(est1-est2)/sd
mat[,5]=tests
pv=2*(1-pnorm(abs(tests)))
mat[,9]=pv
crit<-smmcrit(Inf,length(pts))
mat[,7]=est-crit*sd
mat[,8]=est+crit*sd
}
if(plotit){
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab)
points(x1,y1,pch='o')
points(x2,y2,pch='+')
abline(regfun(x1,y1)$coef)
abline(regfun(x2,y2)$coef,lty=2)
}
list(output=mat)
}


tshd<-function(x,y,HD=TRUE,plotit=FALSE,xlab='X',ylab='Y'){
#
# Compute the Theil-Sen regression estimator.
# Only a single predictor is allowed in this version
#
#  HD=TRUE, use Harrell-Davis for slopes
#  HD=FALSE, use usual median
#
temp<-matrix(c(x,y),ncol=2)
temp<-elimna(temp)     # Remove any pairs with missing values
x<-temp[,1]
y<-temp[,2]
ord<-order(x)
xs<-x[ord]
ys<-y[ord]
vec1<-outer(ys,ys,'-')
vec2<-outer(xs,xs,'-')
v1<-vec1[vec2>0]
v2<-vec2[vec2>0]
if(!HD)slope<-median(v1/v2,na.rm=TRUE)
if(HD)slope<-hd(v1/v2,na.rm=TRUE)
res=y-slope*x
int=hd(res)
coef=c(int,slope)
if(plotit){
plot(x,y,xlab=xlab,ylab=ylab)
abline(coef)
}
list(coef=coef)
}
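#
#  Example usage (illustrative sketch with simulated data, commented out):
#
#  set.seed(1)
#  x=rnorm(50); y=x+rnorm(50)
#  tshd(x,y)$coef            # Harrell-Davis based slope
#  tshd(x,y,HD=FALSE)$coef   # usual median of the pairwise slopes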
tshdreg<-function(x,y,HD=TRUE,xout=FALSE,outfun=out,iter=10,varfun=pbvar,
corfun=pbcor,plotit=FALSE,tol=.0001,RES=FALSE,...){
#
#  Compute Theil-Sen regression estimator
#
#  Use back-fitting
#  when there is more than one predictor,
#  and estimate the intercept using the Harrell-Davis estimator
#
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
temp<-NA
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=plotit,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(ncol(x)==1){
temp1<-tshd(x,y,HD=HD)
coef<-temp1$coef
res<-y-coef[2]*x-coef[1]
}
if(ncol(x)>1){
for(p in 1:ncol(x)){
temp[p]<-tshd(x[,p],y)$coef[2]
}
res<-y-x%*%temp
alpha<-hd(res)
r<-matrix(NA,ncol=ncol(x),nrow=nrow(x))
tempold<-temp
for(it in 1:iter){
for(p in 1:ncol(x)){
r[,p]<-y-x%*%temp-alpha+temp[p]*x[,p]
temp[p]<-tshd(x[,p],r[,p],plotit=FALSE)$coef[2]
}
if(max(abs(temp-tempold))<tol)break
alpha<-hd(y-x%*%temp)
tempold<-temp
}
coef<-c(alpha,temp)
res<-y-x%*%temp-alpha
}
yhat<-y-res
stre=NULL
temp=varfun(y)
if(temp==0)print('Warning: When computing strength of association, measure of variation=0')
e.pow=NULL
if(temp>0){
e.pow<-varfun(yhat)/varfun(y)
if(!is.na(e.pow)){
if(e.pow>=1)e.pow<-corfun(yhat,y)$cor^2
e.pow=as.numeric(e.pow)
stre=sqrt(e.pow)
}}
if(!RES)res=NULL
list(coef=coef,residuals=res,Strength.Assoc=stre,Explanatory.Power=e.pow)
}
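#
#  Example usage (illustrative sketch with simulated data, commented out;
#  two predictors, so the slopes are estimated via back-fitting):
#
#  set.seed(1)
#  x=matrix(rnorm(100),ncol=2)
#  y=x[,1]-x[,2]+rnorm(50)
#  tshdreg(x,y)$coef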


ltsreg<-function(x,y,xout=FALSE,outfun=outpro,STAND=FALSE,...){
#
# Least trimmed squares regression via the function ltsReg in the
# R package robustbase
#
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
temp<-NA
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=FALSE,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
library(robustbase)
coef=ltsReg(y~x)[8]$coefficients
list(coef=coef)
}
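#
#  Example usage (illustrative sketch with simulated data, commented out;
#  requires the R package robustbase):
#
#  set.seed(1)
#  x=rnorm(50); y=x+rnorm(50)
#  y[1:3]=y[1:3]+10     # a few regression outliers
#  ltsreg(x,y)$coef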
DregG<-function(x1,y1,x2,y2,nullv=NULL,regfun=tshdreg,nboot=500,xout=FALSE,outfun=outpro,
SEED=TRUE,plotit=FALSE,pr=TRUE,...){
#
#  Global test that two dependent groups have identical 
#  regression parameters.
#
#  Use a variation of Hotelling's test coupled with a bootstrap 
#  estimate of the relevant covariance matrix associated with the differences
#  in the estimates of the parameters.
#
#  For OLS, use DregGOLS.
#
#  (plotit=F is used so that in simulations, if xout=T, the seed is not
#  set every time outpro is called.)
#
if(SEED)set.seed(2)
X=elimna(cbind(x1,y1,x2,y2))
x1=as.matrix(x1)
x2=as.matrix(x2)
p=ncol(x1)
p1=p+1
p2=p+2
p3=p1+p
p4=p3+1
if(is.null(nullv))nullv=rep(0,p1)
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
n=length(y1)
if(xout){
flag1=outfun(x1,...)$out.id
flag2=outfun(x2,...)$out.id
flag=unique(c(flag1,flag2))
if(length(flag)>0)X=X[-flag,]
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
}
x1=as.matrix(x1)
x2=as.matrix(x2)
flagF=FALSE
flagF=identical(regfun,tsreg)
if(flagF){if(pr){
if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg')
pr=FALSE
}
if(pr){
if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg')
}}
data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
bvec1<-apply(data,1,regboot,x1,y1,regfun=regfun,xout=FALSE,...)
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
bvec2<-apply(data,1,regboot,x2,y2,regfun=regfun,xout=FALSE,...)
dif=t(bvec1-bvec2)
temp<-pdis(rbind(dif,nullv))
sig.level<-sum(temp[nboot+1]<temp[1:nboot])/nboot
est1=regfun(x1,y1)$coef
est2=regfun(x2,y2)$coef
est=est1-est2
list(p.value=sig.level,est.1=est1,est.2=est2,estimate.dif = est)
}
DregGMC<-function(x1,y1,x2,y2,nullv=NULL,regfun=tsreg,nboot=500,xout=FALSE,outfun=outpro,
SEED=TRUE,plotit=FALSE,pr=TRUE,...){
#
#  Global test that two dependent groups have identical 
#  regression parameters.
#
#  Use a variation of Hotelling's test coupled with a bootstrap 
#  estimate of the relevant covariance matrix associated with the differences
#  in the estimates of the parameters.
#
#  For OLS, use DregGOLS.
#
#  (plotit=F is used so that in simulations, if xout=T, the seed is not
#  set every time outpro is called.)
#
flag=FALSE
library(parallel)
if(SEED)set.seed(2)
X=elimna(cbind(x1,y1,x2,y2))
x1=as.matrix(x1)
x2=as.matrix(x2)
p=ncol(x1)
p1=p+1
p2=p+2
p3=p1+p
p4=p3+1
if(is.null(nullv))nullv=rep(0,p1)
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
n=length(y1)
if(xout){
flag1=outfun(x1,...)$out.id
flag2=outfun(x2,...)$out.id
flag=unique(c(flag1,flag2))
if(length(flag)>0)X=X[-flag,]
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
}
flagF=FALSE
flagF=identical(regfun,tsreg)
if(flagF){
if(pr){
if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg')
pr=FALSE
}
if(pr){
if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg')
}}
x1=as.matrix(x1)
x2=as.matrix(x2)
data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
data=listm(t(data))
bvec1=mclapply(data,regbootMC,x1,y1,regfun,xout=FALSE,...)
bvec1=matl(bvec1)
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
bvec2=mclapply(data,regbootMC,x2,y2,regfun,xout=FALSE,...)
bvec2=matl(bvec2)
dif=t(bvec1-bvec2)
temp<-pdisMC(rbind(dif,nullv))
sig.level<-sum(temp[nboot+1]<temp[1:nboot])/nboot
est1=regfun(x1,y1)$coef
est2=regfun(x2,y2)$coef
est=est1-est2
list(p.value=sig.level,est.1=est1,est.2=est2,estimate.dif = est)
}
difregMC<-function(x1,y1,x2,y2,regfun=tsreg,xout=FALSE,outfun=outpro,nboot=599,
alpha=.05,SEED=TRUE,plotit=FALSE,xlab='X',ylab='Y',pr=TRUE,...){
#
# regression data from two different times i.e., two dependent groups
#
#  compute confidence interval for the difference in the slopes and intercepts
#
library(parallel)
if(SEED)set.seed(2)
X=elimna(cbind(x1,y1,x2,y2))
x1=as.matrix(x1)
x2=as.matrix(x2)
p=ncol(x1)
p1=p+1
p2=p+2
p3=p1+p
p4=p3+1
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
n=length(y1)
if(xout){
flag1=outfun(x1,...)$out.id
flag2=outfun(x2,...)$out.id
flag=unique(c(flag1,flag2))
X=X[-flag,]
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
}
flagF=identical(regfun,tsreg)
if(flagF){
if(pr){
if(sum(duplicated(y1)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg')
pr=FALSE
}
if(pr){
if(sum(duplicated(y2)>0))print('Duplicate values detected; regfun=tshdreg might have more power than tsreg')
}}
nk=length(y1)
x1=as.matrix(x1)
x2=as.matrix(x2)
data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
data=listm(t(data))
bvec1<-mclapply(data,regboot,x1,y1,regfun,mc.preschedule=TRUE,xout=FALSE,...)
# bvec is a p+1 by nboot matrix. The first row
#                     contains the bootstrap intercepts, the second row
#                     contains the bootstrap values for first predictor, etc.
bvec2<-mclapply(data,regboot,x2,y2,regfun,mc.preschedule=TRUE,xout=FALSE,...)
bvec1=matl(bvec1)
bvec2=matl(bvec2)
dif=t(bvec1)-t(bvec2)
dif.sort=apply(dif,2,sort)
pvec=NA
for(i in 1:p1){
pvec[i]<-(sum(dif[,i]<0)+.5*sum(dif[,i]==0))/nboot
if(pvec[i]>.5)pvec[i]<-1-pvec[i]
}
pvec<-2*pvec
if(plotit){
reg2plot(x1,y1,x2,y2,xlab=xlab,ylab=ylab,regfun=regfun,...)
}
lvec='Intercept'
for(j in 2:p1)lvec=c(lvec,paste('slope',j-1))
pvec=array(pvec,dimnames=lvec)
est1=regfun(x1,y1,xout=FALSE,...)$coef
est2=regfun(x2,y2,xout=FALSE,...)$coef
list(n=n,n.keep=nk,param=lvec,p.values=pvec,est.grp1=est1,est.grp2=est2)
}

qcipb<-function(x,q=.5,alpha=.05,nboot=2000,SEED=TRUE,nv=0,...){
#
#   Compute a bootstrap, .95 confidence interval for the
#   qth quantile via the Harrell--Davis estimator.
#   
#   Default is q=.5, meaning a confidence interval for the median is
#   computed.
#
#   Appears to be best method when there are tied values
#
#    nv=null value when  computing a p-value
#
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
x=elimna(x)
data<-matrix(sample(x,size=length(x)*nboot,replace=TRUE),nrow=nboot)
bvec<-apply(data,1,hd,q=q)
bvec<-sort(bvec)
low<-round((alpha/2)*nboot)
up<-nboot-low
low<-low+1
pv=mean(bvec>nv)+.5*mean(bvec==nv)
pv=2*min(c(pv,1-pv))
estimate=hd(x,q=q)
list(ci=c(bvec[low],bvec[up]),n=length(x),estimate=estimate,p.value=pv)
}
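#
#  Example usage (illustrative sketch with simulated skewed data, commented out):
#
#  set.seed(1)
#  x=rexp(40)
#  qcipb(x)         # CI for the median via the Harrell--Davis estimator
#  qcipb(x,q=.25)   # CI for the lower quartile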
Qreg<-function(x,y,q=.5,xout=FALSE,outfun=outpro,res.vals=FALSE,...){
# 
# Quantile regression. Like the function qreg, but avoids computational 
# problems that can arise when there are tied values among the dependent
# variable
# 
x<-as.matrix(x)
xx<-cbind(x,y)
xx<-elimna(xx)
xx=as.matrix(xx)
x<-xx[,1:ncol(x)]
x<-as.matrix(x)
y<-xx[,ncol(x)+1]
temp<-NA
x<-as.matrix(x)
if(xout){
x<-as.matrix(x)
flag<-outfun(x,plotit=FALSE,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
init=ols(x,y)$coef
v=optim(init,qfun,x=x,y=y,q=q,method='BFGS')$par
p1=ncol(x)+1
res=NULL
if(res.vals)res<-y-x%*%v[2:p1]-v[1]
list(coef=v,residuals=res)
}
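#
#  Example usage (illustrative sketch with simulated data, commented out;
#  rounding y creates tied values, the case Qreg is designed for):
#
#  set.seed(1)
#  x=rnorm(50)
#  y=round(x+rnorm(50))
#  Qreg(x,y)$coef         # median regression
#  Qreg(x,y,q=.75)$coef   # .75 quantile regression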
qfun<-function(x,y,coef,q){
#
# Quantile regression objective function (check function) minimized by Qreg
#
x=as.matrix(x)
p1=ncol(x)+1
r=y-coef[1]-x%*%coef[2:p1]
sum(r*(q-as.numeric(r<0)))
}
Rcoefalpha<-function(x,cov.fun=skipcov,pr=FALSE,...){
# Compute coefficient alpha plus a robust analog
#
# x is assumed to be a matrix
# output:
# coefficient alpha plus robust version
#
# other possible choices for cov.fun:
# skipcov
# tbscov
# covout
# covogk
# mgvcov
# mvecov
# mcdcov
# wincov
#
x=elimna(x)
x=as.matrix(x)
mcor=winall(x,tr=0)$cov
term=sum(mcor)
diag(mcor)=0
term1=sum(mcor)
k=ncol(x)
lam=k*term1/(k-1)
res1=lam/term
#
mcor=cov.fun(x,...)
term=sum(mcor)                   
diag(mcor)=0
term1=sum(mcor)                                          
k=ncol(x)                                                                 
lam=k*term1/(k-1)                                       
lam=lam/term 
list(coef.alpha=res1,robust.alpha=lam)
}
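#
#  Example usage (illustrative sketch with simulated item scores, commented out):
#
#  set.seed(1)
#  z=rnorm(50)
#  x=cbind(z+rnorm(50),z+rnorm(50),z+rnorm(50),z+rnorm(50))
#  Rcoefalpha(x)   # coefficient alpha and the robust (skipped-covariance) analog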
Dancova<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,plotit=TRUE,pts=NA,sm=FALSE,xout=FALSE,outfun=out,DIF=FALSE,LP=TRUE,...){
#
# Compare two dependent  groups using a method  similar to the one used by the R function ancova
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
#
#  Assume data are in x1 y1 x2 and y2
#
#  sm=T will create smooths using bootstrap bagging.
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#
#  DIF=FALSE: marginal trimmed means are compared
#  DIF=TRUE: Trimmed means of difference scores are used.
#
if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function')
if(length(x1)!=length(y1))stop('x1 and y1 have different lengths')
if(length(x1)!=length(x2))stop('x1 and x2 have different lengths')
if(length(x2)!=length(y2))stop('x2 and y2 have different lengths')
if(length(y1)!=length(y2))stop('y1 and y2 have different lengths')
xy=elimna(cbind(x1,y1,x2,y2))
x1=xy[,1]
y1=xy[,2]
x2=xy[,3]
y2=xy[,4]
if(is.na(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,9)
dimnames(mat)<-list(NULL,c('X','n','DIF','TEST','se','ci.low','ci.hi','p.value','p.crit'))
for (i in 1:5){
t1=near(x1,x1[isub[i]],fr1)
t2=near(x2,x1[isub[i]],fr2)
pick=as.logical(t1*t2)
mat[i,2]<-length(y1[pick])
if(!DIF)test<-yuend(y1[pick],y2[pick],tr=tr)
if(DIF)test<-trimci(y1[pick]-y2[pick],tr=tr,pr=FALSE)
mat[i,1]<-x1[isub[i]]
if(!DIF){
mat[i,4]<-test$teststat
mat[i,3]<-test$dif
}
if(DIF){
mat[i,4]<-test$test.stat
mat[i,3]<-test$estimate
}
mat[i,5]<-test$se
mat[i,6]<-test$ci[1]
mat[i,7]<-test$ci[2]
mat[i,8]<-test$p.value
}
temp2<-order(0-mat[,8])
bot=c(1:nrow(mat))
dvec=sort(alpha/bot,decreasing=TRUE)
mat[temp2,9]=dvec
}
if(!is.na(pts[1])){
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
# First check sample size
#
flage=rep(TRUE,length(pts))
for (i in 1:length(pts)){
t1<-near(x1,pts[i],fr1)
t2<-near(x2,pts[i],fr2)
pick=as.logical(t1*t2)
if(sum(pick)<=5){print(paste('Warning: there are',sum(pick),' points corresponding to the design point X=',pts[i]))
flage[i]=FALSE
}}
pts=pts[flage]
mat<-matrix(NA,length(pts),9)
dimnames(mat)<-list(NULL,c('X','n','DIF','TEST','se','ci.low','ci.hi',
'p.value','p.crit'))
for (i in 1:length(pts)){
t1<-near(x1,pts[i],fr1)
t2<-near(x2,pts[i],fr2)
pick=as.logical(t1*t2)
if(!DIF)test<-yuend(y1[pick],y2[pick],tr=tr)
if(DIF)test<-trimci(y1[pick]-y2[pick],tr=tr,pr=FALSE)
if(!DIF){
mat[i,4]<-test$teststat
mat[i,3]<-test$dif
}
if(DIF){
mat[i,4]<-test$test.stat
mat[i,3]<-test$estimate
}
mat[i,1]<-pts[i]
mat[i,2]<-length(y1[pick])
mat[i,5]<-test$se
mat[i,6]<-test$ci[1]
mat[i,7]<-test$ci[2]
mat[i,8]<-test$p.value
}
temp2<-order(0-mat[,8])
bot=c(1:nrow(mat))
dvec=sort(alpha/bot,decreasing=TRUE)
mat[temp2,9]=dvec
}
if(plotit){
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
runmean2g(x1,y1,x2,y2,fr=fr1,est=tmean,sm=sm,xout=FALSE,LP=LP,...)
}
list(output=mat)
}
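#
#  Example usage (illustrative sketch with simulated dependent data,
#  commented out):
#
#  set.seed(1)
#  x1=rnorm(80); y1=x1+rnorm(80)
#  x2=x1+rnorm(80,sd=.5); y2=x2+rnorm(80)
#  Dancova(x1,y1,x2,y2,plotit=FALSE)$output
#  Dancova(x1,y1,x2,y2,DIF=TRUE,plotit=FALSE)$output  # difference scores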
Dancovamp<-function(x1,y1,x2,y2,fr1=1.5,fr2=1.5,tr=.2,alpha=.05,pts=NULL,SEED=TRUE,DIF=TRUE,cov.fun=skipcov,...){
#
# Compare two dependent  groups using the ancova method.
# Multiple covariates are allowed.
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
# Design points are chosen based on depth of points in x1 if pts=NULL
#  Assume data are in x1 y1 x2 and y2
#
#  Choices for cov.fun include
# skipcov
# tbscov
# covogk
# mgvcov
# mvecov
# mcdcov
# wincov
#
flag=identical(cov.fun,cov.mve)
if(flag)if(SEED)set.seed(2) # now cov.mve always returns same result
x1=as.matrix(x1)
x2=as.matrix(x2)
if(ncol(x1)!=ncol(x2))stop('x1 and x2 should have same number of columns')
if(ncol(x1)==1)stop('For one covariate, use Dancova')
if(nrow(x1)!=nrow(x2))stop('x1 and x2 should have same number of rows')
if(length(y1)!=length(y2))stop('y1 and y2 should have same length')
p=ncol(x1)
p1=p+1
m1=elimna(cbind(x1,y1,x2,y2))
x1=m1[,1:p]
y1=m1[,p1]
p2=p1+1
p3=p2+p-1
p4=p3+1
x2=m1[,p2:p3]
y2=m1[,p4]
if(is.null(pts[1])){
x1<-as.matrix(x1)
x2<-as.matrix(x2)
pts<-ancdes(x1)
}
pts<-as.matrix(pts)
flag<-rep(T,nrow(pts))
if(!DIF){
mat<-matrix(NA,nrow(pts),9)
dimnames(mat)<-list(NULL,c('n','est1','est2','DIF','TEST','se','ci.low','ci.hi','p.value'))
}
if(DIF){
mat<-matrix(NA,nrow(pts),7)
dimnames(mat)<-list(NULL,c('n','DIF','TEST','se','ci.low','ci.hi','p.value'))
}
n<-1
vecn<-1
mval1<-cov.funl(cov.fun(x1,...))
mval2<-cov.funl(cov.fun(x2,...))
for(i in 1:nrow(pts)){
t1=near3d(x1,pts[i,],fr1,mval1)
t2=near3d(x2,pts[i,],fr2,mval2)
pick=as.logical(t1*t2)
n[i]<-length(y1[pick])
if(n[i]<5)flag[i]<-F
if(n[i]>=5){
if(!DIF){
test<-yuend(y1[pick],y2[pick],tr=tr,alpha=alpha)
mat[i,2]=test$est1
mat[i,3]=test$est2
mat[i,4]=test$dif
mat[i,5]=test$teststat
mat[i,6]=test$se
mat[i,7]=test$ci[1]
mat[i,8]=test$ci[2]
mat[i,9]=test$p.value
}
if(DIF){
test<-trimci(y1[pick]-y2[pick],tr=tr,pr=FALSE,alpha=alpha)
mat[i,2]=test$estimate
mat[i,3]=test$test.stat
mat[i,4]=test$se
mat[i,5]=test$ci[1]
mat[i,6]=test$ci[2]
mat[i,7]=test$p.value
}
}
mat[i,1]<-n[i]
}
if(sum(flag)==0)print('No comparable design points found, might increase span.')
list(pts=pts,output=mat)
}
cov.funl<-function(m){
list(cov=m)
}
rplotCI<-function(x,y,tr=.2,fr=1,plotit=TRUE,scat=TRUE,pyhat=FALSE,
eout=FALSE,xout=FALSE,xlab='x',ylab='y',outfun=out,LP=TRUE,alpha=.05,...){
#
# running  interval smoother based on a trimmed mean.
# Unlike rplot, includes a confidence band.  
#
# LP=TRUE, the plot is further smoothed via lowess
#
# fr controls amount of smoothing
plotit<-as.logical(plotit)
scat<-as.logical(scat)
str=rplot(x,y,tr=tr,xout=xout,plotit=FALSE,LP=LP,fr=fr)$Strength.Assoc
m<-cbind(x,y)
if(ncol(m)>2)stop('To get a smooth with more than one covariate, use rplot')
m<-elimna(m)
nv=nrow(m)
if(eout && xout)stop('Not allowed to have eout=xout=T')
if(eout){
flag<-outfun(m,plotit=FALSE)$keep
m<-m[flag,]
}
if(xout){
flag<-outfun(m[,1])$keep
m<-m[flag,]
}
x=m[,1]
y=m[,2]
n.keep=length(y)
rmd<-c(1:length(x))
for(i in 1:length(x))rmd[i]<-mean(y[near(x,x[i],fr)],tr=tr)
sedf=runse(x,y,fr=fr,tr=tr,pts=x)
df=sedf$df
flag=df>4
se=sedf$se
low=rmd[flag]-qt(1-alpha/2,df[flag])*se[flag]
up=rmd[flag]+qt(1-alpha/2,df[flag])*se[flag]
rmd=rmd[flag]
x=x[flag]
y=y[flag]
if(plotit){
ord=order(x)
x=x[ord]
rmd=rmd[ord]
up=up[ord]
low=low[ord]
if(LP){
rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
up=lplot(x,up,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
low=lplot(x,low,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
}
if(scat){
plot(c(x,x),c(y,rmd),xlab=xlab,ylab=ylab,type='n')
lines(x,up,lty=2)
lines(x,low,lty=2)
points(x,y)
}
if(!scat)plot(c(x,x),c(y,rmd),type='n',ylab=ylab,xlab=xlab)
points(x,rmd,type='n')
sx<-sort(x)
xorder<-order(x)
sysm<-rmd[xorder]
lines(sx,sysm)
lines(x,up,lty=2)
lines(x,low,lty=2)
}
if(pyhat)output<-rmd
if(!pyhat)output<-'Done'
list(output=output,str=str,n=nv,n.keep=n.keep)
}
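#
#  Example usage (illustrative sketch with simulated data, commented out;
#  produces a scatterplot with the smooth and a confidence band):
#
#  set.seed(1)
#  x=rnorm(100); y=x^2+rnorm(100)
#  rplotCI(x,y)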
runse<-function(x,y,fr=1,tr=.2,pts=x,RNA=FALSE,outfun=out,xout=FALSE,SEED=TRUE){
#
# Estimate SE of Yhat when using a running interval smooth 
#  based on a trimmed mean.
# fr controls amount of smoothing
#
# Missing values are automatically removed.
#
# RNA=F, do not remove missing values when averaging
# (computing the smooth) at x
# xout=T removes points for which x is an outlier
#
if(SEED)set.seed(2)
temp<-cbind(x,y)
if(ncol(temp)>2)stop(' 1 predictor only is allowed')
temp<-elimna(temp) # Eliminate any rows with missing values
if(xout){
flag<-outfun(x,plotit=FALSE)$keep
temp<-temp[flag,]
}
x<-temp[,1]
y<-temp[,2]
pts<-as.matrix(pts)
vals<-NA
WSE=NA
df=NA
h=NA
for(i in 1:length(pts)){
ysub=y[near(x,pts[i],fr)]
v=trimse(ysub,tr=tr,na.rm=TRUE)
if(is.na(v))v=0
if(v>0){
WSE[i]=trimse(ysub,tr=tr,na.rm=TRUE)
df[i]=length(ysub)-2*floor(tr*length(ysub))-1
}
if(v==0){
df[i]=0
WSE[i]=0
}}
list(se=WSE,df=df)
}
rplotpbCI<-function(x,y,est=onestep,fr=1,plotit=TRUE,scat=TRUE,pyhat=FALSE,
xout=FALSE,xlab='x',ylab='y',outfun=out,LP=TRUE,alpha=.05,
nboot=500,SEED=TRUE,...){
#
# running  interval smoother based on any measure of location 
# Unlike rplotCI, uses a percentile bootstrap
# method to get a confidence band
#
# LP=TRUE, the plot is further smoothed via lowess
#
# fr controls amount of smoothing
plotit<-as.logical(plotit)
scat<-as.logical(scat)
m<-cbind(x,y)
if(ncol(m)>2)stop('To get a smooth with more than one covariate, use rplot')
m<-elimna(m)
if(xout){
flag<-outfun(m[,1])$keep
m<-m[flag,]
}
x=m[,1]
y=m[,2]
low=rep(NA,length(y))
up=rep(NA,length(y))
rmd<-NA
for(i in 1:length(x)){
sel=y[near(x,x[i],fr)]
temp=onesampb(sel,est=est,nboot=nboot,alpha=alpha,SEED=SEED,...)
low[i]=temp$ci[1]
up[i]=temp$ci[2]
rmd[i]=temp$estimate
}
all=elimna(cbind(x,low,up,y,rmd))
x=all[,1]
low=all[,2]
up=all[,3]
y=all[,4]
rmd=all[,5]
if(plotit){
ord=order(x)
x=x[ord]
rmd=rmd[ord]
up=up[ord]
low=low[ord]
if(LP){
rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
up=lplot(x,up,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
low=lplot(x,low,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
}
if(scat){
plot(c(x,x),c(y,rmd),xlab=xlab,ylab=ylab,type='n')
lines(x,up,lty=2)
lines(x,low,lty=2)
points(x,y)
}
if(!scat)plot(c(x,x),c(y,rmd),type='n',ylab=ylab,xlab=xlab)
points(x,rmd,type='n')
sx<-sort(x)
xorder<-order(x)
sysm<-rmd[xorder]
lines(sx,sysm)
lines(x,up,lty=2)                                       
lines(x,low,lty=2)
}
if(pyhat)output<-rmd
if(!pyhat)output<-'Done'
list(output=output)
}

Danctspb<-function(x1,y1,x2,y2,pts=NULL,regfun=tsreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,SCAT=TRUE,
outfun=outpro,BLO=FALSE,nboot=500,SEED=TRUE,xlab='X',ylab='Y',pr=TRUE,eout=FALSE,...){
#
# Compare the regression lines of two dependent  groups at specified design points using
# the robust regression estimator indicated by the argument
# regfun. Default is modified Theil--Sen estimator
# 
#  Comparisons are done at specified design points
#  This is a robust Johnson-Neyman method for dependent groups.
#
#  For OLS, use Dancols
#  Assume data are in x1 y1 x2 and y2
#
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#  
# Uses bootstrap samples based on resamples of the points followed by a regression fit.
# In contrast, Dancts uses bootstrap estimate of the se of Yhat followed by a pivotal test
# statistic. 
#
x1=as.matrix(x1)
x2=as.matrix(x2)
if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns')
if(SEED)set.seed(2)
FLAG=pts
X=elimna(cbind(x1,y1,x2,y2))
if(ncol(X)>4)stop('Only one covariate is allowed')
x1=as.matrix(x1)
x2=as.matrix(x2)
p=ncol(x1)
p1=p+1
p2=p+2
p3=p1+p
p4=p3+1
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
if(xout){
flag1=outfun(x1)$out.id
flag2=outfun(x2)$out.id
flag=unique(c(flag1,flag2))
if(length(flag)>0)X=X[-flag,]
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
}
flagF=identical(regfun,tsreg)
if(identical(regfun,tshdreg))flagF=FALSE
if(flagF){
if(pr){
if(sum(duplicated(y1)>0))print('Duplicate values detected; tshdreg might have more power than tsreg')
pr=FALSE
}
if(pr){
if(sum(duplicated(y2)>0))print('Duplicate values detected; tshdreg might have more power than tsreg')
}}
if(is.null(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
pts=x1[isub]
}
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
mat<-matrix(NA,length(pts),7)
dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','ci.low','ci.hi','p.value'))
mat[,1]=pts
n=length(y1)
x1=as.matrix(x1)
x2=as.matrix(x2)
data<-matrix(sample(length(y1),size=n*nboot,replace=TRUE),nrow=nboot)
est1=apply(data,1,Danctspb.sub,x1,y1,xr=pts,regfun=regfun,xout=FALSE,...)
est2=apply(data,1,Danctspb.sub,x2,y2,xr=pts,regfun=regfun,xout=FALSE,...)
mat[,2]=regYhat(x1,y1,xr=pts,regfun=regfun,...)
mat[,3]=regYhat(x2,y2,xr=pts,regfun=regfun,...)
est=est1-est2
if(!is.matrix(est))est=matrix(est,nrow=1)
mat[,4]=mat[,2]-mat[,3]
pv1=apply(est<0,1,mean,na.rm=T)
pv2=apply(est==0,1,mean,na.rm=T)
pv=pv1+.5*pv2
pv1m=1-pv
pv=2*apply(cbind(pv,pv1m),1,min)
mat[,7]=pv
ilow<-round((alpha/2) * nboot)
ihi<-nboot - ilow
ilow<-ilow+1
estsort=apply(est,1,sort)
mat[,5]=estsort[ilow,]
mat[,6]=estsort[ihi,]
if(plotit){
if(eout && xout)stop('Cannot have both eout=TRUE and xout=TRUE')
if(eout){
flag<-outfun(cbind(x1,y1),plotit=FALSE,...)$keep                                
x1<-x1[flag]                                                                    
y1<-y1[flag]                                                                    
flag<-outfun(cbind(x2,y2),plotit=FALSE,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab)
if(SCAT)points(x1,y1,pch='o')
if(SCAT)points(x2,y2,pch='+')
abline(regfun(x1,y1)$coef)
abline(regfun(x2,y2)$coef,lty=2)
}
list(output=mat)
}

Danctspb.sub<-function(data,x,y,xr,regfun,...){
x=as.matrix(x)
yhat=regYhat(x[data,],y[data],xr=xr,regfun=regfun,...)
yhat
}

DanctspbMC<-function(x1,y1,x2,y2,pts=NULL,regfun=tshdreg,fr1=1,fr2=1,alpha=.05,SCAT=TRUE,
plotit=TRUE,xout=FALSE,outfun=outpro,BLO=FALSE,nboot=500,SEED=TRUE,xlab='X',ylab='Y',WARN=FALSE,pr=TRUE,eout=FALSE,...){
#
# Compare the regression lines of two dependent groups at specified design points using
# the robust regression estimator indicated by the argument
# regfun. Default is the modified Theil--Sen estimator (tshdreg)
#
#  Similar to Dancts, which uses a bootstrap estimate of the standard error of Yhat.
#  Here, a percentile bootstrap based on bootstrap samples of the data is used,
#  as done, for example, by regci
#
#  Comparisons are done at specified design points
#  This is a robust Johnson-Neyman method for dependent groups.
#
#  For OLS, use Dancols
#  Assume data are in x1 y1 x2 and y2
#
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#
x1=as.matrix(x1)
x2=as.matrix(x2)
if(ncol(x1)!=ncol(x2))stop('x1 and x2 have different number of columns')
if(SEED)set.seed(2)
FLAG=pts
X=elimna(cbind(x1,y1,x2,y2))
if(ncol(X)>4)stop('Only one covariate is allowed')
x1=as.matrix(x1)
x2=as.matrix(x2)
p=ncol(x1)
p1=p+1
p2=p+2
p3=p1+p
p4=p3+1
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
if(xout){
flag1=outfun(x1)$out.id
flag2=outfun(x2)$out.id
flag=unique(c(flag1,flag2))
if(length(flag)>0)X=X[-flag,]
x1=X[,1:p]
y1=X[,p1]
x2=X[,p2:p3]
y2=X[,p4]
}
flagF=identical(regfun,tsreg)
if(identical(regfun,tshdreg))flagF=FALSE
if(flagF){
if(pr){
if(sum(duplicated(y1))>0)print('Duplicate values detected; regfun=tshdreg might have more power than tsreg')
if(sum(duplicated(y2))>0)print('Duplicate values detected; regfun=tshdreg might have more power than tsreg')
}}
if(is.null(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
pts=x1[isub]
}
n1<-NA  # initialize; n1 and n2 are not created when pts is specified
n2<-NA
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
mat<-matrix(NA,length(pts),7)
dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','ci.low','ci.hi','p.value'))
mat[,1]=pts
n=length(y1)
x1=as.matrix(x1)
x2=as.matrix(x2)
data<-matrix(sample(length(y1),size=n*nboot,replace=TRUE),nrow=nboot)
data=listm(t(data))
library(parallel)
est1=mclapply(data,Danctspb.sub,x1,y1,xr=pts,regfun=regfun,xout=FALSE,...)
est2=mclapply(data,Danctspb.sub,x2,y2,xr=pts,regfun=regfun,xout=FALSE,...)
est1=matl(est1)
est2=matl(est2)
mat[,2]=regYhat(x1,y1,xr=pts,regfun=regfun,...)
mat[,3]=regYhat(x2,y2,xr=pts,regfun=regfun,...)
est=est1-est2
if(!is.matrix(est))est=matrix(est,nrow=1)
mat[,4]=mat[,2]-mat[,3]
pv1=apply(est<0,1,mean,na.rm=T)
pv2=apply(est==0,1,mean,na.rm=T)
pv=pv1+.5*pv2
pv1m=1-pv
pv=2*apply(cbind(pv,pv1m),1,min)
mat[,7]=pv
ilow<-round((alpha/2) * nboot)
ihi<-nboot - ilow
ilow<-ilow+1
estsort=apply(est,1,sort)
mat[,5]=estsort[ilow,]
mat[,6]=estsort[ihi,]
if(plotit){
if(eout && xout)stop('Cannot have both eout=TRUE and xout=TRUE')
if(eout){
flag<-outfun(cbind(x1,y1),plotit=FALSE,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(cbind(x2,y2),plotit=FALSE,...)$keep              
x2<-x2[flag]                                         
y2<-y2[flag] 
}
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab)
if(SCAT)points(x1,y1,pch='o')
if(SCAT)points(x2,y2,pch='+')
abline(regfun(x1,y1)$coef)
abline(regfun(x2,y2)$coef,lty=2)
}
list(output=mat)
}

anctspb<-function(x1,y1,x2,y2,pts=NULL,regfun=tshdreg,fr1=1,fr2=1,alpha=.05,plotit=TRUE,xout=FALSE,outfun=outpro,nboot=500,SEED=TRUE,xlab='X',ylab='Y',...){
#
# Compare the regression lines of two independent groups 
# at specified design points using a robust regression estimator.
#
#  Like ancts, but a percentile bootstrap method is used.
#  This might help when there are tied values among the dependent variable.
#
#  Assume data are in x1 y1 x2 and y2
#
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#
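#  A minimal usage sketch (hypothetical data):
#    x1=rnorm(60); y1=x1+rnorm(60)
#    x2=rnorm(60); y2=.5*x2+rnorm(60)
#    anctspb(x1,y1,x2,y2,pts=c(-.5,0,.5))$output
#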
if(SEED)set.seed(2)
FLAG=pts
xy=elimna(cbind(x1,y1))
if(ncol(xy)>2)stop('Only one covariate is allowed')
x1=xy[,1]
y1=xy[,2]
xy=elimna(cbind(x2,y2))
if(ncol(xy)>2)stop('Only one covariate is allowed')
x2=xy[,1]
y2=xy[,2]
if(is.null(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
pts=x1[isub]
}
mat<-matrix(NA,length(pts),7)
dimnames(mat)<-list(NULL,c('X','Est1','Est2','DIF','ci.low','ci.hi','p.value'))
mat[,1]<-pts
bvec1=matrix(NA,nrow=nboot,ncol=length(pts))
bvec2=matrix(NA,nrow=nboot,ncol=length(pts))
x1=as.matrix(x1)
x2=as.matrix(x2)
p1=ncol(x1)+1
data<-matrix(sample(length(y1),size=length(y1)*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot){
bvec1[ib,]=regYsub(x1[data[ib,],],y1[data[ib,]],pts,p1=p1,regfun=regfun,...)
}
data<-matrix(sample(length(y2),size=length(y2)*nboot,replace=TRUE),nrow=nboot)
for(ib in 1:nboot){
bvec2[ib,]=regYsub(x2[data[ib,],],y2[data[ib,]],pts,p1=p1,regfun=regfun,...)
}
dif=bvec1<bvec2
L=apply(dif,2,mean)
E=bvec1==bvec2
TIES=apply(E,2,mean) # proportion of tied bootstrap estimates; avoid masking the constant T
pvec=L+.5*TIES
pop=1-pvec
pb=cbind(pvec,pop)
pv=2*apply(pb,1,min)
ilow<-round((alpha/2) * nboot)
ihi<-nboot - ilow
ilow<-ilow+1
ciL=NA
ciU=NA
difb=bvec1-bvec2
for(i in 1:length(pts)){
bs=sort(difb[,i])
ciL[i]=bs[ilow]
ciU[i]=bs[ihi]
}
est1=regYhat(x1,y1,xr=pts,xout=xout,outfun=outfun,regfun=regfun,...)
est2=regYhat(x2,y2,xr=pts,xout=xout,outfun=outfun,regfun=regfun,...)
mat[,2]=est1
mat[,3]=est2
est=est1-est2
mat[,4]=est
mat[,5]=ciL
mat[,6]=ciU
mat[,7]=pv
if(!is.null(FLAG[1])){
n1<-NA  # initialize; n1 and n2 are not created when pts is specified
n2<-NA
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
est1=regYhat(x1,y1,regfun=regfun,xr=pts,xout=xout,outfun=outfun,...)
est2=regYhat(x2,y2,regfun=regfun,xr=pts,xout=xout,outfun=outfun,...)
mat[,2]=est1
mat[,3]=est2
est=est1-est2
mat[,4]=est
mat[,7]=pv
}
if(plotit){
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab)
points(x1,y1,pch='o')
points(x2,y2,pch='+')
abline(regfun(x1,y1)$coef)
abline(regfun(x2,y2)$coef,lty=2)
}
list(output=mat)
}
idmatchv2<-function(m1,m2,id.col1,id.col2=id.col1){
#
#  Same as idmatch, but also return cases not matched
#
#  OUTPUT:
#  m combined data for which there are matching id's 
#  m1.no  data in m1 for which there are no matching id's in m2
#  m2.no  data in m2 for which there are no matching id's in m1
#
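#  A minimal usage sketch (hypothetical data; column 1 holds the ids):
#    m1=cbind(1:4,rnorm(4))
#    m2=cbind(c(2,3,5),rnorm(3))
#    idmatchv2(m1,m2,id.col1=1)  # $m has the matched cases; $m1.no and $m2.no the rest
#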
flag=!is.na(m1[,id.col1])
m1=m1[flag,]  # eliminate any rows where ID is missing
flag=!is.na(m2[,id.col2])
m2=m2[flag,]
M1=NULL
idnm1=NULL
ic1=0
idnm2=NULL
ic2=0
if(sum(duplicated(m1[,id.col1]))>0)stop('Duplicate ids detected in m1')
if(sum(duplicated(m2[,id.col2]))>0)stop('Duplicate ids detected in m2')
for(i in 1:nrow(m1)){
flag=duplicated(c(m1[i,id.col1],m2[,id.col2]))
if(sum(flag)==0){
ic1=ic1+1
idnm1[ic1]=i
}
if(sum(flag>0)){
if(is.data.frame(m1)){
if(!is.null(dim(M1)))M1=rbind(M1,as.data.frame(m1[i,]))
if(is.null(dim(M1)))M1=as.data.frame(m1[i,])
}
if(!is.data.frame(m1)){
if(!is.null(dim(M1)))M1=rbind(M1,m1[i,])
if(is.null(dim(M1)))M1=matrix(m1[i,],nrow=1)
}
}}
M2=NULL
for(i in 1:nrow(m2)){
flag=duplicated(c(m2[i,id.col2],m1[,id.col1]))
if(sum(flag)==0){
ic2=ic2+1
idnm2[ic2]=i
}
if(sum(flag>0)){
if(is.data.frame(m2)){
if(!is.null(dim(M2)))M2=rbind(M2,as.data.frame(m2[i,]))
if(is.null(dim(M2)))M2=as.data.frame(m2[i,])
}
if(!is.data.frame(m2)){
if(!is.null(dim(M2)))M2=rbind(M2,m2[i,])
if(is.null(dim(M2)))M2=matrix(m2[i,],nrow=1)
}
}}
m=cbind(M1[,id.col1],M1[,-id.col1],M2[,-id.col2])
nc1=ncol(m2)-1
m1u=NULL
if(!is.null(idnm1))m1u=m1[idnm1,]
m2u=NULL
if(!is.null(idnm2))m2u=m2[idnm2,]
list(m=m,idnm1=idnm1,idnm2=idnm2,m1.no=m1u,m2.no=m2u)
}

regcits<-function(x,y,regfun=tshdreg_C,nboot=599,alpha=.05,SEED=TRUE,pr=TRUE,
xout=FALSE,outfun=outpro,plotit=FALSE,xlab='Predictor 1',ylab='Predictor 2',
MC=TRUE,...){ 
if(MC)v=regciMC(x,y,regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=pr,xout=xout,
outfun=outfun,plotit=plotit,xlab=xlab,ylab=ylab,...)
if(!MC)v=regci(x,y,regfun=regfun,nboot=nboot,alpha=alpha,SEED=SEED,pr=pr,xout=xout,
outfun=outfun,plotit=plotit,xlab=xlab,ylab=ylab,...)
v
}

qhdsm<-function(x,y,qval=.5,q=NULL,pr=FALSE,xout=FALSE,outfun=outpro,plotit=TRUE,xlab='X',ylab='Y',zlab='Z',pyhat=FALSE,fr=NULL,LP=TRUE,theta=50,phi=25,ticktype='simple',nmin=0,scale=FALSE,pr.qhd=TRUE,...){
#
# Compute the quantile regression line for one or more quantiles
# using a combination of hd (the Harrell--Davis estimator), the running
# interval smoother and LOESS.
# That is, determine the qth (qval) quantile of Y given X via the running
# interval smoother; when LP=TRUE the result is smoothed again using LOESS.
#
#  plotit=TRUE will plot the lines. With p=1 predictor, multiple lines can be plotted.
#  Example: qhdsm(x,y,q=c(.25,.5,.75)) will plot the regression lines for
#   predicting the quartiles.
  #
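#  Another usage sketch (hypothetical data): with x=rnorm(100) and y=x+rnorm(100),
#  qhdsm(x,y,q=.75,pyhat=TRUE) returns the estimated .75 quantile of Y at each
#  (sorted) X value rather than 'DONE'.
#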
if(!is.null(q))qval=q
x<-as.matrix(x)
X<-cbind(x,y)
X<-elimna(X)
np<-ncol(X)
p<-np-1
if(p>1 & length(q)>1)print('Only first quantile specified can be plotted')
x<-X[,1:p]
x<-as.matrix(x)
y<-X[,np]
if(xout){
x<-as.matrix(x)
flag<-outfun(x,...)$keep
x<-x[flag,]
y<-y[flag]
x<-as.matrix(x)
}
if(p==1){
if(is.null(fr))fr=.8
ord=order(x)
x=sort(x)
y=y[ord]
est=matrix(NA,ncol=3,nrow=length(qval))
dimnames(est)=list(NULL,c('q','Inter','Slope'))
x<-as.matrix(x)
qest=matrix(NA,ncol=length(qval),nrow=length(y))
for(j in 1:length(qval)){
rmd=NA
for(i in 1:length(x))rmd[i]<-hd(y[near(x,x[i],fr)],q=qval[j])
if(LP)rmd=lplot(x,rmd,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
qest[,j]=rmd
}
if(plotit){
plot(x,y,xlab=xlab,ylab=ylab)
for(j in 1:ncol(qest))lines(x,qest[,j])
}
if(!pyhat)qest='DONE'
}
if(p>1){
if(is.null(fr))fr=1
if(p==2){
if(pr.qhd){
if(!scale)print('scale=F is specified. If there is dependence, might want to use scale=TRUE')
}}
qest=rplot(x,y,est=hd,q=qval[1],fr=fr,plotit=plotit,pyhat=pyhat,theta=theta,
phi=phi,scale=scale,SEED=FALSE,varfun=pbvar,xlab=xlab,ylab=ylab,zlab=zlab,
ticktype=ticktype,nmin=nmin,pr=pr)
if(!pyhat)qest='DONE'
if(pyhat)qest=qest$yhat
}
qest
}
skip.cov<-function(x,cop = 6, MM = FALSE, op = 1, mgv.op = 0, outpro.cop = 3, 
    STAND = FALSE){
ans=skipcov(x,cop=cop,MM=MM,op=op,mgv.op=mgv.op,outpro.cop=outpro.cop,STAN=STAND)
list(cov=ans)
}

skipSPR<-function(x,cop=6,MM=FALSE,op=1,mgv.op=0,outpro.cop=3,pr=FALSE){
v=skip(x,pr=pr,STAND=TRUE,cop=cop,op=op,mgv.op=mgv.op,outpro.cop=outpro.cop)
v
}
rmdzeroG<-function(x,est=skipSPR,grp=NA,nboot=500,SEED=TRUE,...){
#
#   Do ANOVA on dependent groups
#   using the depth of zero among the bootstrap values
#   based on difference scores.
#
#    Like rmdzero, only designed for multivariate estimators such as
#    those computed by the R functions skip and dmean.
#
#   The data are assumed to be stored in x in list mode
#   or in a matrix. In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, columns correspond to groups.
#
#   grp is used to specify some subset of the groups, if desired.
#   By default, all J groups are used.
#
#   The default number of bootstrap samples is nboot=500
#
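#   A minimal usage sketch, assuming an n by J matrix of hypothetical data:
#     x=matrix(rnorm(60),ncol=3)
#     rmdzeroG(x)  # defaults to the skipped estimator est=skipSPR
#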
if(!is.list(x) && !is.matrix(x))stop('Data must be stored in a matrix or in list mode.')
if(is.list(x)){
# put the data in an n by J matrix
mat<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))mat[,j]<-x[[j]]
}
if(is.matrix(x))mat<-x
if(!is.na(grp[1])){
mat<-mat[,grp]
}
FLAG=FALSE
if(ncol(mat)<3)FLAG=TRUE
#if(ncol(mat)<3)stop('This function is for three or more measures of location')
mat<-elimna(mat) # Remove rows with missing values.
J<-ncol(mat)
jp<-0
Jall<-(J^2-J)/2
dif<-matrix(NA,nrow=nrow(mat),ncol=Jall)
ic<-0
for(j in 1:J){
for(k in 1:J){
if(j<k){
ic<-ic+1
dif[,ic]<-mat[,j]-mat[,k]
}}}
dif<-as.matrix(dif)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
data <- matrix(sample(nrow(mat), size = nrow(mat) * nboot, replace = T),
                nrow = nboot)
if(!FLAG){
bvec <- matrix(NA, ncol = ncol(dif), nrow = nboot)
        for(i in 1:nboot) {
                bvec[i, ] <- est(dif[data[i,],],...)$center
        }  #bvec is an nboot by Jm matrix
center<-est(dif,...)$center
bcen<-apply(bvec,2,mean)
cmat<-var(bvec-bcen+center)
zvec<-rep(0,Jall)
m1<-rbind(bvec,zvec)
bplus<-nboot+1
discen<-mahalanobis(m1,center,cmat)
sig.level<-sum(discen[bplus]<=discen)/bplus
}
if(FLAG){
bvec=matrix(NA,ncol=ncol(mat),nrow=nboot)
for(i in 1:nboot)bvec[i, ]=est(mat[data[i,],],...)$center              
pv=mean(bvec[,1]<bvec[,2])+.5*mean(bvec[,1]==bvec[,2]) 
sig.level=2*min(c(pv,1-pv))                               
center=est(mat,...)$center
}
list(p.value=sig.level,center=center)
}
rmdzeroGMC<-function(x,est=skipSPR,grp=NA,nboot=500,SEED=TRUE,...){
#
#   Do ANOVA on dependent groups
#   using the depth of zero among the bootstrap values
#   based on difference scores.
#
#    Like rmdzeroG, only designed to use a multicore processor
#
#   The data are assumed to be stored in x in list mode
#   or in a matrix. In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, columns correspond to groups.
#
#   grp is used to specify some subset of the groups, if desired.
#   By default, all J groups are used.
#
#   The default number of bootstrap samples is nboot=500
#
library(parallel)
if(!is.list(x) && !is.matrix(x))stop('Data must be stored in a matrix or in list mode.')
if(is.list(x)){
# put the data in an n by J matrix
mat<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))mat[,j]<-x[[j]]
}
if(is.matrix(x))mat<-x
if(!is.na(grp[1])){
mat<-mat[,grp]
}
FLAG=FALSE
if(ncol(mat)<3)FLAG=TRUE
#stop('This function is for three or more measures of location')
mat<-elimna(mat) # Remove rows with missing values.
J<-ncol(mat)
jp<-0
Jall<-(J^2-J)/2
dif<-matrix(NA,nrow=nrow(mat),ncol=Jall)
ic<-0
for(j in 1:J){
for(k in 1:J){
if(j<k){
ic<-ic+1
dif[,ic]<-mat[,j]-mat[,k]
}}}
dif<-as.matrix(dif)
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
data <- matrix(sample(nrow(mat), size = nrow(mat) * nboot, replace = T),
                nrow = nboot)
data=listm(t(data))
if(!FLAG){
bvec<-mclapply(data,rmdG_sub,dif,est,mc.preschedule=TRUE,...)
bvec=t(matl(bvec))
center<-est(dif,...)$center
bcen<-apply(bvec,2,mean)
cmat<-var(bvec-bcen+center)
zvec<-rep(0,Jall)
m1<-rbind(bvec,zvec)
bplus<-nboot+1
discen<-mahalanobis(m1,center,cmat)
sig.level<-sum(discen[bplus]<=discen)/bplus
}
if(FLAG){
bvec<-mclapply(data,rmdG_sub,mat,est,mc.preschedule=TRUE,...)
bvec=t(matl(bvec))
pv=mean(bvec[,1]<bvec[,2])+.5*mean(bvec[,1]==bvec[,2])
sig.level=2*min(c(pv,1-pv))
center=est(mat,...)$center
}
list(p.value=sig.level,center=center)
}

rmdG_sub<-function(data,dif,est,...){
v=est(dif[data,],...)$center
v
}
yuendv2<-function(x, y, tr = 0.2, alpha = 0.05,null.value=0){
#
#  Same as yuend, only it also returns a measure of
#  effect size:
#  (est.dif - null.value)/sd
#  For trimmed means, sd is a Winsorized variance
#  rescaled so that it estimates the standard deviation under normality
#
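#  A minimal usage sketch (hypothetical paired data):
#    x=rnorm(30); y=x+rnorm(30,mean=.3)
#    yuendv2(x,y)$Effect.Size  # standardized difference based on 20% trimmed means
#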
library(MASS)
if(tr<0)stop('tr must be between 0 and .5')
if(tr>.5)stop('tr must be between 0 and .5')
res=yuend(x=x,y=y,tr=tr,alpha=alpha)
if(tr==0)term=1
if(tr>0)term=sqrt(area(dnormvar,qnorm(tr),qnorm(1-tr))+2*(qnorm(tr)^2)*tr)
epow=(res$dif-null.value)*term/sqrt(winvar(x-y,tr=tr))
list(ci=res$ci,p.value=res$p.value,est1=res$est1,est2=res$est2,dif=res$dif,se=res$se,
teststat=res$teststat,n=res$n,df=res$df,Effect.Size=epow)
}
qhdsm2g<-function(x1,y1,x2,y2,q=.5,qval=NULL,LP=TRUE,fr=.8,xlab='X',ylab='Y',xout=FALSE,outfun=outpro,...){
#
# Plot of quantile smoother for two groups using qhdsm 
#
# fr controls amount of smoothing
# Missing values are automatically removed.
#
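# A minimal usage sketch (hypothetical data):
#   x1=rnorm(80); y1=x1+rnorm(80)
#   x2=rnorm(80); y2=x2+rnorm(80)
#   qhdsm2g(x1,y1,x2,y2,q=.5)  # plots the two median smoothers
#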
if(!is.null(qval))q=qval
m1<-elimna(cbind(x1,y1))
if(ncol(m1)>2)stop('One covariate only is allowed')
m2<-elimna(cbind(x2,y2))
x1<-m1[,1]
y1<-m1[,2]
x2<-m2[,1]
y2<-m2[,2]
if(xout){
flag<-outfun(m1[,1],plotit=FALSE,...)$keep
m1<-m1[flag,]
x1<-m1[,1]
y1<-m1[,2]
flag<-outfun(m2[,1],plotit=FALSE,...)$keep
m2<-m2[flag,]
x2<-m2[,1]
y2<-m2[,2]
}
flag=order(x1)
x1=x1[flag]
y1=y1[flag]
flag=order(x2)
x2=x2[flag]
y2=y2[flag]
rmd1=NA
rmd2=NA
for(i in 1:length(x1))rmd1[i]<-hd(y1[near(x1,x1[i],fr)],q=q)
for(i in 1:length(x2))rmd2[i]<-hd(y2[near(x2,x2[i],fr)],q=q)
if(LP){
rmd1=lplot(x1,rmd1,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
rmd2=lplot(x2,rmd2,plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
}
plot(c(x1,x2),c(y1,y2),type='n',xlab=xlab,ylab=ylab)
points(x1,y1)
points(x2,y2,pch='+')
lines(x1,rmd1)
lines(x2,rmd2,lty=2)
}



ancGLOB_sub3<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,est=tmean,pcrit=NULL,p.crit=NULL,iter=100,
nboot=500,SEED=TRUE,MC=FALSE,nmin=12,pts=NULL,fr1=1,fr2=1,plotit=TRUE,xlab='X',ylab='Y',LP=TRUE,...){
#
#
if(SEED)set.seed(2)
x1<-as.matrix(x1)
p1<-ncol(x1)+1
p<-ncol(x1)
if(p>1)stop('Current version is for one independent variable only')
xy<-cbind(x1,y1)
xy<-elimna(xy)
x1<-xy[,1:p]
y1<-xy[,p1]
xy<-cbind(x2,y2)
xy<-elimna(xy)
x2<-xy[,1:p]
y2<-xy[,p1]
if(xout){
m<-cbind(x1,y1)
flag<-outfun(x1,plotit=FALSE,...)$keep
m<-m[flag,]
x1<-m[,1:p]
y1<-m[,p1]
m<-cbind(x2,y2)
flag<-outfun(x2,plotit=FALSE,...)$keep                            
m<-m[flag,]                                                        
x2<-m[,1:p]                                                              
y2<-m[,p1] 
}
N1=length(y1)
N2=length(y2)
if(is.null(pts[1])){
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=nmin])
isub[5]<-max(sub[vecn>=nmin])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
pts=x1[isub]
g1=list()
g2=list()
for (i in 1:5){
g1[[i]]<-y1[near(x1,x1[isub[i]],fr1)]
g2[[i]]<-y2[near(x2,x1[isub[i]],fr2)]
}}
if(!is.null(pts[1])){
if(length(pts)<2)stop('Should have at least two points (With one point, use the R function ancova)')
g1=list()
g2=list()
for (i in 1:length(pts)){
g1[[i]]<-y1[near(x1,pts[i],fr1)]
g2[[i]]<-y2[near(x2,pts[i],fr2)]
}
}
n1=lapply(g1,length)
nv=(min(as.vector(matl(n1))))
res=aov2depth(g1,g2,est=est,SEED=SEED,nboot=nboot,...)
if(plotit)runmean2g(x1,y1,x2,y2,nboot=nboot,fr=fr1,est=est,xout=xout,LP=LP,...)
list(p.value=res$p.value,est1=res$est1,est2=res$est2,dif=res$dif,pts=pts,n1=res$n1,n2=res$n2)
}








ancGLOB_sub4<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,est=tmean,pcrit=NULL,p.crit=NULL,iter=100,
nboot=500,SEED=TRUE,MC=FALSE,nmin=12,pts=NULL,fr1=1,fr2=1,plotit=TRUE,xlab='X',ylab='Y',LP=TRUE,...){
#
#
if(SEED)set.seed(2)
x1<-as.matrix(x1)
p1<-ncol(x1)+1
p<-ncol(x1)
if(p>1)stop('Current version is for one independent variable only')
xy<-cbind(x1,y1)
xy<-elimna(xy)
x1<-xy[,1:p]
y1<-xy[,p1]
xy<-cbind(x2,y2)
xy<-elimna(xy)
x2<-xy[,1:p]
y2<-xy[,p1]
if(xout){
m<-cbind(x1,y1)
flag<-outfun(x1,plotit=FALSE,...)$keep
m<-m[flag,]
x1<-m[,1:p]
y1<-m[,p1]
m<-cbind(x2,y2)
flag<-outfun(x2,plotit=FALSE,...)$keep                            
m<-m[flag,]                                                        
x2<-m[,1:p]                                                              
y2<-m[,p1] 
}
N1=length(y1)
N2=length(y2)
if(is.null(pts[1])){
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=nmin])
isub[5]<-max(sub[vecn>=nmin])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
pts=x1[isub]
g1=list()
g2=list()
for (i in 1:5){
g1[[i]]<-y1[near(x1,x1[isub[i]],fr1)]
g2[[i]]<-y2[near(x2,x1[isub[i]],fr2)]
}}
if(!is.null(pts[1])){
if(length(pts)<2)stop('Should have at least two points (use the R function ancova)')
g1=list()
g2=list()
for (i in 1:length(pts)){
g1[[i]]<-y1[near(x1,pts[i],fr1)]
g2[[i]]<-y2[near(x2,pts[i],fr2)]
}}
n1=lapply(g1,length)
nv=(min(as.vector(matl(n1))))
res=aov2depth(g1,g2,est=est,SEED=SEED,nboot=nboot,...)
if(plotit)runmean2g(x1,y1,x2,y2,nboot=nboot,fr=fr1,est=est,xout=xout,LP=LP,...)
list(p.value=res$p.value,est1=res$est1,est2=res$est2,dif=res$dif,pts=pts,n1=res$n1,n2=res$n2)
}








ancGLOB_sub5<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,est=tmean,pcrit=NULL,p.crit=NULL,iter=100,
nboot=500,SEED=TRUE,MC=FALSE,nmin=12,pts=NULL,fr1=1,fr2=1,xlab='X',ylab='Y',LP=TRUE,...){
#
#
if(is.null(pts))stop('pts should be specified')
if(SEED)set.seed(2)
x1<-as.matrix(x1)
p1<-ncol(x1)+1
p<-ncol(x1)
if(p>1)stop('Current version is for one independent variable only')
xy<-cbind(x1,y1)
xy<-elimna(xy)
x1<-xy[,1:p]
y1<-xy[,p1]
xy<-cbind(x2,y2)
xy<-elimna(xy)
x2<-xy[,1:p]
y2<-xy[,p1]
if(xout){
m<-cbind(x1,y1)
flag<-outfun(x1,plotit=FALSE,...)$keep
m<-m[flag,]
x1<-m[,1:p]
y1<-m[,p1]
m<-cbind(x2,y2)
flag<-outfun(x2,plotit=FALSE,...)$keep                            
m<-m[flag,]                                                        
x2<-m[,1:p]                                                              
y2<-m[,p1] 
}
N1=length(y1)
N2=length(y2)
if(length(pts)<2)stop('Should have at least two points (With one point, use the R function ancova)')
g1=list()
g2=list()
for (i in 1:length(pts)){
g1[[i]]<-y1[near(x1,pts[i],fr1)]
g2[[i]]<-y2[near(x2,pts[i],fr2)]
}
n1=lapply(g1,length)
nv=(min(as.vector(matl(n1))))
res=aov2depth(g1,g2,est=est,SEED=SEED,nboot=nboot,nmin=nmin,...)$p.value
res
}


ancGLOB_pv<-function(n1,n2,est=tmean,fr1=.8,fr2=.8,nboot=500,SEED=TRUE,iter=1000,nmin=12,MC=TRUE,alpha=.05,PRM=FALSE,pts=NULL,...){
#
#  Determine the critical p-value when using the function ancGLOB
#  Strategy: generate data from a normal distribution with the null hypothesis true,
#  compute a p-value, and repeat
#  iter times (iter=1000 is the default;
#  a larger choice for iter is recommended. To reduce execution time use ancGLOB_pv_C.)
#
# returns:
# p.crit, the critical p-value for the specified alpha value
# if PRM=T, all p-values that were computed.
# ef.iter, the actual number of iterations, which might differ from iter
# due to sample sizes where it makes no sense to compute a p-value
# based on the generated data.
#
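# A minimal usage sketch: for two groups with 40 and 50 cases,
#   ancGLOB_pv(40,50)$p.crit
# estimates the critical p-value for ancGLOB at the default alpha=.05.
# (With iter=1000 replications this can be slow; MC=TRUE, the default, uses
# multiple cores via the parallel package.)
#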
if(SEED)set.seed(45)
bvec=list()
np1=min(c(n1,n2))+1
nmax=max(c(n1,n2))
for(i in 1:iter){
bvec[[i]]=rmul(nmax,p=4)
if(n1!=n2)bvec[[i]][np1:nmax,1:2]=NA
}
if(MC){
library(parallel)
prm=mclapply(bvec,ancGLOB_sub2,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,...)
}
if(!MC)prm=lapply(bvec,ancGLOB_sub2,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,...)
prm=elimna(as.vector(matl(prm)))
ef.iter=length(prm)
p.crit=hd(prm,alpha)
prm=sort(elimna(prm))
if(!PRM)prm=NULL
list(p.crit=p.crit,prm=prm,ef.iter=ef.iter)
}

ancGLOB_sub2<-function(bvec,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nmin=12,nboot=nboot,pts=pts,...){
p=ancGLOB_sub3(bvec[,1],bvec[,2],bvec[,3],bvec[,4],est=est,SEED=SEED,fr1=fr1,fr2=fr2,nboot=nboot,
plotit=FALSE,nmin=nmin,pts=pts,...)$p.value
p
}


ancGLOB_pv_pts<-function(x1,x2,est=tmean,fr1=1,fr2=1,nboot=500,SEED=TRUE,iter=1000,nmin=12,MC=TRUE,alpha=.05,PRM=FALSE,pts=NULL,...){
#
#  Determine critical p-value when using the function ancGLOB and pts is specified.
#  Strategy: generate data from a normal distribution with the null hypothesis true,
#  compute a p-value, and repeat
#  iter times (iter=1000 is the default)
#
#  pts is used to indicate the covariate values where comparisons are to be made.
#  Example: pts=c(1,4,6) will compare regression lines at X=1, 4 and 6
# if pts is not specified, the function terminates with an error.
#
#
# returns:
# p.crit, the critical p-value for the specified alpha value
# if PRM=T, all p-values that were computed.
# ef.iter, the actual number of iterations, which might differ from iter
# due to sample sizes where it makes no sense to compute a p-value
# based on the  generated data.
#
# Like ancGLOB_pv, only pts is specified and use data in x1 and x2
#
if(is.null(pts[1]))stop('pts is null, use ancGLOB_pv')
x1=elimna(x1)
x2=elimna(x2)
n1=length(x1)
n2=length(x2)

if(SEED)set.seed(45)
bvec=list()
np1=min(c(n1,n2))+1
nmax=max(c(n1,n2))
for(i in 1:iter){
bvec[[i]]=rmul(nmax,p=4)
if(n1!=n2)bvec[[i]][np1:nmax,1:2]=NA
bvec[[i]][1:n1,1]=x1
bvec[[i]][1:n2,3]=x2
}
prm=NA
if(MC){
library(parallel)
prm=mclapply(bvec,ancGLOB_sub4,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,...)
}
#if(!MC)prm=lapply(bvec,ancGLOB_sub4,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,...)
if(!MC){
for(ij in 1:length(bvec)){
bv=as.matrix(bvec[[ij]])
prm[ij]=ancGLOB_sub4(bv,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nboot=nboot,pts=pts,nmin=nmin,...)
}}
prm=elimna(as.vector(matl(prm)))
ef.iter=length(prm)
p.crit=hd(prm,alpha)
prm=sort(elimna(prm))
if(!PRM)prm=NULL
list(p.crit=p.crit,prm=prm,ef.iter=ef.iter)
}

ancGLOB_sub4<-function(bvec,fr1=fr1,fr2=fr2,est=est,SEED=SEED,nmin=12,nboot=nboot,pts=pts,...){
p=ancGLOB_sub5(bvec[,1],bvec[,2],bvec[,3],bvec[,4],est=est,SEED=SEED,fr1=fr1,fr2=fr2,nboot=nboot,nmin=nmin,pts=pts,...)
p
}



q2by2<-function(x,q = c(0.1, 0.25, 0.5, 0.75, 0.9), nboot = 2000,SEED=TRUE){
#
# For a 2 by 2 ANOVA, independent groups, test main effects
# and interaction for all quantiles indicated by argument q
#
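# A minimal usage sketch, assuming four hypothetical groups in list mode:
#   x=list(rnorm(30),rnorm(30),rnorm(30),rnorm(30,mean=.5))
#   q2by2(x)$AB  # interaction results for each quantile in q
#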
if(SEED)set.seed(2)
if(is.matrix(x))x<-listm(x)
if(length(x)!=4)stop('Current version is for a 2-by-2 ANOVA only. Should have four groups.')
A=matrix(NA,nrow=length(q),5)
B=matrix(NA,nrow=length(q),5)
AB=matrix(NA,nrow=length(q),5)
dimnames(A)=list(NULL,c('q','psihat','p.value','ci.lower','ci.upper'))
dimnames(B)=list(NULL,c('q','psihat','p.value','ci.lower','ci.upper'))
dimnames(AB)=list(NULL,c('q','psihat','p.value','ci.lower','ci.upper'))
con=con2way(2,2)

for(i in 1:length(q)){
A[i,1]=q[i]
B[i,1]=q[i]
AB[i,1]=q[i]
a=pbmcp(x,nboot=nboot,est=hd,con=con$conA,SEED=FALSE,q=q[i])
b=pbmcp(x,nboot=nboot,est=hd,con=con$conB,SEED=FALSE,q=q[i])
ab=pbmcp(x,nboot=nboot,est=hd,con=con$conAB,SEED=FALSE,q=q[i])
A[i,2:5]=a$output[,c(2,3,5,6)]
B[i,2:5]=b$output[,c(2,3,5,6)]
AB[i,2:5]=ab$output[,c(2,3,5,6)]
}
list(A=A,B=B,AB=AB)
}
bd1GLOB<-function(x,est=spatcen,nboot=599,alpha=.05,SEED=TRUE,MC=FALSE,...){
#
#   Test the hypothesis of equal measures of location for J
#   dependent groups using a
#   percentile bootstrap method.
#
#  Same as bd1way, only designed for estimators that take into account the 
#  overall structure of the data when dealing with outliers
#
#   By default, use spatial median  estimator
#  est=dmean.cen will use the Donoho-Gasko trimmed mean.
#
#     argument est is location estimator that returns value in $center                          
#    (So, for example, est=dmean will not run.)            
#
#   Data are assumed to be stored in list mode or an n by J matrix.
#
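#  A minimal usage sketch, assuming an n by J matrix of hypothetical data:
#    x=matrix(rnorm(90),ncol=3)
#    bd1GLOB(x)  # defaults to the spatial median via est=spatcen
#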
if(MC){
if(identical(est,dmean_C))stop('Using dmean_C with MC=T can cause R to crash. Use MC=F') 
library(parallel)
}
if(!is.list(x) && !is.matrix(x))stop('Data must be store in list mode or in an n by J matrix.')
if(is.list(x)){
m<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))m[,j]<-x[[j]]
}
if(is.matrix(x))m<-x
xcen<-m
locval=est(m,...)$center
locval=as.vector(locval)
for (j in 1:ncol(m))xcen[,j]<-m[,j]-locval[j]
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
data<-matrix(sample(nrow(m),size=nrow(m)*nboot,replace=TRUE),nrow=nboot)
data=listm(t(data))
if(MC)bvec<-mclapply(data,bd1GLOB1,xcen=xcen,est=est,...)
if(!MC)bvec<-lapply(data,bd1GLOB1,xcen=xcen,est=est,...)
bvec=as.vector(matl(bvec))
# A vector of  nboot test statistics.
icrit<-floor((1-alpha)*nboot+.5)
test<-(length(locval)-1)*var(locval)
pv=mean((test<bvec))
list(test=test,estimates=locval,p.value=pv)
}

bd1GLOB1<-function(isub,xcen,est,...){
#
#  Compute test statistic for bd1way
#
#  isub is a vector of length n,
#  a bootstrap sample from the sequence of integers
#  1, 2, 3, ..., n
#
#  xcen is an n by J matrix containing the input data
#
val<-NA
val=est(xcen[isub,],...)$center
val=as.vector(val)
test.stat<-(length(val)-1)*var(val)
test.stat
}

rmdzD<-function(x,est=skipSPR,grp=NA,nboot=500,SEED=TRUE,MC=FALSE,...){
#
#   Do ANOVA on dependent groups
#   using the depth of zero among the bootstrap values
#   based on difference scores.
#
#    Projection distances 
#    rather than Mahalanobis distances are used  when computing a p-value
#
#   The data are assumed to be stored in x in list mode
#   or in a matrix. In the first case
#   x[[1]] contains the data for the first group, x[[2]] the data
#   for the second group, etc. Length(x)=the number of groups = J.
#   If stored in a matrix, columns correspond to groups.
#
#   grp is used to specify some subset of the groups, if desired.
#   By default, all J groups are used.
#
#   The default number of bootstrap samples is nboot=500
#
if(MC)library(parallel)
if(!is.list(x) && !is.matrix(x))stop('Data must be stored in a matrix or in list mode.')
if(is.list(x)){
# put the data in an n by J matrix
mat<-matrix(0,length(x[[1]]),length(x))
for (j in 1:length(x))mat[,j]<-x[[j]]
}
if(is.matrix(x))mat<-x
if(!is.na(grp[1])){
mat<-mat[,grp]
}
mat<-elimna(mat) # Remove rows with missing values.
center=est(mat,...)$center
if(SEED)set.seed(2) # set seed of random number generator so that
#             results can be duplicated.
data <- matrix(sample(nrow(mat), size = nrow(mat) * nboot, replace = T),
                nrow = nboot)
data=listm(t(data))
if(MC)bvec<-mclapply(data,rmdG_sub,mat,est,mc.preschedule=TRUE,...)
if(!MC) bvec<-lapply(data,rmdG_sub,mat,est,...)
bvec=t(matl(bvec)) # nboot by J matrix
J<-ncol(mat)
jp<-0
Jall<-(J^2-J)/2
dif<-matrix(NA,nrow=nboot,ncol=Jall)
ic<-0
cdif=NA
for(j in 1:J){
for(k in 1:J){
if(j<k){
ic<-ic+1
dif[,ic]<-bvec[,j]-bvec[,k]
cdif[ic]=center[j]-center[k]
}}}
if(J==2){
pv=mean(bvec[,1]<bvec[,2])+.5*mean(bvec[,1]==bvec[,2])
sig.level=2*min(c(pv,1-pv))
}
if(J>2){
zvec<-rep(0,Jall)
m1<-rbind(dif,zvec)
bplus<-nboot+1
cmat=var(dif)
dv<-pdisMC(m1,center=cdif)
sig.level<-1-sum(dv[bplus]>=dv[1:nboot])/nboot
}
list(p.value=sig.level,center=center)
}


Dancovapb<-function(x1,y1,x2,y2,fr1=1,fr2=1,est=hd,alpha=.05,plotit=TRUE,pts=NA,sm=FALSE,xout=FALSE,outfun=out,DIF=FALSE,...){
#
# Compare two dependent groups using the ancova method
# (a method similar to the one used by the  R function ancova).
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
#
# percentile bootstrap method is used. 
#
# est indicates estimator to be used; Harrell-Davis median estimator is default.
#
#  Assume data are in x1 y1 x2 and y2
#
#  sm=T will create smooths using bootstrap bagging.
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#  pts=NA means five points will be picked empirically. 
#
#
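#  A minimal usage sketch (hypothetical paired data):
#    x1=rnorm(100); y1=x1+rnorm(100)
#    x2=x1+rnorm(100,sd=.2); y2=x2+rnorm(100)
#    Dancovapb(x1,y1,x2,y2)  # compares the groups at five empirically chosen points
#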
if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function')
if(length(x1)!=length(y1))stop('x1 and y1 have different lengths')
if(length(x1)!=length(x2))stop('x1 and x2 have different lengths')
if(length(x2)!=length(y2))stop('x2 and y2 have different lengths')
if(length(y1)!=length(y2))stop('y1 and y2 have different lengths')
xy=elimna(cbind(x1,y1,x2,y2))
x1=xy[,1]
y1=xy[,2]
x2=xy[,3]
y2=xy[,4]
if(is.na(pts[1])){
npt<-5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,7)
dimnames(mat)<-list(NULL,c('X','n','DIF','ci.low','ci.hi','p.value','p.crit'))
for (i in 1:5){
t1=near(x1,x1[isub[i]],fr1)
t2=near(x2,x1[isub[i]],fr2)
pick=as.logical(t1*t2)
test=rmmcppb(y1[pick],y2[pick],est=est,dif=DIF,plotit=FALSE,alpha=alpha,pr=FALSE,SEED=FALSE,...)
mat[i,1]<-x1[isub[i]]
mat[i,2]<-length(y1[pick])
mat[i,3]<-test$output[,2]
mat[i,4]<-test$output[,5]
mat[i,5]<-test$output[,6]
mat[i,6]<-test$output[,3]
}
temp2<-order(0-mat[,6])
bot=c(1:nrow(mat))
dvec=sort(alpha/bot,decreasing=TRUE)
mat[temp2,7]=dvec
}
if(!is.na(pts[1])){
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
# First check sample size
#
flage=rep(TRUE,length(pts))
for (i in 1:length(pts)){
t1<-near(x1,pts[i],fr1)
t2<-near(x2,pts[i],fr2)
pick=as.logical(t1*t2)
if(sum(pick)<=5){print(paste('Warning: there are',sum(pick),' points corresponding to the design point X=',pts[i]))
flage[i]=FALSE
}}
pts=pts[flage]
mat<-matrix(NA,length(pts),7)
dimnames(mat)<-list(NULL,c('X','n','DIF','ci.low','ci.hi',
'p.value','p.crit'))
for (i in 1:length(pts)){
t1<-near(x1,pts[i],fr1)
t2<-near(x2,pts[i],fr2)
pick=as.logical(t1*t2)
#print(y1[pick])
test=rmmcppb(y1[pick],y2[pick],est=est,dif=DIF,plotit=FALSE,alpha=alpha,pr=FALSE,SEED=FALSE,...)
mat[i,3]<-test$output[,2]
mat[i,1]<-pts[i]
mat[i,2]<-length(y1[pick])
mat[i,4]<-test$output[,5]
mat[i,5]<-test$output[,6]
mat[i,6]<-test$output[,3]
}
temp2<-order(0-mat[,6])
bot=c(1:nrow(mat))
dvec=sort(alpha/bot,decreasing=TRUE)
mat[temp2,7]=dvec
}
if(plotit){
runmean2g(x1,y1,x2,y2,fr=fr1,est=est,sm=sm,xout=xout,outfun=outfun,...)
}
list(output=mat)
}
ancdifplot<-function(x1,y1,x2,y2,fr1=1,fr2=1,tr=.2,alpha=.05,pr=TRUE,xout=FALSE,outfun=out,LP=TRUE,
nmin=8,scat=TRUE,xlab='X',ylab='Y',report=FALSE,...){
#
# Compare two independent  groups using the ancova method
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
#
#  Assume data are in x1 y1 x2 and y2
#
#  nmin indicates the minimum number of values close to a point
#
#  Similar to ancova, only compute a confidence band for the difference and plot it.
#
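#  A minimal usage sketch (hypothetical data):
#    x1=rnorm(150); y1=x1+rnorm(150)
#    x2=rnorm(150); y2=x2+rnorm(150)
#    ancdifplot(x1,y1,x2,y2,report=TRUE)  # plot plus the confidence band values
#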
if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function')
if(length(x1)!=length(y1))stop('x1 and y1 have different lengths')
if(length(x2)!=length(y2))stop('x2 and y2 have different lengths')
xy=elimna(cbind(x1,y1))
x1=xy[,1]
y1=xy[,2]
xy=elimna(cbind(x2,y2))
x2=xy[,1]
y2=xy[,2]
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
flag=vecn>=nmin
ptsum=sum(flag)
est=NA
low=NA
up=NA
ic=0
xp1=NA
xp2=NA
pv=NA
for (i in 1:length(x1)){
if(flag[i]){
g1<-y1[near(x1,x1[i],fr1)]
g2<-y2[near(x2,x1[i],fr2)]
test<-yuen(g1,g2,tr=tr)
ic=ic+1
xp1[ic]=x1[i]
xp2[ic]=x2[i]
est[ic]=test$dif
low[ic]=test$ci[1]
up[ic]=test$ci[2]
pv[ic]=test$p.value
}}
if(LP){
xy=elimna(cbind(xp1,est,low,up,pv))
est=lplot(xy[,1],xy[,2],plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
up=lplot(xy[,1],xy[,4],plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
low=lplot(xy[,1],xy[,3],plotit=FALSE,pyhat=TRUE,pr=FALSE)$yhat
}
if(!report)output='DONE'
plot(c(x1,x2),c(y1,y2),xlab=xlab,ylab=ylab,type='n')
if(!LP){
lines(xp1,up,lty=2)
lines(xp1,low,lty=2)
lines(xp1,est)
if(scat)points(c(x1,x2),c(y1,y2))
if(report){
output=cbind(xp1,est,low,up,pv)
dimnames(output)=list(NULL,c(xlab,'est.dif','lower.ci','upper.ci','p-value'))
}}
if(LP){
lines(xy[,1],up,lty=2)                                                                   
lines(xy[,1],low,lty=2)                                        
lines(xy[,1],est)                                                       
 if(scat)points(c(x1,x2),c(y1,y2))
if(report){
output=cbind(xy[,1],est,low,up,xy[,5])
dimnames(output)=list(NULL,c(xlab,'est.dif','lower.ci','upper.ci','p-value'))
}
}
output
}
ancGLOB<-function(x1,y1,x2,y2,xout=FALSE,outfun=outpro,est=tmean,p.crit=NULL,iter=500,alpha=.05,pr=TRUE,nboot=500,SEED=TRUE,MC=FALSE,CR=FALSE,
nmin=12,pts=NULL,fr1=1,fr2=1,plotit=TRUE,xlab='X',ylab='Y',LP=TRUE,cpp=FALSE,...){
#
#  Like the function ancova, only performs a global test that the measures of location
#  are equal among all the covariate values that are chosen. 
#  
#  pts = NULL, the function picks five covariate values.
#  iter=500 means that the critical p-value is determined via a simulation
#  with 500 replications.
#  
#  Reject if the p-value is less than the critical p-value.
#  Works well with alpha=.05. Uncertain about alpha <.05.
#
#  cpp=TRUE, a C++ function is used to determine the critical p-value
#  assuming the library WRScpp has been installed.  This is done as follows:
#  install.packages('devtools')
#  library("devtools") 
#  install_github( "WRScpp", "mrxiaohe") 
#
#  CR=TRUE: If number of points is two or three, plot 1-alpha confidence region
#
#
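#  A minimal usage sketch (hypothetical data):
#    x1=rnorm(80); y1=x1+rnorm(80)
#    x2=rnorm(80); y2=x2+rnorm(80)
#    ancGLOB(x1,y1,x2,y2,iter=200)  # reject if p.test is less than p.crit
#  A smaller iter reduces execution time at the cost of a less accurate critical
#  p-value; supplying p.crit directly (e.g., from a prior run of ancGLOB_pv)
#  skips the simulation step.
#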
if(CR)plotit=FALSE # Can't plot both regression lines and confidence region
if(SEED)set.seed(2)
pts.flag=is.null(pts)
if(!is.null(pts))cpp=FALSE
x1<-as.matrix(x1)
p1<-ncol(x1)+1
p<-ncol(x1)
if(p>1)stop('Current version is for one independent variable only')
xy<-cbind(x1,y1)
xy<-elimna(xy)
x1<-xy[,1:p]
y1<-xy[,p1]
xy<-cbind(x2,y2)
xy<-elimna(xy)
x2<-xy[,1:p]
y2<-xy[,p1]
if(xout){
m<-cbind(x1,y1)
flag<-outfun(x1,plotit=FALSE,...)$keep
m<-m[flag,]
x1<-m[,1:p]
y1<-m[,p1]
m<-cbind(x2,y2)
flag<-outfun(x2,plotit=FALSE,...)$keep                            
m<-m[flag,]                                                        
x2<-m[,1:p]                                                              
y2<-m[,p1] 
}
N1=length(y1)
N2=length(y2)
if(is.null(pts[1])){
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=nmin])
isub[5]<-max(sub[vecn>=nmin])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
pts=x1[isub]
g1=list()
g2=list()
for (i in 1:5){
g1[[i]]<-y1[near(x1,x1[isub[i]],fr1)]
g2[[i]]<-y2[near(x2,x1[isub[i]],fr2)]
}}
if(!is.null(pts[1])){
if(length(pts)<2)stop('Should have at least two points (use the R function ancova)')
g1=list()
g2=list()
for (i in 1:length(pts)){
g1[[i]]<-y1[near(x1,pts[i],fr1)]
g2[[i]]<-y2[near(x2,pts[i],fr2)]
}
}
p.alpha=NULL
if(is.null(p.crit)){
if(pts.flag){
if(cpp){
library(WRScpp)
ve=ancGLOB_pv_C(N1,N2,est=est,iter=iter,fr1=fr1,fr2=fr2,nboot=nboot,SEED=SEED,...)
v=hd(ve,q=alpha)
}
else{
 v=ancGLOB_pv(N1,N2,est=est,iter=iter,fr1=fr1,fr2=fr2,nboot=nboot,
PRM=FALSE,SEED=SEED,alpha=alpha,xlab=xlab,ylab=ylab,...)$p.crit
}
}
if(!pts.flag)v=ancGLOB_pv_pts(x1,x2,pts=pts,nmin=nmin,iter=iter,est=est,fr1=fr1,fr2=fr2,
nboot=nboot,SEED=SEED,alpha=alpha,MC=MC)$p.crit
}
if(!is.null(p.crit))v=p.crit
res=aov2depth(g1,g2,est=est,SEED=SEED,CR=CR,alpha=v,...)
if(pr)print('Reject if p.test is less than p.crit')
if(plotit)runmean2g(x1,y1,x2,y2,fr=fr1,est=est,xout=FALSE,LP=LP,xlab=xlab,ylab=ylab,...)
list(p.test=res$p.value,p.crit=v,est1=res$est1,est2=res$est2,dif=res$dif,pts=pts,n1=res$n1,n2=res$n2)
}


aov2depth<-function(x1,x2,est=tmean,nboot=500,SEED=TRUE,nmin=12,CR=FALSE,
xlab=' DIF 1',ylab='DIF 2',zlab='DIF 3',alpha=.05,...){
#
# 2 by K ANOVA, independent groups (the K levels are not necessarily independent,
#                                 nor completely dependent)
#
#   Main effect for Factor A only
#
# Strategy: Use the depth of zero based on the estimated
# differences for each column of the K levels of Factor B.
# That is, test for no main effect for Factor A in
# a manner that takes into account the pattern of the
# measures of location rather than simply averaging
# across columns.
#
#  x1 can be a matrix with K columns corresponding to groups; ditto for x2.
#  Or x1 and x2 can have list mode.
#   It is assumed that x1 and x2 contain data for independent groups.
#
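#  A minimal usage sketch (two hypothetical groups, K=3 levels each):
#    x1=matrix(rnorm(90),ncol=3)
#    x2=matrix(rnorm(90),ncol=3)
#    aov2depth(x1,x2)$p.value
#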
if(is.matrix(x1)||is.data.frame(x1))x1=listm(x1)
if(is.matrix(x2)||is.data.frame(x2))x2=listm(x2)
J=length(x1)
if(J!=length(x2))stop('x1 and x2 should have same number of groups')
if(SEED)set.seed(2)
for(j in 1:J){
x1[[j]]=na.omit(x1[[j]])
x2[[j]]=na.omit(x2[[j]])
}
n1=mapply(x1,FUN=length)
n2=mapply(x2,FUN=length)
bplus=nboot+1
bvec1=matrix(NA,nrow=nboot,ncol=J)
bvec2=matrix(NA,nrow=nboot,ncol=J)
for(j in 1:J){
data1=matrix(sample(x1[[j]],size=n1[j]*nboot,replace=TRUE),nrow=nboot)
data2=matrix(sample(x2[[j]],size=n2[j]*nboot,replace=TRUE),nrow=nboot)
bvec1[,j]=apply(data1,1,est,...)
bvec2[,j]=apply(data2,1,est,...)
}
difb=bvec1-bvec2
est1=mapply(x1,FUN=est,...)
est2=mapply(x2,FUN=est,...)
dif=est1-est2
m1=var(difb)
nullvec=rep(0,J)
difz=rbind(difb,nullvec)
dis=mahalanobis(difz,dif,m1)
sig=sum(dis[bplus]<=dis)/bplus
if(CR){
dis2<-order(dis[1:nboot])
dis<-sort(dis)
critn<-floor((1-alpha)*nboot)
if(J==2){
plot(difb[,1],difb[,2],xlab=xlab,ylab=ylab)
points(0,0,pch=0)
xx<-difb[dis2[1:critn],]
xord<-order(xx[,1])
xx<-xx[xord,]
temp<-chull(xx)
lines(xx[temp,])
lines(xx[c(temp[1],temp[length(temp)]),])
}
if(J==3){
library(scatterplot3d)
scatterplot3d(difb[dis2[1:critn],],xlab=xlab,ylab=ylab,zlab=zlab,tick.marks=TRUE)
}

}
list(p.value=sig,est1=est1,est2=est2,dif=dif,n1=n1,n2=n2)
}

ancovaWMW<-function(x1,y1,x2,y2,fr1=1,fr2=1,alpha=.05,plotit=TRUE,pts=NA,xout=FALSE,outfun=out,LP=TRUE,sm=FALSE,est=hd,...){
#
# Compare two independent  groups using the ancova method in conjunction 
# with Cliff's improvement on the Wilcoxon-Mann-Whitney test.
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
#
#  Assume data are in x1 y1 x2 and y2
#
#  sm=TRUE will use bootstrap bagging when plotting the regression lines
#  The plot is based on measure of location indicated by the argument
#  est. Default is the Harrell-Davis estimate of the median.
#
#   LP=TRUE: use running interval smoother followed by LOESS
#
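#  A minimal usage sketch (hypothetical data):
#    x1=rnorm(100); y1=x1+rnorm(100)
#    x2=rnorm(100); y2=x2+rnorm(100)
#    ancovaWMW(x1,y1,x2,y2)$output  # p.hat estimates, roughly, the probability that
#                                   # an observation from group 1 is less than one
#                                   # from group 2 at each design point
#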
if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function')
if(length(x1)!=length(y1))stop('x1 and y1 have different lengths')
if(length(x2)!=length(y2))stop('x2 and y2 have different lengths')
xy=elimna(cbind(x1,y1))
x1=xy[,1]
y1=xy[,2]
xy=elimna(cbind(x2,y2))
x2=xy[,1]
y2=xy[,2]
if(xout){
flag<-outfun(x1,...)$keep
x1<-x1[flag]
y1<-y1[flag]
flag<-outfun(x2,...)$keep
x2<-x2[flag]
y2<-y2[flag]
}
dv.sum=NULL
if(is.na(pts[1])){
npt<-5
CC=5
isub<-c(1:5)  # Initialize isub
test<-c(1:5)
xorder<-order(x1)
y1<-y1[xorder]
x1<-x1[xorder]
xorder<-order(x2)
y2<-y2[xorder]
x2<-x2[xorder]
n1<-1
n2<-1
vecn<-1
for(i in 1:length(x1))n1[i]<-length(y1[near(x1,x1[i],fr1)])
for(i in 1:length(x1))n2[i]<-length(y2[near(x2,x1[i],fr2)])
for(i in 1:length(x1))vecn[i]<-min(n1[i],n2[i])
sub<-c(1:length(x1))
isub[1]<-min(sub[vecn>=12])
isub[5]<-max(sub[vecn>=12])
isub[3]<-floor((isub[1]+isub[5])/2)
isub[2]<-floor((isub[1]+isub[3])/2)
isub[4]<-floor((isub[3]+isub[5])/2)
mat<-matrix(NA,5,8)
dimnames(mat)<-list(NULL,c('X','n1','n2','p.hat','ci.low','ci.hi','p.value','p.crit'))
for (i in 1:5){
g1<-y1[near(x1,x1[isub[i]],fr1)]
g2<-y2[near(x2,x1[isub[i]],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
test<-cidv2(g1,g2)
dv.sum=rbind(dv.sum,test$summary.dvals)
mat[i,1]<-x1[isub[i]]
mat[i,2]<-length(g1)
mat[i,3]<-length(g2)
mat[i,4]<-test$p.hat
mat[i,5]<-test$p.ci[1]
mat[i,6]<-test$p.ci[2]
mat[i,7]<-test$p.value
}}
if(!is.na(pts[1])){
CC=length(pts)
n1<-1
n2<-1
vecn<-1
for(i in 1:length(pts)){
n1[i]<-length(y1[near(x1,pts[i],fr1)])
n2[i]<-length(y2[near(x2,pts[i],fr2)])
}
mat<-matrix(NA,length(pts),8)
dimnames(mat)<-list(NULL,c('X','n1','n2','p.hat','ci.low','ci.hi','p.value','p.crit'))
for (i in 1:length(pts)){
g1<-y1[near(x1,pts[i],fr1)]
g2<-y2[near(x2,pts[i],fr2)]
g1<-g1[!is.na(g1)]
g2<-g2[!is.na(g2)]
test=cidv2(g1,g2)
dv.sum=rbind(dv.sum,test$summary.dvals)
mat[i,1]<-pts[i]
mat[i,2]<-length(g1)
mat[i,3]<-length(g2)
if(length(g1)<=5)print(paste('Warning, there are',length(g1),' points corresponding to the design point X=',pts[i]))
if(length(g2)<=5)print(paste('Warning, there are',length(g2),' points corresponding to the design point X=',pts[i]))
mat[i,4]<-test$p.hat
mat[i,5]<-test$p.ci[1]
mat[i,6]<-test$p.ci[2]
mat[i,7]<-test$p.value
}}
dvec<-alpha/c(1:CC)
temp2<-order(0-mat[,6])
mat[temp2,8]=dvec
if(plotit){
runmean2g(x1,y1,x2,y2,fr=fr1,est=est,sm=sm,xout=FALSE,LP=LP,...)
}
list(output=mat,summary=dv.sum)
}

ghtrim<-function(tr=.2,g=0.2,h=0){
#
#  Compute trimmed mean of a g-and-h distribution.
#
# 
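# The returned value is the integral of (exp(g*z)-1)*exp(h*z^2/2)/g times the
# standard normal density from qnorm(tr) to qnorm(1-tr), divided by 1-2*tr
# (see ftrim below); for g=0 the distribution is symmetric about zero, so the
# trimmed mean is 0.
# Example: ghtrim(tr=.2,g=.5,h=.1) gives the .2 trimmed mean of a g-and-h
# distribution with g=.5 and h=.1.
#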
if(g==0)val=0
if(g>0){
low=qnorm(tr)
up=-1*low
val=integrate(ftrim,low,up,tr=tr,g=g,h=h)$value
val=val/(1-2*tr)
}
val
}

ftrim<-function(z,tr,g,h){
gz=(exp(g*z)-1)*exp(h*z^2/2)/g
res=dnorm(z)*gz
res
}

DancovaV2<-function(x1=NULL,y1=NULL,x2=NULL,y2=NULL,xy=NULL,fr1=1,fr2=1,p.crit=NULL,
est=tmean,alpha=.05,plotit=TRUE,xlab='X',ylab='Y',pts=NULL,qvals=c(.25,.5,.75),sm=FALSE,
xout=FALSE,eout=FALSE,outfun=out,DIF=FALSE,LP=TRUE,nboot=500,SEED=TRUE,iter=2000,MC=FALSE,cpp=TRUE,
nmin=12,q=.5,...){
#
# Compare two dependent  groups using the ancova method.
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
#
#  Like Dancova, only bootstrap samples are obtained by resampling
#  from c(x1,y1,x2,y2) rather than conditioning on the x value as done by Dancova.
#   This function tends to have more power than Dancova.
#
# One covariate only is allowed.
#
# To get the critical p-value, the following commands provide access to the software:
# library('devtools')
# install_github('WRScpp', 'mrxiaohe')

#  Assume data are in xy having four columns: x1, y1, x2 and y2. 
#
#  Or can have the
#  data stored in four separate variables:
#   x1 y1 x2 and y2
#
#   x1 y1 are measures at time 1
#   x2 y2 are measures at time 2
#
#  LP=T, when plotting, running interval smoother is smoothed again using lplot.
#  sm=T will create smooths using bootstrap bagging.
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#
#  q=.5 means when est=hd (Harrell-Davis estimator), median is estimated.
#
#  eout=TRUE will eliminate all outliers when plotting. 
#
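#  A minimal usage sketch (hypothetical paired data), avoiding the WRScpp
#  dependency by supplying a previously computed critical p-value:
#    x1=rnorm(100); y1=x1+rnorm(100)
#    x2=x1+rnorm(100,sd=.2); y2=x2+rnorm(100)
#    DancovaV2(x1,y1,x2,y2,p.crit=.015)  # .015 is purely illustrative
#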
if(!is.null(x1[1])){
if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function')
if(length(x1)!=length(y1))stop('x1 and y1 have different lengths')
if(length(x1)!=length(x2))stop('x1 and x2 have different lengths')
if(length(x2)!=length(y2))stop('x2 and y2 have different lengths')
if(length(y1)!=length(y2))stop('y1 and y2 have different lengths')
xy=cbind(x1,y1,x2,y2)
}
n=nrow(elimna(xy))
if(plotit){
ef=identical(est,hd)
if(!ef)runmean2g(xy[,1],xy[,2],xy[,3],xy[,4],fr=fr1,est=est,sm=sm,xout=xout,LP=LP,eout=eout,
xlab=xlab,ylab=ylab,...)
if(ef)runmean2g(xy[,1],xy[,2],xy[,3],xy[,4],fr=fr1,est=hd,sm=sm,xout=xout,LP=LP,q=q,eout=eout,
xlab=xlab,ylab=ylab,...)
}

if(is.null(p.crit)){
if(cpp)library(WRScpp)
p.crit=DancGLOB_pv_C(n,fr1=fr1,fr2=fr2,nboot=nboot,est=est,SEED=SEED,iter=iter,
nmin=nmin,MC=MC,alpha=alpha,qvals=qvals,cpp=cpp)$p.crit
#$
}
if(is.null(pts)){
for(i in 1:length(qvals))pts=c(pts,qest(xy[,1],qvals[i]))
}
if(SEED)set.seed(2)
ef=identical(est,hd)
n=nrow(xy)
est1=NA
est2=NA
J=length(pts)
est1=matrix(NA,nrow=nboot,ncol=J)
est2=matrix(NA,nrow=nboot,ncol=J)
#
data=matrix(sample(n,size=n*nboot,replace=TRUE),ncol=nboot,nrow=n)
if(!MC){
if(!ef){
est1=apply(data,2,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...)
est2=apply(data,2,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=est,fr=fr2,nmin=nmin,...)
}
if(ef){
est1=apply(data,2,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=hd,fr=fr1,nmin=nmin,q=q,...)
est2=apply(data,2,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=hd,fr=fr2,nmin=nmin,q=q,...)
}
est1=t(as.matrix(est1))
est2=t(as.matrix(est2))
}
if(MC){
library(parallel)
data=listm(data)
if(!ef){
est1=mclapply(data,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...)
est2=mclapply(data,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=est,fr=fr2,nmin=nmin,...)
}
if(ef){
est1=mclapply(data,DancGLOB_sub,xy=xy[,1:2],pts=pts,est=hd,fr=fr1,nmin=nmin,q=q,...)
est2=mclapply(data,DancGLOB_sub,xy=xy[,3:4],pts=pts,est=hd,fr=fr2,nmin=nmin,q=q,...)
}
est1=t(matl(est1))
est2=t(matl(est2))
}
pv=NA
for(j in 1:J){
pv[j]=mean(est1[,j]<est2[,j],na.rm=TRUE)+.5*mean(est1[,j]==est2[,j],na.rm=TRUE)
pv[j]=2*min(c(pv[j],1-pv[j]))
}
pvm=cbind(pts,pv)
dimnames(pvm)=list(NULL,c('X','p.values'))
list(output=pvm,n=n,p.crit=p.crit)
}
DancGLOB_sub<-function(data,xy=xy,pts=pts,est=est,fr=fr,nmin=nmin,...){
x1=xy[data,1]
y1=xy[data,2]
xye=elimna(cbind(x1,y1))
est1=runhat(xye[,1],xye[,2],pts=pts,est=est,fr=fr,nmin=nmin,...)
est1
}

ancovaV2<-function(x1=NULL,y1=NULL,x2=NULL,y2=NULL,fr1=1,fr2=1,p.crit=NULL,
est=tmean,alpha=.05,plotit=TRUE,xlab='X',ylab='Y',pts=NULL,qvals=c(.25,.5,.75),sm=FALSE,
xout=FALSE,eout=FALSE,outfun=out,LP=TRUE,nboot=500,SEED=TRUE,iter=2000,MC=FALSE,cpp=TRUE,
nmin=12,q=.5,...){
#
# Compare two independent  groups using the ancova method.
# No parametric assumption is made about the form of
# the regression lines--a running interval smoother is used.
#
#  Like ancova, only bootstrap samples are obtained by resampling
#  from c(x1,y1,x2,y2) rather than conditioning on the x value as done by ancova.
#   This function tends to have more power than ancova.
#
# To get a critical p-value using C++ code:
#  library('devtools')
#  install_github('WRScpp', 'mrxiaohe')

#
# One covariate only is allowed.
#
#  Or can have the
#  data stored in four separate variables:
#   x1 y1 x2 and y2
#
#   x1 y1 are measures at time 1
#   x2 y2 are measures at time 2
#
#  LP=T, when plotting, running interval smoother is smoothed again using lplot.
#  sm=T will create smooths using bootstrap bagging.
#  pts can be used to specify the design points where the regression lines
#  are to be compared.
#
#  q=.5 means when est=hd (Harrell-Davis estimator), median is estimated.
#
#  eout=TRUE will eliminate all outliers when plotting. 
#
if(ncol(as.matrix(x1))>1)stop('One covariate only is allowed with this function')
if(length(x1)!=length(y1))stop('x1 and y1 have different lengths')
if(length(x2)!=length(y2))stop('x2 and y2 have different lengths')
xy1=elimna(cbind(x1,y1))
xy2=elimna(cbind(x2,y2))
n1=nrow(xy1)
n2=nrow(xy2)
if(plotit){
ef=identical(est,hd)
if(!ef)runmean2g(xy1[,1],xy1[,2],xy2[,1],xy2[,2],fr=fr1,est=est,sm=sm,xout=xout,LP=LP,eout=eout,
xlab=xlab,ylab=ylab,...)
if(ef)runmean2g(xy1[,1],xy1[,2],xy2[,1],xy2[,2],fr=fr1,est=hd,sm=sm,xout=xout,LP=LP,q=q,eout=eout,
xlab=xlab,ylab=ylab,...)
}

if(is.null(p.crit)){
n=min(c(n1,n2))
if(cpp)library(WRScpp)
p.crit=DancGLOB_pv_C(n,fr1=fr1,fr2=fr2,nboot=nboot,est=est,SEED=SEED,iter=iter,
nmin=nmin,MC=MC,alpha=alpha,qvals=qvals,cpp=cpp)$p.crit
# this function is stored in the package WRScpp $
}
if(is.null(pts)){
for(i in 1:length(qvals))pts=c(pts,qest(xy1[,1],qvals[i]))
}
if(SEED)set.seed(2)
ef=identical(est,hd)
est1=NA
est2=NA
J=length(pts)
est1=matrix(NA,nrow=nboot,ncol=J)
est2=matrix(NA,nrow=nboot,ncol=J)
#
data1=matrix(sample(n1,size=n1*nboot,replace=TRUE),ncol=nboot,nrow=n1)
data2=matrix(sample(n2,size=n2*nboot,replace=TRUE),ncol=nboot,nrow=n2)
if(!MC){
if(!ef){
est1=apply(data1,2,DancGLOB_sub,xy=xy1[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...)
est2=apply(data2,2,DancGLOB_sub,xy=xy2[,1:2],pts=pts,est=est,fr=fr2,nmin=nmin,...)
}
if(ef){
est1=apply(data1,2,DancGLOB_sub,xy=xy1[,1:2],pts=pts,est=hd,fr=fr1,nmin=nmin,q=q,...)
est2=apply(data2,2,DancGLOB_sub,xy=xy2[,1:2],pts=pts,est=hd,fr=fr2,nmin=nmin,q=q,...)
}
est1=t(as.matrix(est1))
est2=t(as.matrix(est2))
}
if(MC){
library(parallel)
data1=listm(data1)
data2=listm(data2)
if(!ef){
est1=mclapply(data1,DancGLOB_sub,xy=xy1[,1:2],pts=pts,est=est,fr=fr1,nmin=nmin,...)
est2=mclapply(data2,DancGLOB_sub,xy=xy2[,1:2],pts=pts,est=est,fr=fr2,nmin=nmin,...)
}
if(ef){
est1=mclapply(data1,DancGLOB_sub,xy=xy1[,1:2],pts=pts,est=hd,fr=fr1,nmin=nmin,q=q,...)
est2=mclapply(data2,DancGLOB_sub,xy=xy2[,1:2],pts=pts,est=hd,fr=fr2,nmin=nmin,q=q,...)
}
est1=t(matl(est1))
est2=t(matl(est2))
}
pv=NA
for(j in 1:J){
pv[j]=mean(est1[,j]<est2[,j],na.rm=TRUE)+.5*mean(est1[,j]==est2[,j],na.rm=TRUE)
pv[j]=2*min(c(pv[j],1-pv[j]))
}
pvm=cbind(pts,pv)
dimnames(pvm)=list(NULL,c('X','p.values'))
list(output=pvm,n=c(n1,n2),p.crit=p.crit)
}
regGmcp<-function(x,y,regfun=tsreg,SEED=TRUE,nboot=100,xout=FALSE,AD=FALSE,
    outfun=outpro,STAND=TRUE,alpha=0.05,pr=TRUE,MC=FALSE,...)
{
#
#  All pairwise comparison of regression models among J independent groups
#  That is, for groups j and k, all j<k, test H_0: all corresponding
#  parameters are equal
#
#  For individual parameters, use reg1mcp

#  Perform all pairwise comparisons, where each comparison is based
#  on the global hypothesis that all parameters are equal
#  
#  Control FWE via Hochberg's method for each set of
#  (J^2-J)/2 parameters. That is, control FWE for the intercepts
#  Do the same for the first slope, etc. 
#
#  x and y are assumed to have list mode having length J equal to the number of groups
#  For example, x[[1]] and y[[1]] contain the data for group 1.
#
#   xout=T will eliminate leverage points using the function outfun, 
#   which defaults to the projection method.
#
#  AD=TRUE, adjusted critical value is used. Does not seem necessary with a robust estimator
#           but tends to be a bit more conservative in terms of Type I errors.
#
#  OUTPUT:
#   n is sample size after missing values are removed
#   nv.keep is sample size after leverage points are removed.
#   output contains all pairwise comparisons.
#
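#  A minimal usage sketch, assuming three hypothetical groups stored in list mode:
#    x=list(rnorm(50),rnorm(50),rnorm(50))
#    y=list(x[[1]]+rnorm(50),x[[2]]+rnorm(50),2*x[[3]]+rnorm(50))
#    regGmcp(x,y,xout=TRUE)  # sig=1 flags pairs whose regression models differ
#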
if(!is.list(x))stop('Argument x should have list mode')
if(!is.list(y))stop('Argument y should have list mode')
J=length(x) # number of groups
x=lapply(x,as.matrix)
pchk=lapply(x,ncol)
temp=matl(pchk)
if(var(as.vector(temp))!=0)stop('Something is wrong: the number of covariates differs among the groups being compared')
nv=NULL
p=ncol(x[[1]])
p1=p+1
for(j in 1:J){
xy=elimna(cbind(x[[j]],y[[j]]))
x[[j]]=xy[,1:p]
y[[j]]=xy[,p1]
x[[j]]=as.matrix(x[[j]])
nv=c(nv,nrow(x[[j]]))
}
nv.keep=nv
if(!xout){
if(pr)print('Consider removing leverage points by setting xout=TRUE')
}
if(xout){
temp=lapply(x,outfun,plotit=FALSE,STAND=STAND,...)
for(j in 1:J){
x[[j]]=x[[j]][temp[[j]]$keep,]
y[[j]]=y[[j]][temp[[j]]$keep]
nv.keep[j]=length(y[[j]])
}}
tot=(J^2-J)/2
dvec<-alpha/c(1:tot)
outp=matrix(NA,ncol=5,nrow=tot)
x=lapply(x,as.matrix)
xx=list()
yy=list()
i=0
for(j in 1:J){
for(k in 1:J){
if(j < k){
i=i+1
xx[[1]]=x[[j]]
xx[[2]]=x[[k]]
yy[[1]]=y[[j]]
yy[[2]]=y[[k]]
if(!MC)all=reg1way(xx,yy,regfun=regfun,nboot=nboot,SEED=SEED,AD=TRUE,alpha=alpha,
pr=FALSE,...)
if(MC)all=reg1wayMC(xx,yy,regfun=regfun,nboot=nboot,SEED=SEED,AD=TRUE,alpha=alpha,
pr=FALSE,...)
if(AD)temp=all$adjusted.p.value
if(!AD)temp=all$p.value
if(is.null(temp))temp=all$p.value
outp[i,1]=j
outp[i,2]=k
outp[i,3]=temp
}}}
# Hochberg's method: order the p-values in descending order and assign the
# critical values alpha/1, alpha/2, ..., alpha/tot accordingly.
temp2<-order(0-outp[,3])
icc=c(1:tot)
icc[temp2]=dvec
outp[,4]=icc
flag=(outp[,3]<=outp[,4])
outp[,5]=rep(0,tot)
outp[flag,5]=1
dimnames(outp)=list(NULL,c('Group','Group','p.value','p.crit','sig'))
list(n=nv,n.keep=nv.keep,output=outp)
}
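#
# A minimal usage sketch (not from the original source), assuming three
# simulated groups with one covariate and using tsreg, the default
# Theil-Sen estimator:
if(FALSE){
set.seed(1)
x=list(rnorm(40),rnorm(40),rnorm(40))
y=list(x[[1]]+rnorm(40),x[[2]]+rnorm(40),.5*x[[3]]+rnorm(40))
regGmcp(x,y,regfun=tsreg,nboot=100,xout=TRUE)
# output columns: the two groups compared, the global p-value,
# the Hochberg critical value, and sig=1 when p.value<=p.crit
}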



OGK<-function(x,niter=2,beta=.9,control){
#
#  Compute the orthogonalized Gnanadesikan-Kettenring (OGK) estimate of
#  location and scatter via the function CovOgk in the R package rrcov.
#  The optional argument control is passed on to CovOgk.
#
library(rrcov)
v=CovOgk(x,niter=niter,beta=beta,control)
list(center=v@center,cov=v@cov)
}
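#
# A minimal usage sketch (not from the original source), assuming the rrcov
# package is installed:
if(FALSE){
set.seed(1)
x=rbind(matrix(rnorm(100),ncol=2),matrix(rnorm(10,mean=5),ncol=2))
OGK(x)  # robust center and scatter; the shifted points get little weight
}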
wmean.cov<-function(x,tr=0){
#
# Compute a robust location estimate (column trimmed means) and the
# Winsorized covariance matrix for the data in x
#
loc=apply(x,2,mean,trim=tr)  # trim is the base R argument for mean
cv=wincov(x,tr=tr)
list(center=loc,cov=cv)
}
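#
# A minimal usage sketch (not from the original source); wincov is assumed
# to be defined elsewhere in this file:
if(FALSE){
set.seed(1)
x=matrix(rnorm(60),ncol=3)
wmean.cov(x,tr=.2)  # 20% trimmed means and Winsorized covariance matrix
}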
medhd2g<-function(x, y, alpha = 0.05, nboot = 2000,SEED=TRUE,pr=TRUE, ...){
#
# Compare the medians of two independent groups via the Harrell-Davis
# estimator, using the percentile bootstrap method in pb2gen
#
res=pb2gen(x,y,alpha=alpha,nboot=nboot,est=hd,SEED=SEED,pr=pr, ...)
res
}
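#
# A minimal usage sketch (not from the original source):
if(FALSE){
set.seed(1)
g1=rnorm(30)
g2=rnorm(30)+.5
medhd2g(g1,g2)  # percentile bootstrap CI and p-value for the median difference
}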
med.effect<-function(x,y,HD=TRUE,eq.var=FALSE,nboot=100,loc.fun=median){
#
#  Compute a robust analog of Cohen's d using
#  the median and the percentage bend midvariance
#
#  HD=TRUE: use the Harrell-Davis estimator
#  HD=FALSE: use the usual sample median
#
#  eq.var=FALSE: use an explanatory measure of effect size
#  eq.var=TRUE: use an analog of Cohen's d
#
#  With eq.var=FALSE and unequal sample sizes, the measure is based on
#  nboot subsamples of size min(n1,n2), summarized via loc.fun.
#
x=elimna(x)
y=elimna(y)
n1=length(x)
n2=length(y)
if(HD){
e1=hd(x)
e2=hd(y)
}
if(!HD){
e1=median(x)
e2=median(y)
}
if(eq.var){
s1sq=pbvar(x)
s2sq=pbvar(y)
spsq<-(n1-1)*s1sq+(n2-1)*s2sq
sp<-sqrt(spsq/(n1+n2-2))
dval=(e1-e2)/sp
}
if(!eq.var){
if(n1==n2)dval=sqrt(var(c(e1,e2))/pbvar(c(x,y)))
if(n1!=n2){
N=min(c(n1,n2))
vals=0
for(i in 1:nboot)vals[i]=med.effect.sub(sample(x,N),sample(y,N),HD=HD)
dval=sqrt(loc.fun(vals))
}}
dval
}

med.effect.sub<-function(x,y,HD){
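#
# Called by med.effect when the sample sizes differ: compute the
# explanatory effect size (before taking the square root) for two
# subsamples of equal size.
#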
if(HD){
e1=hd(x)
e2=hd(y)
}
if(!HD){
e1=median(x)
e2=median(y)
}
val=var(c(e1,e2))/pbvar(c(x,y))
val
}
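#
# A minimal usage sketch (not from the original source):
if(FALSE){
set.seed(1)
x=rnorm(40)
y=rnorm(50,mean=1)
med.effect(x,y)              # explanatory measure of effect size
med.effect(x,y,eq.var=TRUE)  # analog of Cohen's d
}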




outms<-function(x,crit=2,plotit=FALSE){
#
# Detect outliers among univariate data in x using the mean and standard
# deviation: values with |z| >= crit are flagged.
# (The argument plotit is currently ignored.)
#
x=elimna(x)
x=as.matrix(x)
if(ncol(x)>1)stop('For multivariate data, use the function out with outfun=wmean.cov')
z=(x-mean(x))/sd(x)
flag=abs(z)>=crit
n.out=sum(flag)
nums=c(1:length(x))
keep=nums[!flag]
list(n.out=n.out,out.value=x[flag],out.id=nums[flag],keep=keep)
}
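#
# A minimal usage sketch (not from the original source):
if(FALSE){
set.seed(1)
x=c(rnorm(20),10)
outms(x)  # the value 10 should be flagged: n.out, out.value, out.id, keep
}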
