R/mscor.R

mscor <-
function(m,corfun=spear,cop=3,MM=FALSE,gval=NA,ap=TRUE,pw=TRUE,STAND=TRUE){
#
# m is an n by p matrix
#
# Compute a skipped correlation matrix
#
#  corfun indicates the correlation to be used
#  corfun=pcor uses Pearson's correlation
#  corfun=spear uses Spearman's correlation
#
#  When calling outpro,
#  STAND=T means marginals are first standardized.
# This function returns the p by p matrix of correlations
#
# Method: Eliminate outliers using a projection technique.
# That is, compute Donoho-Gasko median, for each point
# consider the line between it and the median,
# project all points onto this line, and
# check for outliers using a boxplot rule.
# Repeat this for all points. A point is declared
# an outlier if for any projection it is an outlier
# using a modification of the usual boxplot rule.
#
# cop determines how center of the scatterplot is
# estimated; see the function outpro.
# cop=l Donoho-Gasko halfspace median
# cop=2 MCD measure of location
# cop=3 marginal medians
# cop=4 MVE measure of location
#
# Eliminate any outliers and compute
# correlations using remaining data.
#
# gval is critical value for determining whether a point
# is an outlier. It is determined automatically if not specified,
# assuming that Spearman's correlation is used. Critical
# values when using some other correlation have not been
# determined.
#
# Hypothesis of zero correlations tested with FWE=.05
#
# AGRUMENTS:
# MM; see function outpro
# ap=T all pairwise comparisons are tested
# ap=F first variable is tested versus all others
# (for a total of p-1 tests).
# pw=T, print message about high execution time
# pw=F, suppress the message.
#
m<-elimna(m)
p<-ncol(m)
pm<-p-1
n<-nrow(m)
if(p<2)stop("Something wrong; number of variables is < 2")
if(pw && cop==1){
print("If execution time is too high,")
print("use cop=2 or 4 rather than the default value of 1")
}
if(ap){
inter<-c(2.374,2.780,3.030,3.208,3.372,3.502,3.722,3.825,3.943)
slope<-c(5.333,8.8,25.67,32.83,51.53,75.02,111.34,123.16,126.72)
expo<-c(-1,-1,-1.2,-1.2,-1.3,-1.4,-1.5,-1.5,-1.5)
if(p>10){
qvec<-NA
for(i in 1:9)qvec[i]<-inter[i]+slope[i]*n^expo[i]
pval<-c(2:10)
temp<-lsfit(pval,qvec)$coef
}
}
if(!ap){
inter<-c(2.374,2.54,2.666,2.92,2.999,3.097,3.414,3.286,3.258)
slope<-c(5.333,8.811,14.89,20.59,51.01,52.15,58.498,64.934,59.127)
expo<-c(-1,-1,-1.2,-1.2,-1.5,-1.5,-1.5,-1.5,-1.5)
if(p>10){
qvec<-NA
for(i in 1:9)qvec[i]<-inter[i]+slope[i]*n^expo[i]
pval<-c(1:9)
temp<-lsfit(pval,qvec)$coef
}
}
if(p<=10)crit<-inter[pm]+slope[pm]*n^expo[pm]
if(p>10)crit<-temp[2]*p+temp[1]
if(cop!=1 && is.na(gval))gval<-sqrt(qchisq(.975,ncol(m)))
temp<-outpro(m,plotit=FALSE,MM=MM,gval=gval,cop=cop,STAND=STAND)$keep
mcor<-corfun(m[temp,])$cor
test<-abs(mcor*sqrt((nrow(m)-2)/(1-mcor^2)))
diag(test) <- NA
if(!ap){
test<-as.matrix(test[1,])
}
list(cor=mcor,crit.val=crit,test.stat=test)
}
musto101/wilcox_R documentation built on May 23, 2019, 10:52 a.m.