#' Trimmed means for 2-way ANOVA
#'
#' A heteroscedastic two-way ANOVA for trimmed means using a generalization of Welch's method.
#'
#' @section By default,
#'
#' @aliases t2way
#' @param J number of levels for factor 1
#' @param K number of levels for factor 2
#' @param data contains the raw data stored in list mode, or a matrix with columns corresponding to groups.
#' If stored in list mode, data[[1]] contains the data for the first level of both factors: level 1,1.
#' data[[2]] is assumed to contain the data for level 1 of the first factor and level 2 of the second
#' factor: level 1,2, etc. It is assumed that data has length JK, the total number of groups being tested.
#' @param tr proportion to be trimmed, default is .2
#' @param grp \code{1:p}
#' @param p is a numberic value J*K
#' @param MAT logical; if \code{TRUE} assumes data are stored in matrix with 3 columns with two of the
#' columns indicated by the argument \code{lev.col} specifying the columns of x containing the values of the
#' levels of the two factors.
#' @param lev.col something
#' @param var.col something
#' @param pr logical; print the results?
#' @export t2way
#' @author Rob Cribbie and Phil Chalmers
#' @examples
#' \dontrun{
#' data(twowaythreeway)
#' attach(twowaythreeway)
#'
#' }
t2way<-function(J,K,data,tr=.2,grp=c(1:p),p=J*K,MAT=F,
lev.col=c(1:2),var.col=3,pr=T){
# Perform a J by K (two-way) anova on trimmed means where
# all groups are independent.
#
# The R variable data is assumed to contain the raw
# data stored in list mode, or a matrix with columns
# corresponding to groups. If stored in list mode, data[[1]] contains the data
# for the first level of both factors: level 1,1.
# data][2]] is assumed to contain the data for level 1 of the
# first factor and level 2 of the second factor: level 1,2
#
# The default amount of trimming is tr=.2
#
# It is assumed that data has length JK, the total number of
# groups being tested.
#
# MAT=T, assumes data are stored in matrix with 3 columns
# with two of the columns indicated by the argument
# lev.col
# specifying the columns of x containing the values of the
# levels of the two factors.
# The outcome variable is in column
# var.col
# which defaults to column 3
# That is, this function calls selby2 for you.
#
if(is.data.frame(data))data=as.matrix(data)
if(tr==.5){
print("For medians, use med2way if there are no ties")
print("With ties, use linear contrasts in conjunction with medpb")
stop("")
}
if(MAT){
if(!is.matrix(data))stop("With MAT=T, data must be a matrix")
if(length(lev.col)!=2)stop("Argument lev.col should have 3 values")
temp=selby2(data,lev.col,var.col)
lev1=length(unique(temp$grpn[,1]))
lev2=length(unique(temp$grpn[,2]))
gv=apply(temp$grpn,2,rank)
gvad=10*gv[,1]+gv[,2]
grp=rank(gvad)
if(pr){
print(paste("Factor 1 has", lev1, "levels"))
print(paste("Factor 2 has", lev2, "levels"))
}
if(J!=lev1)warning("J is being reset to the number of levels found")
if(K!=lev2)warning("K is being reset to the number of levels found")
J=lev1
K=lev2
data=temp$x
}
if(is.matrix(data))data=listm(data)
if(!is.list(data))stop("Data are not stored in list mode")
if(p!=length(data)){
print("The total number of groups, based on the specified levels, is")
print(p)
print("The number of groups in data is")
print(length(data))
print("Warning: These two values are not equal")
}
tmeans<-0
h<-0
v<-0
for (i in 1:p){
data[[grp[i]]]=elimna(data[[grp[i]]])
tmeans[i]<-mean(data[[grp[i]]],tr)
h[i]<-length(data[[grp[i]]])-2*floor(tr*length(data[[grp[i]]]))
# h is the effective sample size
v[i]<-(length(data[[grp[i]]])-1)*winvar(data[[grp[i]]],tr)/(h[i]*(h[i]-1))
# v contains the squared standard errors
}
v<-diag(v,p,p) # Put squared standard errors in a diag matrix.
ij<-matrix(c(rep(1,J)),1,J)
ik<-matrix(c(rep(1,K)),1,K)
jm1<-J-1
cj<-diag(1,jm1,J)
for (i in 1:jm1)cj[i,i+1]<-0-1
km1<-K-1
ck<-diag(1,km1,K)
for (i in 1:km1)ck[i,i+1]<-0-1
# Do test for factor A
#cmat<-kron(cj,kron(ik,il)) # Contrast matrix for factor A
cmat<-kron(cj,ik) # Contrast matrix for factor A
#alval<-c(999:1)/1000
alval<-c(1:999)/1000
for(i in 1:999){
irem<-i
Qa<-johan(cmat,tmeans,v,h,alval[i])
if(Qa$teststat>Qa$crit)break
}
A.p.value=irem/1000
# Do test for factor B
cmat<-kron(ij,ck) # Contrast matrix for factor B
for(i in 1:999){
irem<-i
Qb<-johan(cmat,tmeans,v,h,alval[i])
if(Qb$teststat>Qb$crit)break
}
B.p.value=irem/1000
# Do test for factor A by B interaction
cmat<-kron(cj,ck) # Contrast matrix for factor A by B
for(i in 1:999){
irem<-i
Qab<-johan(cmat,tmeans,v,h,alval[i])
if(Qab$teststat>Qab$crit)break
}
AB.p.value=irem/1000
tmeans=matrix(tmeans,J,K,byrow=T)
list(Qa=Qa$teststat,A.p.value=A.p.value,
Qb=Qb$teststat,B.p.value=B.p.value,
Qab=Qab$teststat,AB.p.value=AB.p.value,means=tmeans)
}
selby2<-function(m,grpc,coln=NA){
# Create categories according to the grpc[1] and grpc[2] columns
# of the matrix m. The function puts the values in column coln into
# a vector having list mode.
#
if(is.na(coln))stop("The argument coln is not specified")
if(length(grpc)>4)stop("The argument grpc must have length less than or equal to 4")
x<-vector("list")
ic<-0
if(length(grpc)==2){
cat1<-selby(m,grpc[1],coln)$grpn
cat2<-selby(m,grpc[2],coln)$grpn
for (i1 in 1:length(cat1)){
for (i2 in 1:length(cat2)){
temp<-NA
it<-0
for (i in 1:nrow(m)){
if(sum(m[i,c(grpc[1],grpc[2])]==c(cat1[i1],cat2[i2]))==2){
it<-it+1
temp[it]<-m[i,coln]
}
}
if(!is.na(temp[1])){
ic<-ic+1
x[[ic]]<-temp
if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2]),1,2)
if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2]))
}
}}
}
if(length(grpc)==3){
cat1<-selby(m,grpc[1],coln)$grpn
cat2<-selby(m,grpc[2],coln)$grpn
cat3<-selby(m,grpc[3],coln)$grpn
x<-vector("list")
ic<-0
for (i1 in 1:length(cat1)){
for (i2 in 1:length(cat2)){
for (i3 in 1:length(cat3)){
temp<-NA
it<-0
for (i in 1:nrow(m)){
if(sum(m[i,c(grpc[1],grpc[2],grpc[3])]==c(cat1[i1],cat2[i2],cat3[i3]))==3){
it<-it+1
temp[it]<-m[i,coln]
}}
if(!is.na(temp[1])){
ic<-ic+1
x[[ic]]<-temp
if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2],cat3[i3]),1,3)
if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2],cat3[i3]))
}}}}
}
if(length(grpc)==4){
cat1<-selby(m,grpc[1],coln)$grpn
cat2<-selby(m,grpc[2],coln)$grpn
cat3<-selby(m,grpc[3],coln)$grpn
cat4<-selby(m,grpc[4],coln)$grpn
x<-vector("list")
ic<-0
for (i1 in 1:length(cat1)){
for (i2 in 1:length(cat2)){
for (i3 in 1:length(cat3)){
for (i4 in 1:length(cat4)){
temp<-NA
it<-0
for (i in 1:nrow(m)){
if(sum(m[i,c(grpc[1],grpc[2],grpc[3],grpc[4])]==c(cat1[i1],cat2[i2],cat3[i3],cat4[i4]))==4){
it<-it+1
temp[it]<-m[i,coln]
}}
if(!is.na(temp[1])){
ic<-ic+1
x[[ic]]<-temp
if(ic==1)grpn<-matrix(c(cat1[i1],cat2[i2],cat3[i3],cat4[i4]),1,4)
if(ic>1)grpn<-rbind(grpn,c(cat1[i1],cat2[i2],cat3[i3],cat4[i4]))
}}}}}
}
list(x=x,grpn=grpn)
}
johan<-function(cmat,vmean,vsqse,h,alpha=.05){
#
# This function is used by other functions that come with this book,
# and it can be used to test hypothesis not covered in the text.
#
# The function performs Johansen's test of C mu = 0 for p independent groups,
# where C is a k by p matrix of rank k and mu is a p by 1 matrix of
# of unknown trimmed means.
# The argument cmat contains the matrix C.
# vmean is a vector of length p containing the p trimmed means
# vsqe is a diagonal matrix containing the squared standard errors of the
# the trimmed means in vmean.
# h is a vector containing the effective sample sizes
#
yvec<-matrix(vmean,length(vmean),1)
if(!is.matrix(vsqse))vsqse<-diag(vsqse)
test<-cmat%*%vsqse%*%t(cmat)
invc<-solve(test)
test<-t(yvec)%*%t(cmat)%*%invc%*%cmat%*%yvec
R<-vsqse%*%t(cmat)%*%invc%*%cmat
A<-sum(diag((diag(R))^2/diag(h-1)))
df<-nrow(cmat)
crit<-qchisq(1-alpha,df)
crit<-crit+(crit/(2*df))*A*(1+3*crit/(df+2))
list(teststat=test[1],crit=crit[1])
}
winvar<-function(x,tr=.2,na.rm=FALSE){
#
# Compute the gamma Winsorized variance for the data in the vector x.
# tr is the amount of Winsorization which defaults to .2.
#
if(na.rm)x<-x[!is.na(x)]
y<-sort(x)
n<-length(x)
ibot<-floor(tr*n)+1
itop<-length(x)-ibot+1
xbot<-y[ibot]
xtop<-y[itop]
y<-ifelse(y<=xbot,xbot,y)
y<-ifelse(y>=xtop,xtop,y)
winvar<-var(y)
winvar
}
selby<-function(m,grpc,coln){
#
#
# A commmon situation is to have data stored in an n by p matrix where
# one or more of the columns are group identification numbers.
# This function groups all values in column coln according to the
# group numbers in column grpc and stores the results in list mode.
#
# More than one column of data can sorted
#
# grpc indicates the column of the matrix containing group id number
#
if(is.null(dim(m)))stop("Data must be stored in a matrix or data frame")
if(is.na(grpc[1]))stop("The argument grpc is not specified")
if(is.na(coln[1]))stop("The argument coln is not specified")
if(length(grpc)!=1)stop("The argument grpc must have length 1")
x<-vector("list")
grpn<-sort(unique(m[,grpc]))
it<-0
for (ig in 1:length(grpn)){
for (ic in 1:length(coln)){
it<-it+1
flag<-(m[,grpc]==grpn[ig])
x[[it]]<-m[flag,coln[ic]]
}}
list(x=x,grpn=grpn)
}
elimna<-function(m){
#
# remove any rows of data having missing values
#
if(is.null(dim(m)))m<-as.matrix(m)
ikeep<-c(1:nrow(m))
for(i in 1:nrow(m))if(sum(is.na(m[i,])>=1))ikeep[i]<-0
elimna<-m[ikeep[ikeep>=1],]
elimna
}
kron<-function(m1,m2){
# compute the Kronecker product of the two matrices m1 and m2.
#
m1<-as.matrix(m1) # Vectors of length p are converted to a p by 1 matrix
m2<-as.matrix(m2)
kron<-vector(mode="numeric",length=0)
for(i in 1:nrow(m1)){
m3<-m1[i,1]*m2
for(j in 2:ncol(m1))m3<-cbind(m3,m1[i,j]*m2)
if(i==1)kron<-m3
if(i>=2)kron<-rbind(kron,m3)
}
kron
}
t1way<-function(x,tr=.2,grp=NA){
#
# A heteroscedastic one-way ANOVA for trimmed means
# using a generalization of Welch's method.
#
# The data are assumed to be stored in $x$ in list mode.
# Length(x) is assumed to correspond to the total number of groups.
# By default, the null hypothesis is that all groups have a common mean.
# To compare a subset of the groups, use grp to indicate which
# groups are to be compared. For example, if you type the
# command grp<-c(1,3,4), and then execute this function, groups
# 1, 3, and 4 will be compared with the remaining groups ignored.
#
# Missing values are automatically removed.
#
if(is.matrix(x)){
y<-list()
for(j in 1:ncol(x))y[[j]]<-x[,j]
x<-y
}
if(is.na(grp[1]))grp<-c(1:length(x))
if(!is.list(x))stop("Data are not stored in a matrix or in list mode.")
J<-length(grp) # The number of groups to be compared
h<-vector("numeric",J)
w<-vector("numeric",J)
xbar<-vector("numeric",J)
for(j in 1:J){
xx<-!is.na(x[[j]])
val<-x[[j]]
x[[j]]<-val[xx] # Remove missing values
h[j]<-length(x[[grp[j]]])-2*floor(tr*length(x[[grp[j]]]))
# h is the number of observations in the jth group after trimming.
w[j]<-h[j]*(h[j]-1)/((length(x[[grp[j]]])-1)*winvar(x[[grp[j]]],tr))
xbar[j]<-mean(x[[grp[j]]],tr)
}
u<-sum(w)
xtil<-sum(w*xbar)/u
A<-sum(w*(xbar-xtil)^2)/(J-1)
B<-2*(J-2)*sum((1-w/u)^2/(h-1))/(J^2-1)
TEST<-A/(B+1)
nu1<-J-1
nu2<-1./(3*sum((1-w/u)^2/(h-1))/(J^2-1))
sig<-1-pf(TEST,nu1,nu2)
list(TEST=TEST,nu1=nu1,nu2=nu2,p.value=sig)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.