R/TwoSamplesHT2.R In MVTests: Multivariate Hypothesis Tests

Documented in TwoSamplesHT2

#' @title Two Independent Samples Hotelling T^2 Test
#'
#' @description
#' \code{TwoSamplesHT2} function computes Hotelling T^2 statistic for two
#' independent samples and gives confidence intervals.
#'
#' @details
#' This function computes two independent samples Hotelling T^2 statistics
#' that is used to test
#' whether two population mean vectors are equal to each other.
#' When \code{H0} is rejected, this function computes confidence intervals
#' for all variables to determine variable(s) affecting on rejection decision.
#' Moreover, when covariance matrices are not homogeneity, the approach proposed
#' by D. G. Nel and V. D. Merwe (1986) is used.
#'
#' @importFrom stats cor cov pchisq pf pnorm qchisq qf var
#' @param data a data frame.
#' @param group a group vector consisting of 1 and 2 values.
#' @param alpha Significance Level that will be used for confidence intervals.
#' default=0.05
#' @param Homogenity a logical argument. If sample covariance matrices are
#' homogeneity,then \code{Homogenity=TRUE}. Otherwise \code{Homogenity=FALSE}
#' The homogeneity of covariance matrices can be investigated with \code{BoxM} function.
#' @export
#'
#' @return a list with 8 elements:
#' \item{HT2}{The value of Hotelling T^2 Test Statistic}
#' \item{F}{The value of F Statistic}
#' \item{df}{The F statistic's degree of freedom}
#' \item{p.value}{p value}
#' \item{CI}{The lower and upper limits of confidence intervals obtained for
#' all variables}
#' \item{alpha}{The alpha value using in confidence intervals}
#' \item{Descriptive1}{Descriptive Statistics for the first group}
#' \item{Descriptive2}{Descriptive Statistics for the second group}
#' @references Rencher, A. C. (2003). Methods of multivariate analysis
#' (Vol. 492). John Wiley & Sons.
#' @references Tatlidil, H. (1996). Uygulamali Cok Degiskenli Istatistiksel
#' Yontemler. Cem Web.
#' @references D.G. Nel & C.A. Van Der Merwe (1986) A solution to the
#' multivariate behrens fisher problem, Communications in Statistics:Theory and
#'  Methods, 15:12, 3719-3735
#' @author Hasan BULUT <hasan.bulut@omu.edu.tr>
#' @examples
#'
#' data(iris)
#' G<-c(rep(1,50),rep(2,50))
#' # When covariances matrices are homogeneity
#' results1 <- TwoSamplesHT2(data=iris[1:100,1:4],group=G,alpha=0.05)
#' summary(results1)
#' # When covariances matrices are not homogeneity
#' results2 <- TwoSamplesHT2(data=iris[1:100,1:4],group=G,Homogenity=FALSE)
#' summary(results2)

TwoSamplesHT2<-function(data,group,alpha=0.05,Homogenity=TRUE){
Name<-"TwoSamplesHT2"

# When Homogenity=TRUE
if (Homogenity=="TRUE") {
data<-as.matrix(data)
X1<-data[which(group==1),]
X2<-data[which(group==2),]
n1<-nrow(X1); n2<-nrow(X2);n<-n1+n2
p<-ncol(data)
X1.mean<-colMeans(X1);  X2.mean<-colMeans(X2)
diff<-X1.mean-X2.mean
S1<-cov(X1);  S2<-cov(X2)
Sigma<-(1/(n1+n2-2))*((n1-1)*S1+(n2-1)*S2)
T2<-(n1*n2/n)*(t(diff)%*%solve(Sigma)%*%(diff))
F<-((n-p-1)/(p*(n-2)))*T2
T2.table<-((p*(n-2))/(n-p-1))*qf(df1=p,df2=n-p-1,p=alpha,lower.tail=FALSE)
pval<-pf(F, df1=p, df2=(n-p-1), lower.tail = FALSE)
df<-c(p,n-p-1)

Low<-Up<-imp<-NULL
for (i in 1:p) {
a<-c(rep(0,p));a[i]<-1
Low[i]<-a%*%diff-sqrt(T2.table*(n/(n1*n2))*Sigma[i,i])
Up[i]<-a%*%diff+sqrt(T2.table*(n/(n1*n2))*Sigma[i,i])
if (Low[i]<=0& Up[i]>=0) {
imp[i]<-"FALSE"
}else {
imp[i]<-"*TRUE*"
} }
CI<-data.frame(Lower=Low,Upper=Up,Import=imp)
colnames(CI)<-c("Lower","Upper","Important Variables?")
rownames(CI)<-colnames(data)

} else {  # When Homogenity=FALSE
data<-as.matrix(data)
X1<-data[which(group==1),]
X2<-data[which(group==2),]
n1<-nrow(X1); n2<-nrow(X2);n<-n1+n2
p<-ncol(data)
X1.mean<-colMeans(X1);  X2.mean<-colMeans(X2)
diff<-X1.mean-X2.mean
S1<-cov(X1);  S2<-cov(X2)
SS1<-S1*(1/n1); SS2<-S2*(1/n2)
S<-SS1+SS2
T2<-(t(diff)%*%solve(S)%*%(diff))

B1<-(sum(diag((SS1%*%SS1)))+(sum(diag((SS1))))^2)/(n1-1)
B2<-(sum(diag((SS2%*%SS2)))+(sum(diag((SS2))))^2)/(n2-1)

pay<-sum(diag((S%*%S)))+(sum(diag((S))))^2
payda<-B1+B2

v<-pay/payda    #Equation 4.7 (Nel and Merwe, 1986)

F<-((v-p+1)/(p*v))*T2
T2.table<-((p*v)/(v-p+1))*qf(df1=p,df2=v-p+1,p=alpha,lower.tail=FALSE)
pval<-pf(F, df1=p, df2=(v-p+1), lower.tail = FALSE)
df<-c(p,v-p+1)

Low<-Up<-imp<-NULL
for (i in 1:p) {
a<-c(rep(0,p));a[i]<-1
Low[i]<-a%*%diff-sqrt(T2.table*S[i,i])
Up[i]<-a%*%diff+sqrt(T2.table*S[i,i])
if (Low[i]<=0& Up[i]>=0) {
imp[i]<-"FALSE"
}else {
imp[i]<-"*TRUE*"
} }
CI<-data.frame(Lower=Low,Upper=Up,Import=imp)
colnames(CI)<-c("Lower","Upper","Important Variables?")
rownames(CI)<-colnames(data)

}

#1. Group
Desc1<-rbind(X1.mean,sqrt(diag(S1)))
colnames(Desc1)<-colnames(X1)
rownames(Desc1)<-c("Means","Sd")

#2. Group
Desc2<-rbind(X2.mean,sqrt(diag(S2)))
colnames(Desc2)<-colnames(X2)
rownames(Desc2)<-c("Means","Sd")

results <- list(HT2=T2,F=F, df=df,p.value=pval,CI=CI,alpha=alpha,
Descriptive1=Desc1,Descriptive2=Desc2,Test=Name)
class(results)<-c("MVTests","list")
return(results)
}


Try the MVTests package in your browser

Any scripts or data that you put into this service are public.

MVTests documentation built on Nov. 3, 2023, 5:11 p.m.