Nothing
#' Convert data from wide format to long format
#'
#' The function dataformat_wide_to_long() takes data in wide format (see the example) and convert to long format so that it will be ready for function Heter.test().
#'
#' @param data The data stored in wide format. The first column is the index for subject (named as sub).
#' The second column is index for treatment (named as trt). The remaining columns are named time1, time2, etc. See the example.
#'
#' @return The data in long format.
#'
#' @export
#'
#' @examples
#' # Example of data in wide format
#' # sub trt time1 time2 time3 time4 time5
#' # 1 1 2.4644642 1.7233498 -1.1374695 -0.5242729 -2.379145
#' # 2 1 2.5746848 1.0181738 -0.8325308 -2.4873067 -3.463602
#' # 3 1 2.5813995 -0.7528324 -3.1457645 -3.3135573 -4.364621
#' # 4 1 0.8232141 0.2394987 -2.2073150 -3.3583005 -6.073399
#' # 5 1 0.8274860 0.8323298 -2.1028060 -2.6015848 -3.291307
#' # 1 2 -2.2217084 0.6779049 3.6310542 3.2052691 4.310316
#' # 2 2 -3.3954705 -0.7827040 3.1364749 3.7184895 5.118996
#' #
#' # Data stored in long format
#' # x_{ijk}, k=1, ..., n_i are the kth observation from the ith subject at time j.
#' # 1 1 1 x111
#' # 1 1 2 x112
#' # 1 2 1 x121
#' # 1 2 2 x122
#' # 1 2 3 x123
#'
#' # The following example generate a data set that contains data from
#' # 3 treatments, with 3 subjects in treatment 1, 3 subjects in treatment 2,
#' # and 4 subjects in treatment 3. Each subject contains m=50
#' # repeated observations from Poisson distribution. For the 1st treatment,
#' # the mean vector of the repeated observations from the same subject is
#' # equal to mu1 plus a random effect vector generated by NorRanGen( ).
#' # The m is the number of repeated measurements per subject.
#' f1<-function(m, mu1, raneff) {
#' currentmu=mu1+raneff;
#' currentmu[abs(currentmu)<1e-2]=1e-2;
#' rpois(m, abs(currentmu))}
#' f2<-function(m, mu2, raneff) {
#' currentmu=mu2+raneff;
#' currentmu[abs(currentmu)<1e-2]=1e-2;
#' rpois(m, abs(currentmu))}
#' f3<- function(m, mu3, raneff){
#' currentmu=mu3+raneff;
#' currentmu[abs(currentmu)<1e-2]=1e-2;
#' rpois(m, abs(currentmu))}
#'
#' # The a is the number of treatments. The mn stores the number of subjects in treatments.
#' a=3; mn=c(3, 3, 4); mu1=3; mu2=3; mu3=3; m=50
#' raneff=NorRanGen(m) # generate random effects with AR(1) structure.
#'
#' # Generate data and store in wide format.
#' datawide=numeric()
#' now=0
#' for (i in 1:a){
#' fi=function(x1, x2) f1(m,x1, x2)*(i==1)+f2(m,x1, x2)*(i==2)+f3(m, x1, x2)*(i==3)
#' mu=mu1*(i==1)+mu2*(i==2)+mu3*(i==3)
#' for (k in 1:mn[i]){
#' now=now+1
#' datawide<-rbind(datawide, c(k, i, fi(mu, raneff) ) )
#' colnames(datawide)=c("sub", "trt", paste("time", seq(m), sep=""))
#' #this is a typical way to store data in practice
#' }
#' } #end of j
#'
#' dat=dataformat_wide_to_long(datawide)
dataformat_wide_to_long= function(data){
m=ncol(data)-2; y=c(t(data[,-c(1,2)]))
Time=rep(1:m, nrow(data))
sub=kronecker( data[,1],rep(1, m))
trt=kronecker( data[,2],rep(1, m))
mydata= cbind(trt, Time, sub, y); mydata
}
#' Generate a vector of random effects with specific correlation structure and given variance
#'
#' Generate a vector of random effects that follow AR(1)
#' correlation structure with rho=exp(-1/m) and variances sigmaj being
#' given or andomly generated from uniform distribution on (1.2, 1.4).
#'
#' @param m the length of the vector of the random effects
#' @param sigmaj standard deviations for the random effect vector. Default is a vector from U(1.2, 1.4).
#'
#' @return the random effect vector of length m
#'
#' @export
#'
#' @examples
#' m=50; raneff=NorRanGen(m)
#' # Note: If X ~ N(0, I), then tran X ~ N(0, A) with
#' # A being the cov matrix of AR(1), which contains the standard deviations sigma and the
#' # correlation coeff rho=exp(-1/m).
#' # i.e. corr= (1 rho rho^2 ... rho^(m-1)
#' # rho 1 rho ... rho^(m-2)
#' # ...................
#' # rho^(m-1) rho^(m-2) ... rho )
#' #
#' # To see the correlation values, run the following example
#' # j1=seq(25); cv=numeric()
#' # for (j in 1:25){
#' # lag=abs(j1-j)/25; cv=rbind(cv, exp(-lag))
#' #}
#' # row.names(cv)=j1; colnames(cv)=j1; cv[1,]
NorRanGen=function(m, sigmaj =stats::runif(m, 1.2, 1.4)){
rho<-exp(-1/m)
covm<-rho^abs(matrix(rep(seq(m), m), m, m)- matrix(rep(seq(m), m), m, m, byrow=T))
tran<-t(chol(covm) )
raneff=sigmaj*tran%*%stats::rnorm(m)
raneff
}
#' A function to calaculate cell residues.
#'
#' This function calcualtes the residual $x_{ijk}-bar{x}_{ij.}$ for the kth replication from the ith group at jth time point.
#'
#' @param x the data matrix in long format, whose first column gives the index i, 2nd columns given the index j, 3rd column gives the index k,
#' and the last column gives the observation $x_{ijk}$.
#' For given i and j, $x_{ijk}, k=1, ..., n_{ij}$ are assumed to be i.i.d. from the same distribution.
#'
#' @param coln takes value 4 or 5. If coln=4, the calculation uses original data; if coln=5, the calculation uses rank. The default value for coln is 5.
#' @param Rij is the mean of all observations in ith treatment at jth measurement calculated in other functions
#' @param Rik is the mean for the kth subject in ith treatment calculated in other functions
#
#' @return The residual to be used in Heter.test()
eu<-function(x, coln, Rij, Rik){
# The data stored in x has the long format below:
# 1 1 1 x111
# 1 1 2 x112
# 1 2 1 x121
# 1 2 2 x122
# 1 2 3 x123
d1<-x[ 1]
d2<-x[ 2]
d3<-x[ 3]
R<-x[ coln]
# e<- R-Rij[d1,d2]+Rik[d1, d3]-mean(Rij[d1,])
u<- R-Rij[d1,d2]
result<-rbind(u,u) #used to be rbind(e,u)
result
# returns a vector with two elements. The first one is e_{ijk}
# and the second is u_{ijk}. But later found no need for e_{ijk}
}
#' Unbiased estimate of $sigma_{ijj1}^2$
#'
#' This function calculatess an unbiased estimate of $sigma_{ijj1}^2$ using the u-statisitic of vectors
#' $x=(x_1, x_2, ..., x_{ni})$ and $y=(y_1, y_2, ..., y_{ni})$,
#' where $X_j$ and $Y_j$ are correlated, but $X_j$ and $Y_{j1}$ are independent if $j ne j1$.
#' Note: $sum_{k1 ne k2 ne k3 ne k4} (x_{k1}-x_{k2)})(y_{k1}-y_{k2)}) (x_{k3}-x_{k4)}) (y_{k3}-y_{k4)})$ is an
#' unbiased est. of $4*ni*(ni-1)*(ni-2)*(ni-3) [E(X_{ijk}-mu_{ij} u_{ij1k}) ]^2$.
#'
#' @param x a vector
#' @param y a vector
#'
#' @return A list containing two variables sigmaijj12 and ssijj1 in it.
#' The sigmaijj12 variable gives an unbiased estimate of $sigma_{ijj1}^2$.
#' The ssijj1 variable gives an unbiased estimate of $sigma_{ijj} sigma_{ij_1j_1}$.
#'
#'
fun.sigijj12<-function(x, y){
ni<-length(x)
sigmaijj12<- 0
ssijj1<- 0
for (m1 in 1:ni){
for (m2 in 1:ni){
for (m3 in 1:ni){
for (m4 in 1:ni){
flag<- (m1!=m2)&(m1 !=m3)&(m1 !=m4) &(m2 !=m3) &(m2 !=m4) & (m3 !=m4)
sigmaijj12<-sigmaijj12+ (flag==T)* ( (x[m1]-x[m2])*(y[m1]-y[m2])* (x[m3]-x[m4])*(y[m3]-y[m4]) )
ssijj1<-ssijj1+(flag==T)* (x[m1]-x[m2])^2*(y[m3]-y[m4])^2
}
}
}
}
sigmaijj12<-sigmaijj12/(4*ni*(ni-1)*(ni-2)*(ni-3) )
ssijj1 <- ssijj1/(4*ni*(ni-1)*(ni-2)*(ni-3) )
result<- list(sigmaijj12=sigmaijj12, ssijj1=ssijj1)
result
}
#' An intermediate function to calculate the partial sums
#'
#' This function is internally used to calculate the partial sums which will
#' then be used in calculating the asymptotic variances of the test statistics in Heter.test()
#'
#' @param u a vector
#' @param ranks a vector corresponds to either the fourth column of data or rank of the observations.
#' @param d1 a vector corresponds to the first column of data in the long format.
#' @param d2 a vector corresponds to the second column of data in the long format.
#' @param d3 a vector corresponds to the third column of data in the long format.
#'
#' @param a The number of treatments.
#' @param b The number of time points or repeated measurements per subject.
#' @param mn The vector of sample sizes in treatments.
#' @param mcon a vector corresponds to $b^h$ in Theorem 3.2 of Wand and Akritas (2010a)
#' @param coln takes value 4 or 5. Value 4 is for the tests based on original data
#' and value 5 is for the tests based on ranks.
#'
#' @return Asymptotic variances to be used in Heter.test().
taufun<-function(u, ranks, d1, d2, d3, a, b, mn, mcon, coln){
R<-ranks
usigmaijj1<-usigmaijj12<-array(rep(0, a*b*b), c(a, b, b))
usigma2<-0
us <-numeric()
for (i in 1:a){
us[i]<-0
for (j in 1:b){
for (j1 in 1:b){
usigmaijj1[i, j, j1]<-sum(u[((d1==i)&(d2==j))] * u[((d1==i)&(d2==j1))]) /(mn[i]-1) # unbiased est. of $\sigma<-{ijj1}$
# usigmaijj12 sample covariance $\frac{1}{n<-{i}-1} \sum<-{k=1}^{n<-i} (X<-{ijk}-\bar X<-{ij.})(X<-{ij<-1k}-\bar X<-{ij<-1.})$
# gives unbiased estimate of E[(X<-{ijk}-\mu<-{ij}) (X<-{ij<-1k}-\mu<-{ij<-1}) ] and asymptotically
# unbiased estimate of E(e<-{ijk}e<-{ij'k})
x<-R[((d1==i)&(d2==j))]
y<-R[((d1==i)&(d2==j1))]
usigmaijj12[i, j, j1]<-cov_squared_jackknife(x , y)
} # end of j1
usigma2<-usigma2+usigmaijj1[i, j, j]/mn[i]
us[i]<-us[i]+usigmaijj1[i, j, j]/mn[i]
}
}
usigma2<-usigma2/(a*b) # consistent est. of \frac{1}{ab} \sum<-{ij} \sigma_{ijj} /n_i i.e. E(MSE_{\phi})
EMSEphi<-usigma2
# ucsi1<-2*sum(apply(usigmaijj12, 1, sum) /(mn*(mn-1)) )/(a^2*b)
# estimate of $\zeta<-1$ in thm 2.1
# tmpucsi2<-sum( (apply(usigmaijj1/mn, c(2, 3), sum) )^2 ) - sum(usigmaijj1^2 / mn^2)
# ucsi2<-2*tmpucsi2/(a^2*b) #estimate of $\zeta<-2$ in thm 2.1.
# note when $i\ne i'$, they are indept. so we can use usigmaijj1 directly.
pucsi1<-2*apply(usigmaijj12 /(mn*(mn-1)), c(2, 3), sum) /(a^2*b)
pucsi2<-2*((apply(usigmaijj1/mn, c(2, 3), sum) )^2 - apply(usigmaijj1^2 / mn^2, c(2, 3), sum) )/(a^2*b)
ucsi1<-sum(pucsi1) # estimate of $\zeta<-1$ in thm 2.1 using all sum
ucsi2<-sum(pucsi2) #estimate of $\zeta<-2$ in thm 2.1. using all sum
psum<-apply(usigmaijj1/mn, c(2, 3), sum)
zeta1<-zeta2<-partsum<-numeric()
#mc<-c(1/4, 1/3, 2/5, 9/20)
#mc<-mcon
#beyond mcon, b corresponds to the full sum zeta1 and zeta2 in Theorem 3.2 of Wang and Akritas (2010a)
ll<-0
for (l3 in mcon){
ll<-ll+1
tu1<-tu2<-parts0<-0
for (j0 in 1:b){
# for (j2 in seq(round(-b^l3 ), round(b^l3))){
for (j2 in seq(-l3, l3)){
if ((j0+j2>0)& (j0+j2<=b) ) {
tu1<-tu1+pucsi1[j0, j0+j2]
tu2<-tu2+pucsi2[j0, j0+j2]
parts0<- parts0+ psum[j0, j0+j2]
}
}
}
zeta1[ll]<-tu1
zeta2[ll]<-tu2
partsum[ll]<-parts0
}
# zeta1[length(mc)]<- ucsi1
# zeta2[length(mc)]<- ucsi2
# partsum[length(mc)]<- sum(usigmaijj1/mn)
esigma2<- a*b*usigma2/(a*(b-1))- partsum/(a*b*(b-1)) # estimate of E(MSE) if use obs
N<-sum(mn)*b
tauphi2<-taubeta2<- taugamma2<-numeric()
tauphi2<-zeta1 + zeta2/(a-1)^2
taubeta2<-zeta1 + zeta2
taugamma2<-zeta1 + zeta2/(a-1)^2
if (coln==5) {
tauphi2<-tauphi2/N^4
taubeta2<-taubeta2/N^4
taugamma2<-taugamma2/N^4
esigma2<-esigma2/N^2
usigma2<-usigma2/N^2
}
tauphi2[(tauphi2 <=0)] <- 10^(-15)
taubeta2[(taubeta2 <=0)] <-10^(-15)
taugamma2[(taugamma2 <=0)] <-10^(-15)
list( EMSE=esigma2, tauphi2=tauphi2, taubeta2=taubeta2, EMSEphi=usigma2,taugamma2=taugamma2, zeta1=zeta1, zeta2=zeta2)
}
#' Heteroscedastic test for functional data
#'
#' This function conducts the test of main effect of treatment, interaction effect of treatment and time, main effect of time,
#' and simple effect of treament based on original observations (see Wang and Akritas 2010a) or ranks (see Wang and Akritas 2010b).
#'
#' @param data The data in long format (see the example in function dataformat_wide_to_long( )).
#' @param a The number of treatments.
#' @param b The number of time points or repeated measurements per subject.
#' @param mn The vector of sample sizes in treatments.
#' @param h The h value used in the estimators in Proposition 3.3 of Wang and Akritas (2010a) and Theorem 3.2 of Wang, Higgins, and Blasi (2010).
#' The value of h should be in (0, 0.5) for the test of no main treatment effect
#' or contract effect of treatments. For other tests, h should be in (0, 1).
#' Recommend to use the default value h=0.45 as given in the function.
#' Note: If multiple values are provided to h as a vector, then the calculation
#' will be carried out for each h value, which results in multiple p-values
#' in the returned result for palpha, pbeta, pgamma, pphi.
#'
#' @param method Specifying method='rank' to use rank test. For all other values, the test based on original data will be used.
#' @param Ca Contrast matrix for the contrast effect of the treatments.
#'
#' @return a matrix of p-values for the test of no main
#' treatment effect (palpha), test of no main time effect (pbeta),
#' test of no interaction effect between treatment and time (pgamma),
#' and test of no simple effect of treatment (pphi). For each h value,
#' the p-values are in the same column.
#'
#' @export
#'
#' @examples
#' # Generate a data set that contains data from 3 treatments,
#' # with 3 subjects in treatment 1, 3 subjects in treatment 2,
#' # and 4 subjects in treatment 3. Each subject contains m=50
#' # repeated observations from Poisson distribution. For the 1st treatment,
#' # the mean vector of the repeated observations from the same subject is
#' # equal to mu1 plus a random effect vector generated by NorRanGen( ).
#' # The m is the number of repeated measurements per subject.
#' f1<-function(m, mu1, raneff) {
#' currentmu=mu1+raneff;
#' currentmu[abs(currentmu)<1e-2]=1e-2;
#' rpois(m, abs(currentmu))}
#' f2<-function(m, mu2, raneff) {
#' currentmu=mu2+raneff;
#' currentmu[abs(currentmu)<1e-2]=1e-2;
#' rpois(m, abs(currentmu))}
#' f3<- function(m, mu3, raneff){
#' currentmu=mu3+raneff;
#' currentmu[abs(currentmu)<1e-2]=1e-2;
#' rpois(m, abs(currentmu))}
#' # The a is the number of treatments. The mn stores the number of subjects in treatments.
#' a=3; mn=c(3, 3, 4); mu1=3; mu2=3; mu3=3; m=50
#' # Generate the time effects via random effects with AR(1) structure.
#' raneff=NorRanGen(m)
#' # Generate data and store in wide format.
#' datawide=numeric()
#' now=0
#' for (i in 1:a){
#' fi=function(x1, x2) f1(m,x1, x2)*(i==1)+f2(m,x1, x2)*(i==2)+f3(m, x1, x2)*(i==3)
#' mu=mu1*(i==1)+mu2*(i==2)+mu3*(i==3)
#' for (k in 1:mn[i]){
#' now=now+1
#' datawide<-rbind(datawide, c(k, i, fi(mu, raneff) ) )
#' colnames(datawide)=c("sub", "trt", paste("time", seq(m), sep=""))
#' #this is a typical way to store data in practice
#' }
#' } #end of j
#'
#'# Note:There are different time effects since values in raneff vector are different
#' dat=dataformat_wide_to_long(datawide) #dat is in long format
#' # Define the h value used in Proposition 3.3 of Wang and Akritas (2010a)
#' h=c(0.45, 0.49)
#' #Note: The resulting palpha, pbeta, pgamma, pphi each contains
#' # two p-values, one corresponds to each h value
#' # (see Proposition 3.3 of Wang and Akritas (2010a))
#' # test based on original data.
#' (org= Heter.test(dat, a, m, mn, h, method='original') )
#' #test based on ranks
#' (rankt= Heter.test(dat, a, m, mn, h, method='rank') )
#'
#'
#' @references
#' Haiyan Wang and Michael Akritas (2010a). Inference from heteroscedastic functional data, Journal of Nonparametric Statistics. 22:2, 149-168.
#' DOI: 10.1080/10485250903171621
#'
#' Haiyan Wang and Michael Akritas (2010b). Rank test for heteroscedastic functional data. Journal of Multivariate Analysis. 101: 1791-1805.
#' https://doi.org/10.1016/j.jmva.2010.03.012
#'
#' Haiyan Wang, James Higgins, and Dale Blasi (2010). Distribution-Free
#' Tests For No Effect Of Treatment In Heteroscedastic Functional Data
#' Under Both Weak And Long Range Dependence. Statistics and Probability
#' Letters. 80: 390-402. Doi:10.1016/j.spl.2009.11.016
Heter.test<-function(data,a,b, mn, h=0.45, method='rank', Ca=cbind(as.vector(rep(1, a-1)), -diag(a-1)) ){
if (b>20) mcon=round(b^h) else mcon= rep(4, length(h))
if (method !='rank') coln=4 else {
coln=5; data=cbind(data, rank(data[,4]))
}
N<-sum(mn)*b
d1<-data[, 1]; d2<-data[, 2]; d3<-data[, 3]
ranks<-data[, coln]
Rij<-as.matrix(tapply(ranks, list(d1, d2), mean) )
# matrix with \bar{R}_{ij.} as the (i,j) element
Rik<-as.matrix(tapply(ranks, list(d1, d3), mean) )
# matrix with \bar{R}_{i.k} as the (i,j) element might have NA if unbalanced.
Rik<-replace(Rik, is.na(Rik), 0) # replace NA's in matrix Rik by 0.
# note: after replacement, the number of rows still correct, but the number of columns
# would be all same as max_i n_i instead of n_i columns for the ith row.
Ri<-apply(Rij, 1, mean) # returns a vector (\wtR_{1..}, ..., \wtR_{a..})
Rj<-apply(Rij, 2, mean)
Rim<-kronecker(Ri, t(as.vector(rep(1,b))) )
# an a by b matrix with all elements of the ith row same as \wtR_{i..}
Rjm<-kronecker(t(Rj), as.vector(rep(1,a)))
# an a by b matrix with all elements of the ith column same as \wtR_{.j.}
## calculate test statistics
MSbeta<- a* sum( (Rj-mean(Rj))^2 ) /(b-1)
MSgamma<- sum((Rij-Rim-Rjm+mean(Ri) )^2 )/((a-1)*(b-1) )
MSphi<- sum((Rij-Rjm)^2 )/((a-1)*b )
tmp1<-tapply(ranks^2, d1, sum)
# returns a vector with ith element \sum_{j=1}^b \sum_{k=1}^{n_i} R_{ijk}^2
tmp2<-sum( tmp1/(mn*(mn-1)) ) # \sum_{i,j, k}\frac{X_{ijk}^2}{n_i(n_i-1)}
MSEphi<- tmp2/(a*b) - mean( apply(Rij^2, 1, mean)/(mn-1) )
MSE<- MSEphi *b/(b-1) - mean(apply(Rik^2, 1, sum)/(mn*(mn-1)) ) *b/(b-1) + mean(Ri^2/(mn-1)) *b/(b-1)
## another way to calculate MSE
# MSE<- tmp2/(a*(b-1)) - mean(apply(Rik^2, 1, sum)/(mn*(mn-1)) ) *b/(b-1) - mean(apply((Rij-Rim)^2, 1, sum)/(mn-1) )/(b-1)
Fbeta<-MSbeta/MSE
Fgamma<-MSgamma/MSE
Fphi<-MSphi/MSEphi
Dbeta<-MSbeta-MSE
Dgamma<-MSgamma-MSE
Dphi<-MSphi-MSEphi
euijk<-apply(data, 1, e<-function(x) eu(x, coln, Rij, Rik))
e<-euijk[1,] # returns the e_{ijk} as a vector, same as ranks structure
u<-euijk[2,] # returns the u_{ijk} as a vector, same as ranks structure
vars<-taufun(u, ranks, d1, d2, d3, a, b, mn,mcon, coln)
TSbeta<-sqrt(b)*(Fbeta-1)
TSgamma<-sqrt(b)*(Fgamma-1)
TSphi<-sqrt(b)*(Fphi-1)
DSbeta<-sqrt(b)*Dbeta
DSgamma<-sqrt(b)*Dgamma
DSphi<-sqrt(b)*Dphi
if (coln==5) {
DSbeta<-DSbeta/N^2
DSgamma<-DSgamma/N^2
DSphi<-DSphi/N^2
}
varTSbeta<-vars$taubeta2/vars$EMSE^2
varTSgamma<-vars$taugamma2/vars$EMSE^2
varTSphi<-vars$tauphi2/vars$EMSEphi^2
pbeta<- 1-stats::pnorm( TSbeta/sqrt(varTSbeta) )
pphi<- 1-stats::pnorm( TSphi/sqrt(varTSphi) )
pgamma<- 1- stats::pnorm(TSgamma/sqrt(varTSgamma) )
Dpbeta<- 1-stats::pnorm( DSbeta/sqrt(vars$taubeta2) )
Dpphi<- 1-stats::pnorm( DSphi/sqrt(vars$tauphi2) )
Dpgamma<- 1- stats::pnorm(DSgamma/sqrt(vars$taugamma2) )
# test for main treatment effect
#Ca<-cbind(as.vector(rep(1, a-1)), -diag(a-1))
N<-sum(mn)*b
etai<-matrix(0, length(mcon), a)
ll<-0
for (l in mcon){
ll<-ll+1
for (i in 1:a){
for (j in 1:b){
# for (j1 in seq(round(-b^l ), round(b^l))){
for (j1 in seq(-l, l) ){
if ((j+j1>0)& (j+j1<=b) ) {
tmpinc<- sum((ranks[((d1==i)&(d2==j))] -Rij[i, j])*(ranks[((d1==i)&(d2==(j+j1)))] -Rij[i, (j+j1)]))*sum(mn)/(b*mn[i]*(mn[i]-1))
etai[ll, i]<-etai[ll, i]+tmpinc
}
} #end of j1
} #end of j
} #end of i
} # end of l
Ri<-as.vector(Ri)
TSalphastat<-function(etaii, Ca, N, Ri) N * t(Ri)%*% t(Ca)%*%solve( Ca%*% diag(etaii)%*% t(Ca) ) %*% Ca %*% Ri
TSalpha<- apply(etai, 1, TSalpha<-function(etaii) {TSalphastat(etaii, Ca, N, Ri) } )
palpha<-1-stats::pchisq(TSalpha, nrow(Ca))
palpha1<- palpha
results=rbind(palpha, pbeta, pgamma, pphi)
row.names(results)=c('palpha', 'pbeta','pgamma', 'pphi')
colnames(results)=paste0('h=', h)
#results=list(palpha=palpha, pbeta=pbeta, pgamma=pgamma, pphi=pphi)
#names(results)=c("palpha", "pbeta", "pgamma", "pphi")
# list(TSalpha=TSalpha, Dpalpha=palpha1, Dpbeta=Dpbeta, Dpgamma=Dpgamma, Dpphi=Dpphi, palpha=palpha1, pbeta=pbeta, pgamma=pgamma, pphi=pphi,
# results=results)
results
}
#' Classical F test for mixed effects model
#'
#'#' This function conducts the test of main effect of treatment, interaction effect of treatment and time, main effect of time,
#' and simple effect of treament based on mixed effects models
#'
#' @param data The data in long format (see the example in function dataformat_wide_to_long( )).
#' @param a The number of treatments.
#' @param b The number of time points or repeated measurements per subject.
#' @param mn The vector of sample sizes in treatments.
#' @return a list of p-values from mixed effect model for the test of no main
#' treatment effect (palpha), test of no main time effect (pbeta),
#' test of no interaction effect between treatment and time (pgamma),
#' and test of no simple effect of treatment (pphi).
#'
CF<-function(data,a,b, mn){
coln=4
ranks<-data[, coln]
d1<-data[, 1]
d2<-data[, 2]
d3<-data[, 3]
Rij<-as.matrix(tapply(ranks, list(d1, d2), mean) )
# matrix with \bar{R}_{ij.} as the (i,j) element
Rik<-as.matrix(tapply(ranks, list(d1, d3), mean) )
# matrix with \bar{R}_{i.k} as the (i,j) element might have NA if unbalanced.
Rik<-replace(Rik, is.na(Rik), 0) # replace NA's in matrix Rik by 0.
# note: after replace, the number of rows still correct, but the number of columns
# would be all same as max_i n_i instead of n_i columns for the ith row.
Rbari<-as.vector(tapply(ranks, d1, mean) ) #vector with ith element \bar{R}_{i..}
Rbarj<-as.vector(tapply(ranks, d2, mean) ) #vector with ith element \bar{R}_{.j.}
Rim<-kronecker(Rbari, t(as.vector(rep(1,b))) )
# a a by b matrix with all elements of the ith row same as \wtR_{i..}
Rjm<-kronecker(t(Rbarj), as.vector(rep(1,a)))
# a a by b matrix with all elements of the ith column same as \wtR_{.j.}
## calculate test statistics
MSalpha<- sum(mn*(Rbari-mean(ranks))^2)*b/(a-1)
MSb<- (sum(Rik^2)-sum(Rbari^2*mn) )*b/(sum(mn)-a)
Falpha<-MSalpha/MSb
MSphi<- sum(mn*apply((Rij-Rjm)^2 , 1, sum) )/((a-1)*b )
MSEphi<- (sum(ranks^2)- sum(mn*apply(Rij^2, 1, sum)) )/(b*(sum(mn)-a))
Fphi<-MSphi/MSEphi
MSbeta<- sum(mn)* sum( (Rbarj-mean(ranks))^2 ) /(b-1)
MSgamma<- sum(mn*apply((Rij-Rim-Rjm+mean(ranks) )^2, 1, sum ))/((a-1)*(b-1) )
MSE<- MSEphi*b/(b-1) - MSb/(b-1)
Fbeta<-MSbeta/MSE
Fgamma<-MSgamma/MSE
palpha<-1-stats::pf(Falpha, a-1, sum(mn)-a )
pbeta<-1-stats::pf(Fbeta, b-1, (sum(mn)-a)*(b-1) )
pgamma<-1-stats::pf(Fgamma, (a-1)*(b-1), (sum(mn)-a)*(b-1) )
pphi<-1-stats::pf(Fphi, (a-1)*b, (sum(mn)-a)*b )
list(palpha=palpha, pbeta=pbeta, pgamma=pgamma, pphi=pphi)
}
#' Unbiased estimate of squared variance $sigma^4$ based on U-statistic of an i.i.d. sample
#'
#' This function takes a random sample and produces an unbiased estimate of the squared variance based on
#' the U-statistic $sum_{k1 ne k2 ne k3 ne k4} (x_{k1}-x_{k2)})^2 (x_{k3}-x_{k4)})^2$.
#'
#' @param x is a vector of i.i.d. observations.
#'
#' @return unbiased estimate of squared variance $sigma^4$, where $sigma^2$ is the variance.
#'
#' @export
#'
#' @examples
#' x=stats::rnorm(10)
#' sigma4(x)
sigma4<-function(x){
nij<-length(x)
sigmaij4<- 0
for (m1 in 1:nij){
for (m2 in 1:nij){
for (m3 in 1:nij){
for (m4 in 1:nij){
flag<- (m1!=m2)&(m1 !=m3)&(m1 !=m4) &(m2 !=m3) &(m2 !=m4) & (m3 !=m4)
sigmaij4<-sigmaij4+ (flag==T)*(x[m1]-x[m2])^2 * (x[m3]-x[m4])^2
}
}
}
}
sigmaij4<-sigmaij4/(4*nij*(nij-1)*(nij-2)*(nij-3) )
sigmaij4
}
thetahat<-function(x) (mean((x-mean(x))^2 ))^2
#' Jackknife estimate of $sigma^4$ using an i.i.d. sample
#'
#' This function takes a random sample and approximates an unbiased estimate of the squared variance based on
#' Jackknife estimate.
#'
#' @param x is a vector of i.i.d. observations.
#'
#' @return Jackknife estimate of $sigma^4$, where $sigma^2$ is the variance.
#'
#' @export
#'
#' @examples
#' x=stats::rnorm(10)
#' sigma4jackknife(x)
sigma4jackknife<-function(x){
n<-length(x)
s4hat<- thetahat(x)
result <- n *s4hat
for (i in 1:n) result<-result- (n-1)/n* thetahat(x[-i])
result
}
#' Bootstrap estimate of $sigma^4$ using an i.i.d. sample
#'
#' This function takes a random sample and approximates an unbiased estimate of the squared variance based on
#' Bootstrap estimate.
#'
#' @param x is a vector of i.i.d. observations.
#'
#' @return Bootstrap estimate of $ sigma^4$, where $sigma^2$ is the variance.
#'
#' @export
#'
#' @examples
#' x=stats::rnorm(10)
#' sigma4bootstrap(x)
sigma4bootstrap<-function(x){
n<-length(x)
thetahatstar<-mean(apply(matrix(sample(x, 3*1000, replace = T), 1000, 3), 1, thetahat ) )
result<-2*thetahat(x)-thetahatstar
result
}
#' Jackknife estimate of the squared covariance between two time points
#'
#' This function provides the Jackknife estimate of the squared
#' covariance using i.i.d. obervations from one variable given in vector \code{x}
#' and i.i.d. observations from a second variable given in vector \code{y}.
#'
#' @param x a vector of i.i.d. observations
#' @param y a second vector of i.i.d. observations
#'
#' @return Jackknife estimate of the squared covariance
cov_squared_jackknife<-function(x, y){
# Jackknife estimate of \sigma_{ijj'}^2
# x=(X_{ij1}, \cdots, X_{ijn_i}), y=(X_{ij'1}, \cdots, X_{ij'n_i}),
thetahatijj1<-function(x, y) (mean((x-mean(x))*(y-mean(y)) ))^2
n<-length(x)
s4hat<- thetahatijj1(x, y)
result <- n *s4hat
for (i in 1:n) result<-result- (n-1)/n* thetahatijj1(x[-i], y[-i])
result
}
#' Test of no contrast effect of the treatments
#'
#' This function conducts the test of no contract effect of treatments
#' based on Theorem 3.2 of Wang, Higgins, and Blasi (2010).
#'
#' @param data The data in long format (see the example in function dataformat_wide_to_long( )).
#' @param a The number of treatments.
#' @param b The number of time points or repeated measurements per subject.
#' @param mn The vector of sample sizes in treatments.
#' @param h The h value used in the estimators in Theorem 3.2 of Wang, Higgins, and Blasi (2010).
#' The value of h should be in (0, 0.5).
#' Recommend to use the default value h=0.45 as given in the function.
#' Note: If multiple values are provided to h as a vector, then the calculation
#' will be carried out for each h value, which results in multiple p-values
#' in the returned result.
#'
#' @param method Specifying method='rank' to use rank test. For all other values, the test based on original data will be used.
#' @param Ca Contrast matrix for the contrast effect of the treatments.
#' The default contrast corresponds to the main treatment effect.
#'
#' @return a matrix that contains the test statistics and pvalues for each h value.
#'
#' @export
#'
#' @examples
#' # Generate a data set that contains data from 3 treatments,
#' # with 3 subjects in treatment 1, 3 subjects in treatment 2,
#' # and 4 subjects in treatment 3. Each subject contains m=50
#' # repeated observations from Poisson distribution. For the 1st treatment,
#' # the mean vector of the repeated observations from the same subject is
#' # equal to mu1 plus a random effect vector generated by NorRanGen( ).
#' # The m is the number of repeated measurements per subject.
#' f1<-function(m, mu1, raneff) {
#' currentmu=mu1+raneff;
#' currentmu[abs(currentmu)<1e-2]=1e-2;
#' rpois(m, abs(currentmu))}
#' f2<-function(m, mu2, raneff) {
#' currentmu=mu2+raneff;
#' currentmu[abs(currentmu)<1e-2]=1e-2;
#' rpois(m, abs(currentmu))}
#' f3<- function(m, mu3, raneff){
#' currentmu=mu3+raneff;
#' currentmu[abs(currentmu)<1e-2]=1e-2;
#' rpois(m, abs(currentmu))}
#' # The a is the number of treatments. The mn stores the number of subjects in treatments.
#' a=3; mn=c(3, 3, 4); mu1=3; mu2=3; mu3=2; m=50
#' # Note treatment 3 has mean mu3=2, which is different from the mean of
#' # the other two treatments.
#' # Generate the time effects via random effects with AR(1) structure.
#' raneff=NorRanGen(m)
#' # Generate data and store in wide format.
#' datawide=numeric()
#' now=0
#' for (i in 1:a){
#' fi=function(x1, x2) f1(m,x1, x2)*(i==1)+f2(m,x1, x2)*(i==2)+f3(m, x1, x2)*(i==3)
#' mu=mu1*(i==1)+mu2*(i==2)+mu3*(i==3)
#' for (k in 1:mn[i]){
#' now=now+1
#' datawide<-rbind(datawide, c(k, i, fi(mu, raneff) ) )
#' colnames(datawide)=c("sub", "trt", paste("time", seq(m), sep=""))
#' #this is a typical way to store data in practice
#' }
#' } #end of j
#'
#'# Note:There are different time effects since values in raneff vector are different
#' dat=dataformat_wide_to_long(datawide) #dat is in long format
#' #Note: For each h value below, the test statistic and p-value are calculated.
#' # (see Theorem 3.2 of Wang, Higgins, and Blasi (2010))
#'
#' tcontrast(dat, a, m, mn, h=c(0.45, 0.49), method='original')
#'
#' @references
#' Haiyan Wang and Michael Akritas (2010a). Inference from heteroscedastic functional data, Journal of Nonparametric Statistics. 22:2, 149-168.
#' DOI: 10.1080/10485250903171621
#'
#' Haiyan Wang and Michael Akritas (2010b). Rank test for heteroscedastic functional data. Journal of Multivariate Analysis. 101: 1791-1805.
#' https://doi.org/10.1016/j.jmva.2010.03.012
#'
#' Haiyan Wang, James Higgins, and Dale Blasi (2010). Distribution-Free
#' Tests For No Effect Of Treatment In Heteroscedastic Functional Data
#' Under Both Weak And Long Range Dependence. Statistics and Probability
#' Letters. 80: 390-402. Doi:10.1016/j.spl.2009.11.016
tcontrast<-function(data,a,b, mn, h=0.45, method='rank', Ca=cbind(as.vector(rep(1, a-1)), -diag(a-1)) ){
if (b>20) mcon=round(b^h) else mcon= rep(4, length(h))
if (method !='rank') coln=4 else {
coln=5; data=cbind(data, rank(data[,4]))
}
N<-sum(mn)*b
d1<-data[, 1]
d2<-data[, 2]
d3<-data[, 3]
ranks<-data[, coln]
Rij<-as.matrix(tapply(ranks, list(d1, d2), mean) )
# matrix with \bar{R}<-{ij.} as the (i,j) element
Ri<-apply(Rij, 1, mean) # returns a vector (\wtR<-{1..}, ..., \wtR<-{a..})
N<-sum(mn)*b
etai<-matrix(0, length(mcon), a)
ll<-0
for (l in mcon){
ll<-ll+1
for (i in 1:a){
for (j in 1:b){
# for (j1 in seq(round(-b^l ), round(b^l))){
for (j1 in seq(-l, l) ){
if ((j+j1>0)& (j+j1<=b) ) {
tmpinc<- sum((ranks[((d1==i)&(d2==j))] -Rij[i, j])*(ranks[((d1==i)&(d2==(j+j1)))] -Rij[i, (j+j1)]))*sum(mn)/(b*mn[i]*(mn[i]-1))
etai[ll, i]<-etai[ll, i]+tmpinc
}
} #end of j1
} #end of j
} #end of i
} # end of l
Ri<-as.vector(Ri)
TSalphastat<-function(etaii, Ca, N, Ri) N * t(Ri)%*% t(Ca)%*%solve( Ca%*% diag(etaii)%*% t(Ca) ) %*% Ca %*% Ri
TSalpha<- apply(etai, 1, TSalpha<-function(etaii) {TSalphastat(etaii, Ca, N, Ri) } )
palpha<-1-stats::pchisq(TSalpha, nrow(Ca))
palpha1<- palpha
result=rbind(TSalpha, palpha1)
row.names(result)=c('Test_Statistic', 'p_value')
colnames(result)=paste0('h=', h)
result
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.