R/functions.R

Defines functions tcontrast cov_squared_jackknife sigma4bootstrap sigma4jackknife thetahat sigma4 CF Heter.test taufun fun.sigijj12 eu NorRanGen dataformat_wide_to_long

Documented in CF cov_squared_jackknife dataformat_wide_to_long eu fun.sigijj12 Heter.test NorRanGen sigma4 sigma4bootstrap sigma4jackknife taufun tcontrast

#' Convert data from wide format to long format
#'
#' The function dataformat_wide_to_long() takes data in wide format (see the example) and convert to long format so that it will be ready for function Heter.test().
#'
#' @param data The data stored in wide format. The first column is the index for subject (named as sub).
#' The second column is index for treatment (named as trt). The remaining columns are named time1, time2, etc. See the example.
#'
#' @return The data in long format.
#'
#' @export
#'
#' @examples
#' # Example of data in wide format
#' # sub trt      time1          time2         time3           time4         time5
#'  # 1   1  2.4644642  1.7233498 -1.1374695 -0.5242729 -2.379145
#'  # 2   1  2.5746848  1.0181738 -0.8325308 -2.4873067 -3.463602
#'  # 3   1  2.5813995 -0.7528324 -3.1457645 -3.3135573 -4.364621
#'  # 4   1  0.8232141  0.2394987 -2.2073150 -3.3583005 -6.073399
#'  # 5   1  0.8274860  0.8323298 -2.1028060 -2.6015848 -3.291307
#'  # 1   2 -2.2217084  0.6779049  3.6310542  3.2052691  4.310316
#'  # 2   2 -3.3954705 -0.7827040  3.1364749  3.7184895  5.118996
#'  #
#'  # Data stored in long format
#'  # x_{ijk}, k=1, ..., n_i are the kth observation from the ith subject at time j.
#'  # 1 1 1 x111
#'  # 1 1 2 x112
#'  # 1 2 1 x121
#'  # 1 2 2 x122
#'  # 1 2 3 x123
#'
#'  #  The following example generate a data set that contains data from
#'  # 3 treatments, with 3 subjects in treatment 1, 3 subjects in treatment 2,
#'  # and 4 subjects in treatment 3. Each subject contains m=50
#'  # repeated observations from Poisson distribution. For the 1st treatment,
#'  # the mean vector of the repeated observations from the same subject is
#'  # equal to mu1 plus a random effect vector generated by NorRanGen( ).
#'  # The m is the number of repeated measurements per subject.
#'  f1<-function(m, mu1, raneff) {
#'      currentmu=mu1+raneff;
#'      currentmu[abs(currentmu)<1e-2]=1e-2;
#'      rpois(m, abs(currentmu))}
#'  f2<-function(m, mu2, raneff) {
#'      currentmu=mu2+raneff;
#'      currentmu[abs(currentmu)<1e-2]=1e-2;
#'      rpois(m, abs(currentmu))}
#'  f3<- function(m, mu3, raneff){
#'      currentmu=mu3+raneff;
#'      currentmu[abs(currentmu)<1e-2]=1e-2;
#'      rpois(m, abs(currentmu))}
#'
#'  # The a is the number of treatments. The mn stores the number of subjects in treatments.
#'  a=3;  mn=c(3, 3, 4); mu1=3; mu2=3; mu3=3; m=50
#'  raneff=NorRanGen(m) # generate random effects with AR(1) structure.
#'
#'  # Generate data and store in wide format.
#' datawide=numeric()
#' now=0
#' for (i in 1:a){
#'    fi=function(x1, x2) f1(m,x1, x2)*(i==1)+f2(m,x1, x2)*(i==2)+f3(m, x1, x2)*(i==3)
#'    mu=mu1*(i==1)+mu2*(i==2)+mu3*(i==3)
#'    for (k in 1:mn[i]){
#'           now=now+1
#'           datawide<-rbind(datawide, c(k, i, fi(mu, raneff) )   	)
#'           colnames(datawide)=c("sub", "trt", paste("time", seq(m), sep=""))
#'           #this is a typical way to store data in practice
#'     }
#'  } #end of j
#'
#'  dat=dataformat_wide_to_long(datawide)
dataformat_wide_to_long= function(data){
   m=ncol(data)-2;    y=c(t(data[,-c(1,2)]))
   Time=rep(1:m, nrow(data))
   sub=kronecker( data[,1],rep(1, m))
   trt=kronecker( data[,2],rep(1, m))
   mydata= cbind(trt, Time, sub, y);  mydata
}

#' Generate a vector of random effects with specific correlation structure and given variance
#'
#' Generate a vector of random effects that follow AR(1)
#' correlation structure with rho=exp(-1/m) and variances sigmaj being
#' given or andomly generated from uniform distribution on (1.2, 1.4).
#'
#' @param m the length of the vector of the random effects
#' @param sigmaj standard deviations for the random effect vector. Default is a vector from U(1.2, 1.4).
#'
#' @return the random effect vector of length m
#'
#' @export
#'
#' @examples
#'    m=50;  raneff=NorRanGen(m)
#' # Note: If X ~ N(0, I), then tran X ~ N(0, A) with
#' # A being the cov matrix of AR(1), which contains the standard deviations sigma and the
#' # correlation coeff rho=exp(-1/m).
#' # i.e. corr= (1  rho  rho^2 ... rho^(m-1)
#' #             rho 1   rho   ... rho^(m-2)
#' #                 ...................
#' #             rho^(m-1) rho^(m-2) ... rho )
#' #
#' # To see the correlation values, run the following example
#' # j1=seq(25); cv=numeric()
#' # for (j in 1:25){
#' #  lag=abs(j1-j)/25; cv=rbind(cv, exp(-lag))
#' #}
#' # row.names(cv)=j1;  colnames(cv)=j1;  cv[1,]
NorRanGen=function(m, sigmaj =stats::runif(m, 1.2, 1.4)){
    rho<-exp(-1/m)
    covm<-rho^abs(matrix(rep(seq(m), m), m, m)- matrix(rep(seq(m), m), m, m, byrow=T))
    tran<-t(chol(covm) )
    raneff=sigmaj*tran%*%stats::rnorm(m)
    raneff
}


#' A function to calaculate cell residues.
#'
#' This function calcualtes the residual $x_{ijk}-bar{x}_{ij.}$ for the kth replication from the ith group at jth time point.
#'
#' @param x the data matrix in long format, whose first column gives the index i, 2nd columns given the index j, 3rd column gives the index k,
#' and the last column gives the observation $x_{ijk}$.
#' For given i and j,  $x_{ijk}, k=1, ..., n_{ij}$ are assumed to be i.i.d. from the same distribution.
#'
#' @param coln takes value 4 or 5. If coln=4, the calculation uses original data;  if coln=5, the calculation uses rank. The  default value for coln is 5.
#' @param Rij is the mean of all observations in ith treatment at jth measurement calculated in other functions
#' @param Rik is the mean for the kth subject in ith treatment calculated in other functions
#
#' @return The residual to be used in Heter.test()
eu<-function(x, coln, Rij, Rik){
  # The data stored in x has the long format below:
  # 1 1 1 x111
  # 1 1 2 x112
  # 1 2 1 x121
  # 1 2 2 x122
  # 1 2 3 x123
          d1<-x[ 1]
 	      d2<-x[ 2]
        d3<-x[ 3]
        R<-x[ coln]
        # e<- R-Rij[d1,d2]+Rik[d1, d3]-mean(Rij[d1,])
        u<- R-Rij[d1,d2]
        result<-rbind(u,u)   #used to be rbind(e,u)
        result
        # returns a vector with two elements. The first one is e_{ijk}
        # and the second is u_{ijk}. But later found no need for e_{ijk}

        }





#'  Unbiased estimate of  $sigma_{ijj1}^2$
#'
#' This function calculatess an unbiased estimate of  $sigma_{ijj1}^2$ using the u-statisitic of vectors
#' $x=(x_1, x_2, ..., x_{ni})$ and  $y=(y_1, y_2, ..., y_{ni})$,
#'  where $X_j$ and $Y_j$ are correlated, but $X_j$ and $Y_{j1}$ are independent if $j ne j1$.
#' Note: $sum_{k1 ne k2 ne k3 ne k4} (x_{k1}-x_{k2)})(y_{k1}-y_{k2)}) (x_{k3}-x_{k4)}) (y_{k3}-y_{k4)})$ is an
#' unbiased est. of  $4*ni*(ni-1)*(ni-2)*(ni-3) [E(X_{ijk}-mu_{ij} u_{ij1k}) ]^2$.
#'
#' @param x a vector
#' @param y  a vector
#'
#' @return A list containing two variables sigmaijj12 and ssijj1 in it.
#'       The sigmaijj12 variable gives an unbiased estimate of $sigma_{ijj1}^2$.
#'       The ssijj1 variable gives an unbiased estimate of $sigma_{ijj} sigma_{ij_1j_1}$.
#'
#'
fun.sigijj12<-function(x, y){
        ni<-length(x)
	sigmaijj12<- 0
        ssijj1<- 0
	for (m1 in 1:ni){
	 for (m2 in 1:ni){
	   for (m3 in 1:ni){
	     for (m4 in 1:ni){
	     flag<- (m1!=m2)&(m1 !=m3)&(m1 !=m4) &(m2 !=m3) &(m2 !=m4) & (m3 !=m4)
	     sigmaijj12<-sigmaijj12+ (flag==T)* ( (x[m1]-x[m2])*(y[m1]-y[m2])* (x[m3]-x[m4])*(y[m3]-y[m4]) )
             ssijj1<-ssijj1+(flag==T)*  (x[m1]-x[m2])^2*(y[m3]-y[m4])^2
	      }
	    }
	   }
	  }
	 sigmaijj12<-sigmaijj12/(4*ni*(ni-1)*(ni-2)*(ni-3) )
        ssijj1 <- ssijj1/(4*ni*(ni-1)*(ni-2)*(ni-3) )
	 result<- list(sigmaijj12=sigmaijj12, ssijj1=ssijj1)
	 result
}

#' An intermediate function to calculate the partial sums
#'
#' This function is internally used to calculate the partial sums which will
#' then be used in calculating the asymptotic variances of the test statistics in Heter.test()
#'
#' @param u a vector
#' @param ranks a vector corresponds to either the fourth column of data or rank of the observations.
#' @param d1 a vector corresponds to the first column of data in the long format.
#' @param d2 a vector corresponds to the second column of data in the long format.
#' @param d3 a vector corresponds to the third column of data in the long format.
#'
#' @param a The number of treatments.
#' @param b The number of time points or repeated measurements per subject.
#' @param mn The vector of sample sizes in treatments.
#' @param mcon a vector corresponds to $b^h$ in Theorem 3.2 of Wand and Akritas (2010a)
#' @param coln takes value 4 or 5. Value 4 is for the tests based on original data
#'             and value 5 is for the tests based on ranks.
#'
#' @return Asymptotic variances to be used in Heter.test().
taufun<-function(u, ranks, d1, d2, d3, a, b, mn, mcon, coln){
   R<-ranks
   usigmaijj1<-usigmaijj12<-array(rep(0, a*b*b), c(a, b, b))
   usigma2<-0
   us <-numeric()
   for (i in 1:a){
     us[i]<-0
    for (j in 1:b){
    for (j1 in 1:b){
    usigmaijj1[i, j, j1]<-sum(u[((d1==i)&(d2==j))] * u[((d1==i)&(d2==j1))])  /(mn[i]-1)  # unbiased est. of $\sigma<-{ijj1}$
    # usigmaijj12 sample covariance $\frac{1}{n<-{i}-1}  \sum<-{k=1}^{n<-i} (X<-{ijk}-\bar X<-{ij.})(X<-{ij<-1k}-\bar X<-{ij<-1.})$
    # gives unbiased estimate of E[(X<-{ijk}-\mu<-{ij}) (X<-{ij<-1k}-\mu<-{ij<-1}) ] and asymptotically
    # unbiased estimate of E(e<-{ijk}e<-{ij'k})
    x<-R[((d1==i)&(d2==j))]
    y<-R[((d1==i)&(d2==j1))]
    usigmaijj12[i, j, j1]<-cov_squared_jackknife(x ,  y)

   }  # end of j1
   usigma2<-usigma2+usigmaijj1[i, j, j]/mn[i]
   us[i]<-us[i]+usigmaijj1[i, j, j]/mn[i]

 }

}

  usigma2<-usigma2/(a*b)   # consistent est. of \frac{1}{ab} \sum<-{ij} \sigma_{ijj} /n_i  i.e. E(MSE_{\phi})
  EMSEphi<-usigma2

#  ucsi1<-2*sum(apply(usigmaijj12, 1, sum) /(mn*(mn-1))  )/(a^2*b)
       # estimate of $\zeta<-1$ in thm 2.1
#  tmpucsi2<-sum( (apply(usigmaijj1/mn, c(2, 3), sum) )^2 ) - sum(usigmaijj1^2  / mn^2)
#  ucsi2<-2*tmpucsi2/(a^2*b)  #estimate of $\zeta<-2$ in thm 2.1.
     #  note when $i\ne i'$, they are indept. so we can use  usigmaijj1 directly.

pucsi1<-2*apply(usigmaijj12 /(mn*(mn-1)), c(2, 3), sum)   /(a^2*b)
pucsi2<-2*((apply(usigmaijj1/mn, c(2, 3), sum) )^2 - apply(usigmaijj1^2  / mn^2, c(2, 3), sum) )/(a^2*b)
ucsi1<-sum(pucsi1)  # estimate of $\zeta<-1$ in thm 2.1 using all sum
ucsi2<-sum(pucsi2)  #estimate of $\zeta<-2$ in thm 2.1. using all sum

psum<-apply(usigmaijj1/mn, c(2, 3), sum)

zeta1<-zeta2<-partsum<-numeric()
#mc<-c(1/4, 1/3, 2/5, 9/20)
#mc<-mcon
#beyond mcon, b corresponds to the full sum zeta1 and zeta2 in Theorem 3.2 of Wang and Akritas (2010a)
ll<-0
for (l3 in mcon){
  ll<-ll+1
  tu1<-tu2<-parts0<-0
 for (j0 in 1:b){
# for (j2 in seq(round(-b^l3 ), round(b^l3))){
for (j2 in seq(-l3, l3)){
          if ((j0+j2>0)& (j0+j2<=b)  )  {
		tu1<-tu1+pucsi1[j0, j0+j2]
		tu2<-tu2+pucsi2[j0, j0+j2]
		parts0<- parts0+ psum[j0, j0+j2]
           }

  }
  }
zeta1[ll]<-tu1
zeta2[ll]<-tu2
partsum[ll]<-parts0
}

# zeta1[length(mc)]<- ucsi1
# zeta2[length(mc)]<- ucsi2
# partsum[length(mc)]<- sum(usigmaijj1/mn)
 esigma2<- a*b*usigma2/(a*(b-1))- partsum/(a*b*(b-1)) # estimate of E(MSE) if use obs



 N<-sum(mn)*b


tauphi2<-taubeta2<- taugamma2<-numeric()

 tauphi2<-zeta1 + zeta2/(a-1)^2
 taubeta2<-zeta1 + zeta2
 taugamma2<-zeta1 + zeta2/(a-1)^2

  if (coln==5) {
      tauphi2<-tauphi2/N^4
      taubeta2<-taubeta2/N^4
      taugamma2<-taugamma2/N^4
      esigma2<-esigma2/N^2
      usigma2<-usigma2/N^2
     }
  tauphi2[(tauphi2 <=0)] <- 10^(-15)
  taubeta2[(taubeta2 <=0)] <-10^(-15)
  taugamma2[(taugamma2 <=0)] <-10^(-15)

  list( EMSE=esigma2, tauphi2=tauphi2, taubeta2=taubeta2, EMSEphi=usigma2,taugamma2=taugamma2, zeta1=zeta1, zeta2=zeta2)
  }





#' Heteroscedastic test for functional data
#'
#' This function conducts the test of main effect of treatment, interaction effect of treatment and time,  main effect of time,
#' and simple effect of treament based on original observations (see  Wang and Akritas 2010a) or ranks (see Wang and Akritas 2010b).
#'
#' @param data The data in long format (see the example in function  dataformat_wide_to_long( )).
#' @param a The number of treatments.
#' @param b The number of time points or repeated measurements per subject.
#' @param mn The vector of sample sizes in treatments.
#' @param h The h value used in the estimators in Proposition 3.3 of Wang and Akritas (2010a) and Theorem 3.2 of Wang, Higgins, and Blasi (2010).
#'          The value of h should be in (0, 0.5) for the test of no main treatment effect
#'          or contract effect of treatments. For other tests, h should be in (0, 1).
#'          Recommend to use the default value h=0.45 as given in the function.
#'   Note: If multiple values are provided to h as a vector, then the calculation
#'         will be carried out for each h value, which results in multiple p-values
#'         in the returned result for palpha, pbeta, pgamma, pphi.
#'
#' @param method Specifying method='rank' to use rank test. For all other values, the test based on original data will be used.
#' @param Ca Contrast matrix for the contrast effect of the treatments.
#'
#' @return a matrix of p-values for the test of no main
#'    treatment effect (palpha), test of no main time effect (pbeta),
#'    test of no interaction effect between treatment and time (pgamma),
#'    and test of no simple effect of treatment (pphi). For each h value,
#'    the p-values are in the same column.
#'
#' @export
#'
#' @examples
#' # Generate a data set that contains data from 3 treatments,
#' # with 3 subjects in treatment 1, 3 subjects in treatment 2,
#' # and 4 subjects in treatment 3. Each subject contains m=50
#' # repeated observations from Poisson distribution. For the 1st treatment,
#' # the mean vector of the repeated observations from the same subject is
#' # equal to mu1 plus a random effect vector generated by NorRanGen( ).
#' # The m is the number of repeated measurements per subject.
#'  f1<-function(m, mu1, raneff) {
#'      currentmu=mu1+raneff;
#'      currentmu[abs(currentmu)<1e-2]=1e-2;
#'      rpois(m, abs(currentmu))}
#'  f2<-function(m, mu2, raneff) {
#'      currentmu=mu2+raneff;
#'      currentmu[abs(currentmu)<1e-2]=1e-2;
#'      rpois(m, abs(currentmu))}
#' f3<- function(m, mu3, raneff){
#'      currentmu=mu3+raneff;
#'      currentmu[abs(currentmu)<1e-2]=1e-2;
#'      rpois(m, abs(currentmu))}

#' # The a is the number of treatments. The mn stores the number of subjects in treatments.
#' a=3;  mn=c(3, 3, 4); mu1=3; mu2=3; mu3=3; m=50
#' # Generate the time effects via random effects with AR(1) structure.
#' raneff=NorRanGen(m)
#' # Generate data and store in wide format.
#' datawide=numeric()
#' now=0
#' for (i in 1:a){
#'    fi=function(x1, x2) f1(m,x1, x2)*(i==1)+f2(m,x1, x2)*(i==2)+f3(m, x1, x2)*(i==3)
#'    mu=mu1*(i==1)+mu2*(i==2)+mu3*(i==3)
#'    for (k in 1:mn[i]){
#'        now=now+1
#'        datawide<-rbind(datawide, c(k, i, fi(mu, raneff) )   	)
#'        colnames(datawide)=c("sub", "trt", paste("time", seq(m), sep=""))
#'        #this is a typical way to store data in practice
#'    }
#' } #end of j
#'
#'# Note:There are different time effects since values in raneff vector are different
#' dat=dataformat_wide_to_long(datawide)   #dat is in long format
#' # Define the h value used in Proposition 3.3 of Wang and Akritas (2010a)
#' h=c(0.45, 0.49)
#'   #Note: The resulting palpha, pbeta, pgamma, pphi each contains
#'   #      two p-values, one corresponds to each h value
#'   #      (see Proposition 3.3 of Wang and Akritas (2010a))
#' # test based on original data.
#' (org= Heter.test(dat, a, m, mn, h, method='original') )
#' #test based on ranks
#' (rankt= Heter.test(dat, a, m, mn, h, method='rank') )
#'
#'
#' @references
#' Haiyan Wang and Michael Akritas (2010a). Inference from heteroscedastic functional data, Journal of Nonparametric Statistics. 22:2, 149-168.
#' DOI: 10.1080/10485250903171621
#'
#' Haiyan Wang and Michael Akritas (2010b). Rank test for heteroscedastic functional data. Journal of Multivariate Analysis. 101: 1791-1805.
#' https://doi.org/10.1016/j.jmva.2010.03.012
#'
#' Haiyan Wang, James Higgins, and Dale Blasi (2010).  Distribution-Free
#' Tests For No Effect Of Treatment In Heteroscedastic Functional Data
#' Under Both Weak And Long Range Dependence. Statistics and Probability
#' Letters. 80: 390-402. Doi:10.1016/j.spl.2009.11.016
 Heter.test<-function(data,a,b, mn, h=0.45, method='rank', Ca=cbind(as.vector(rep(1, a-1)), -diag(a-1)) ){
   if (b>20) mcon=round(b^h) else mcon= rep(4, length(h))
     if (method !='rank') coln=4 else {
	     coln=5;    data=cbind(data,    rank(data[,4]))
		 }
     N<-sum(mn)*b
	d1<-data[, 1];   	d2<-data[, 2];   	d3<-data[, 3]
	ranks<-data[, coln]
	Rij<-as.matrix(tapply(ranks, list(d1, d2), mean) )
		   # matrix with \bar{R}_{ij.} as the (i,j) element
	Rik<-as.matrix(tapply(ranks, list(d1, d3), mean) )
	          # matrix with \bar{R}_{i.k} as the (i,j) element  might have NA if unbalanced.
	Rik<-replace(Rik, is.na(Rik), 0)    # replace NA's in matrix Rik by 0.
	          # note: after replacement, the number of rows still correct, but the number of columns
	          # would be all same as max_i n_i instead of n_i columns for the ith row.

	Ri<-apply(Rij, 1, mean)  # returns a vector (\wtR_{1..}, ..., \wtR_{a..})
	Rj<-apply(Rij, 2, mean)

	Rim<-kronecker(Ri, t(as.vector(rep(1,b))) )
	  # an a by b matrix with all elements of the ith row same as \wtR_{i..}
	Rjm<-kronecker(t(Rj), as.vector(rep(1,a)))
	  # an a by b matrix with all elements of the ith column same as \wtR_{.j.}

## calculate test statistics

	MSbeta<- a* sum( (Rj-mean(Rj))^2 ) /(b-1)
	MSgamma<- sum((Rij-Rim-Rjm+mean(Ri) )^2 )/((a-1)*(b-1) )
	MSphi<- sum((Rij-Rjm)^2 )/((a-1)*b  )

	tmp1<-tapply(ranks^2, d1, sum)
	   # returns a vector  with ith element \sum_{j=1}^b \sum_{k=1}^{n_i} R_{ijk}^2
	tmp2<-sum( tmp1/(mn*(mn-1)) )     # \sum_{i,j, k}\frac{X_{ijk}^2}{n_i(n_i-1)}
	MSEphi<- tmp2/(a*b)   - mean( apply(Rij^2, 1, mean)/(mn-1) )
	MSE<- MSEphi *b/(b-1) - mean(apply(Rik^2, 1, sum)/(mn*(mn-1)) ) *b/(b-1) + mean(Ri^2/(mn-1)) *b/(b-1)
    ## another way to calculate MSE
    #   MSE<-  tmp2/(a*(b-1)) - mean(apply(Rik^2, 1, sum)/(mn*(mn-1)) ) *b/(b-1) - mean(apply((Rij-Rim)^2, 1, sum)/(mn-1) )/(b-1)

	Fbeta<-MSbeta/MSE
	Fgamma<-MSgamma/MSE
	Fphi<-MSphi/MSEphi
	Dbeta<-MSbeta-MSE
	Dgamma<-MSgamma-MSE
	Dphi<-MSphi-MSEphi

    euijk<-apply(data, 1, e<-function(x) eu(x, coln, Rij, Rik))
	e<-euijk[1,]  # returns the e_{ijk} as a vector, same as ranks structure
	u<-euijk[2,]  # returns the u_{ijk} as a vector, same as ranks structure
	vars<-taufun(u, ranks, d1, d2, d3, a, b, mn,mcon, coln)


	TSbeta<-sqrt(b)*(Fbeta-1)
	TSgamma<-sqrt(b)*(Fgamma-1)
	TSphi<-sqrt(b)*(Fphi-1)
	DSbeta<-sqrt(b)*Dbeta
	DSgamma<-sqrt(b)*Dgamma
	DSphi<-sqrt(b)*Dphi
        if (coln==5) {
            DSbeta<-DSbeta/N^2
	        DSgamma<-DSgamma/N^2
            DSphi<-DSphi/N^2
        }

	varTSbeta<-vars$taubeta2/vars$EMSE^2
	varTSgamma<-vars$taugamma2/vars$EMSE^2
	varTSphi<-vars$tauphi2/vars$EMSEphi^2

	pbeta<- 1-stats::pnorm( TSbeta/sqrt(varTSbeta) )
	pphi<- 1-stats::pnorm( TSphi/sqrt(varTSphi) )
	pgamma<- 1- stats::pnorm(TSgamma/sqrt(varTSgamma) )

	Dpbeta<- 1-stats::pnorm( DSbeta/sqrt(vars$taubeta2) )
	Dpphi<- 1-stats::pnorm( DSphi/sqrt(vars$tauphi2) )
	Dpgamma<- 1- stats::pnorm(DSgamma/sqrt(vars$taugamma2) )

    # test for main treatment effect
        #Ca<-cbind(as.vector(rep(1, a-1)), -diag(a-1))
        N<-sum(mn)*b
        etai<-matrix(0, length(mcon), a)
        ll<-0
        for (l in mcon){
        ll<-ll+1
        for (i in 1:a){
            for (j in 1:b){
        #          for (j1 in seq(round(-b^l ), round(b^l))){
	            for (j1 in seq(-l, l) ){
                    if ((j+j1>0)& (j+j1<=b)  )  {
            tmpinc<- sum((ranks[((d1==i)&(d2==j))] -Rij[i, j])*(ranks[((d1==i)&(d2==(j+j1)))] -Rij[i, (j+j1)]))*sum(mn)/(b*mn[i]*(mn[i]-1))
          etai[ll, i]<-etai[ll, i]+tmpinc
              }
            } #end of j1
          } #end of j
         }  #end of i
        } # end of l
        Ri<-as.vector(Ri)
       TSalphastat<-function(etaii, Ca, N, Ri) N * t(Ri)%*% t(Ca)%*%solve( Ca%*% diag(etaii)%*% t(Ca) ) %*% Ca %*% Ri
       TSalpha<- apply(etai, 1, TSalpha<-function(etaii) {TSalphastat(etaii, Ca, N, Ri) }  )
       palpha<-1-stats::pchisq(TSalpha, nrow(Ca))
       palpha1<- palpha
       results=rbind(palpha, pbeta, pgamma, pphi)
       row.names(results)=c('palpha', 'pbeta','pgamma', 'pphi')
       colnames(results)=paste0('h=', h)
       #results=list(palpha=palpha, pbeta=pbeta, pgamma=pgamma, pphi=pphi)
       #names(results)=c("palpha", "pbeta", "pgamma", "pphi")
       # list(TSalpha=TSalpha, Dpalpha=palpha1, Dpbeta=Dpbeta, Dpgamma=Dpgamma, Dpphi=Dpphi, palpha=palpha1, pbeta=pbeta, pgamma=pgamma, pphi=pphi,
       # results=results)
	   results
}


#' Classical F test for mixed effects model
#'
#'#' This function conducts the test of main effect of treatment, interaction effect of treatment and time,  main effect of time,
#' and simple effect of treament based on mixed effects models
#'
#' @param data The data in long format (see the example in function  dataformat_wide_to_long( )).
#' @param a The number of treatments.
#' @param b The number of time points or repeated measurements per subject.
#' @param mn The vector of sample sizes in treatments.

#' @return a list of p-values from mixed effect model for the test of no main
#'    treatment effect (palpha), test of no main time effect (pbeta),
#'    test of no interaction effect between treatment and time (pgamma),
#'    and test of no simple effect of treatment (pphi).
#'
CF<-function(data,a,b, mn){
         coln=4
        ranks<-data[, coln]
        d1<-data[, 1]
	d2<-data[, 2]
	d3<-data[, 3]
	Rij<-as.matrix(tapply(ranks, list(d1, d2), mean) )
		   # matrix with \bar{R}_{ij.} as the (i,j) element
	Rik<-as.matrix(tapply(ranks, list(d1, d3), mean) )
	          # matrix with \bar{R}_{i.k} as the (i,j) element  might have NA if unbalanced.
	Rik<-replace(Rik, is.na(Rik), 0)    # replace NA's in matrix Rik by 0.
	          # note: after replace, the number of rows still correct, but the number of columns
	          # would be all same as max_i n_i instead of n_i columns for the ith row.
	Rbari<-as.vector(tapply(ranks, d1, mean) ) #vector with ith element \bar{R}_{i..}
	Rbarj<-as.vector(tapply(ranks, d2, mean) ) #vector with ith element \bar{R}_{.j.}
	Rim<-kronecker(Rbari, t(as.vector(rep(1,b))) )
	  # a a by b matrix with all elements of the ith row same as \wtR_{i..}
	Rjm<-kronecker(t(Rbarj), as.vector(rep(1,a)))
	  # a a by b matrix with all elements of the ith column same as \wtR_{.j.}

## calculate test statistics
	MSalpha<- sum(mn*(Rbari-mean(ranks))^2)*b/(a-1)
	MSb<-  (sum(Rik^2)-sum(Rbari^2*mn) )*b/(sum(mn)-a)
	Falpha<-MSalpha/MSb

	MSphi<- sum(mn*apply((Rij-Rjm)^2 , 1, sum) )/((a-1)*b  )
	MSEphi<- (sum(ranks^2)- sum(mn*apply(Rij^2, 1, sum))  )/(b*(sum(mn)-a))
	Fphi<-MSphi/MSEphi

	MSbeta<- sum(mn)* sum( (Rbarj-mean(ranks))^2 ) /(b-1)
	MSgamma<- sum(mn*apply((Rij-Rim-Rjm+mean(ranks) )^2, 1, sum ))/((a-1)*(b-1) )
	MSE<- MSEphi*b/(b-1) - MSb/(b-1)
	Fbeta<-MSbeta/MSE
	Fgamma<-MSgamma/MSE

	palpha<-1-stats::pf(Falpha, a-1, sum(mn)-a )
	pbeta<-1-stats::pf(Fbeta, b-1, (sum(mn)-a)*(b-1) )
	pgamma<-1-stats::pf(Fgamma, (a-1)*(b-1), (sum(mn)-a)*(b-1) )
	pphi<-1-stats::pf(Fphi, (a-1)*b, (sum(mn)-a)*b )

	list(palpha=palpha, pbeta=pbeta, pgamma=pgamma, pphi=pphi)



}




#' Unbiased estimate of squared variance $sigma^4$ based on U-statistic of an i.i.d. sample
#'
#' This function takes a random sample and produces an unbiased estimate of the squared variance based on
#' the U-statistic $sum_{k1 ne k2 ne k3 ne k4} (x_{k1}-x_{k2)})^2 (x_{k3}-x_{k4)})^2$.
#'
#' @param x is a vector of i.i.d. observations.
#'
#' @return unbiased estimate of squared variance $sigma^4$, where $sigma^2$ is the variance.
#'
#' @export
#'
#' @examples
#'   x=stats::rnorm(10)
#'   sigma4(x)
sigma4<-function(x){
  nij<-length(x)
  sigmaij4<- 0
  for (m1 in 1:nij){
    for (m2 in 1:nij){
      for (m3 in 1:nij){
        for (m4 in 1:nij){
          flag<- (m1!=m2)&(m1 !=m3)&(m1 !=m4) &(m2 !=m3) &(m2 !=m4) & (m3 !=m4)
          sigmaij4<-sigmaij4+ (flag==T)*(x[m1]-x[m2])^2 * (x[m3]-x[m4])^2
        }
      }
    }
  }
  sigmaij4<-sigmaij4/(4*nij*(nij-1)*(nij-2)*(nij-3) )
  sigmaij4
}

thetahat<-function(x) (mean((x-mean(x))^2 ))^2

#' Jackknife estimate of $sigma^4$ using an i.i.d. sample
#'
#' This function takes a random sample and approximates an unbiased estimate of the squared variance based on
#' Jackknife estimate.
#'
#' @param x is a vector of i.i.d. observations.
#'
#' @return  Jackknife estimate of $sigma^4$, where $sigma^2$ is the variance.
#'
#' @export
#'
#' @examples
#'   x=stats::rnorm(10)
#'   sigma4jackknife(x)
sigma4jackknife<-function(x){

	n<-length(x)
	s4hat<- thetahat(x)
	result <- n *s4hat
        for (i in 1:n) result<-result- (n-1)/n* thetahat(x[-i])
        result
        }

#' Bootstrap estimate of $sigma^4$ using an i.i.d. sample
#'
#' This function takes a random sample and approximates an unbiased estimate of the squared variance based on
#' Bootstrap estimate.
#'
#' @param x is a vector of i.i.d. observations.
#'
#' @return  Bootstrap estimate of $ sigma^4$, where $sigma^2$ is the variance.
#'
#' @export
#'
#' @examples
#'   x=stats::rnorm(10)
#'   sigma4bootstrap(x)
sigma4bootstrap<-function(x){
	n<-length(x)
	thetahatstar<-mean(apply(matrix(sample(x, 3*1000, replace = T), 1000, 3), 1, thetahat ) )
	result<-2*thetahat(x)-thetahatstar
	result
	}


#' Jackknife estimate of the squared covariance between two time points
#'
#' This function provides the Jackknife estimate of the squared
#' covariance using i.i.d. obervations from one variable given in vector \code{x}
#' and i.i.d. observations from a second variable given in vector \code{y}.
#'
#' @param x a vector of i.i.d. observations
#' @param y a second vector of i.i.d. observations
#'
#' @return Jackknife estimate of the squared covariance
cov_squared_jackknife<-function(x, y){
  # Jackknife estimate of \sigma_{ijj'}^2
  # x=(X_{ij1}, \cdots, X_{ijn_i}), y=(X_{ij'1}, \cdots, X_{ij'n_i}),
  thetahatijj1<-function(x, y) (mean((x-mean(x))*(y-mean(y))  ))^2
	n<-length(x)
	s4hat<- thetahatijj1(x, y)
	result <- n *s4hat
        for (i in 1:n) result<-result- (n-1)/n* thetahatijj1(x[-i], y[-i])
        result
        }


#' Test of no contrast effect of the treatments
#'
#' This function conducts the test of no contract effect of treatments
#' based on Theorem 3.2 of Wang, Higgins, and Blasi (2010).
#'
#' @param data The data in long format (see the example in function  dataformat_wide_to_long( )).
#' @param a The number of treatments.
#' @param b The number of time points or repeated measurements per subject.
#' @param mn The vector of sample sizes in treatments.
#' @param h The h value used in the estimators in Theorem 3.2 of Wang, Higgins, and Blasi (2010).
#'          The value of h should be in (0, 0.5).
#'          Recommend to use the default value h=0.45 as given in the function.
#'          Note: If multiple values are provided to h as a vector, then the calculation
#'         will be carried out for each h value, which results in multiple p-values
#'         in the returned result.
#'
#' @param method Specifying method='rank' to use rank test. For all other values, the test based on original data will be used.
#' @param Ca Contrast matrix for the contrast effect of the treatments.
#'           The default contrast corresponds to the main treatment effect.
#'
#' @return a matrix that contains the test statistics and pvalues for each h value.
#'
#' @export
#'
#' @examples
#' # Generate a data set that contains data from 3 treatments,
#' # with 3 subjects in treatment 1, 3 subjects in treatment 2,
#' # and 4 subjects in treatment 3. Each subject contains m=50
#' # repeated observations from Poisson distribution. For the 1st treatment,
#' # the mean vector of the repeated observations from the same subject is
#' # equal to mu1 plus a random effect vector generated by NorRanGen( ).
#' # The m is the number of repeated measurements per subject.
#'  f1<-function(m, mu1, raneff) {
#'      currentmu=mu1+raneff;
#'      currentmu[abs(currentmu)<1e-2]=1e-2;
#'      rpois(m, abs(currentmu))}
#'  f2<-function(m, mu2, raneff) {
#'      currentmu=mu2+raneff;
#'      currentmu[abs(currentmu)<1e-2]=1e-2;
#'      rpois(m, abs(currentmu))}
#'  f3<- function(m, mu3, raneff){
#'      currentmu=mu3+raneff;
#'      currentmu[abs(currentmu)<1e-2]=1e-2;
#'      rpois(m, abs(currentmu))}
#' # The a is the number of treatments. The mn stores the number of subjects in treatments.
#' a=3;  mn=c(3, 3, 4); mu1=3; mu2=3; mu3=2; m=50
#' # Note treatment 3 has mean mu3=2, which is different from the mean of
#' #  the other two treatments.
#' # Generate the time effects via random effects with AR(1) structure.
#' raneff=NorRanGen(m)
#' # Generate data and store in wide format.
#' datawide=numeric()
#' now=0
#' for (i in 1:a){
#'    fi=function(x1, x2) f1(m,x1, x2)*(i==1)+f2(m,x1, x2)*(i==2)+f3(m, x1, x2)*(i==3)
#'    mu=mu1*(i==1)+mu2*(i==2)+mu3*(i==3)
#'    for (k in 1:mn[i]){
#'        now=now+1
#'        datawide<-rbind(datawide, c(k, i, fi(mu, raneff) )   	)
#'        colnames(datawide)=c("sub", "trt", paste("time", seq(m), sep=""))
#'        #this is a typical way to store data in practice
#'    }
#' } #end of j
#'
#'# Note:There are different time effects since values in raneff vector are different
#' dat=dataformat_wide_to_long(datawide)   #dat is in long format
#'   #Note: For each h value below, the test statistic and p-value are calculated.
#'   #      (see Theorem 3.2 of Wang, Higgins, and Blasi (2010))
#'
#' tcontrast(dat, a, m, mn, h=c(0.45, 0.49), method='original')
#'
#' @references
#' Haiyan Wang and Michael Akritas (2010a). Inference from heteroscedastic functional data, Journal of Nonparametric Statistics. 22:2, 149-168.
#' DOI: 10.1080/10485250903171621
#'
#' Haiyan Wang and Michael Akritas (2010b). Rank test for heteroscedastic functional data. Journal of Multivariate Analysis. 101: 1791-1805.
#' https://doi.org/10.1016/j.jmva.2010.03.012
#'
#' Haiyan Wang, James Higgins, and Dale Blasi (2010).  Distribution-Free
#' Tests For No Effect Of Treatment In Heteroscedastic Functional Data
#' Under Both Weak And Long Range Dependence. Statistics and Probability
#' Letters. 80: 390-402. Doi:10.1016/j.spl.2009.11.016
tcontrast<-function(data,a,b, mn, h=0.45, method='rank', Ca=cbind(as.vector(rep(1, a-1)), -diag(a-1)) ){
  if (b>20) mcon=round(b^h) else mcon= rep(4, length(h))
  if (method !='rank') coln=4 else {
    coln=5;    data=cbind(data,    rank(data[,4]))
  }
  N<-sum(mn)*b
	d1<-data[, 1]
	d2<-data[, 2]
	d3<-data[, 3]
	ranks<-data[, coln]
	Rij<-as.matrix(tapply(ranks, list(d1, d2), mean) )
		   # matrix with \bar{R}<-{ij.} as the (i,j) element
	Ri<-apply(Rij, 1, mean)  # returns a vector (\wtR<-{1..}, ..., \wtR<-{a..})
       N<-sum(mn)*b
        etai<-matrix(0, length(mcon), a)
        ll<-0
        for (l in mcon){
        ll<-ll+1
        for (i in 1:a){
         for (j in 1:b){
#          for (j1 in seq(round(-b^l ), round(b^l))){
	   for (j1 in seq(-l, l) ){
          if ((j+j1>0)& (j+j1<=b)  )  {
            tmpinc<- sum((ranks[((d1==i)&(d2==j))] -Rij[i, j])*(ranks[((d1==i)&(d2==(j+j1)))] -Rij[i, (j+j1)]))*sum(mn)/(b*mn[i]*(mn[i]-1))
          etai[ll, i]<-etai[ll, i]+tmpinc
          }
         } #end of j1
         } #end of j
        }  #end of i
      } # end of l
        Ri<-as.vector(Ri)
       TSalphastat<-function(etaii, Ca, N, Ri) N * t(Ri)%*% t(Ca)%*%solve( Ca%*% diag(etaii)%*% t(Ca) ) %*% Ca %*% Ri
       TSalpha<- apply(etai, 1, TSalpha<-function(etaii) {TSalphastat(etaii, Ca, N, Ri) }  )
       palpha<-1-stats::pchisq(TSalpha, nrow(Ca))
       palpha1<- palpha
       result=rbind(TSalpha, palpha1)
       row.names(result)=c('Test_Statistic', 'p_value')
       colnames(result)=paste0('h=', h)
       result
}

Try the HeterFunctionalData package in your browser

Any scripts or data that you put into this service are public.

HeterFunctionalData documentation built on Aug. 20, 2020, 9:06 a.m.