R/statistics.R
In funStatTest: Statistical Testing for Functional Data

Documented in comp_stat stat_cff stat_hkr stat_med stat_mo stat_wmw

# WARNING - Generated by {fusen} from dev/flat_package.Rmd: do not edit by hand

#' Compute multiple statistics
#' 
#'
#' 
#' @description Computation of the different statistics defined in the package. 
#' See Smida et al (2022) for more details.
#' 
#' @details
#' For HKR statistics, only the values of the two statistics, namely `HKR1` and 
#' `HKR2` and not the eigen values (see [funStatTest::stat_hkr()] for 
#' more details).
#' 
#' @param MatX numeric matrix of dimension `n_point x n` containing `n` 
#' trajectories (in columns) of size `n_point` (in rows).
#' @param MatY numeric matrix of dimension `n_point x m` containing `m` 
#' trajectories (in columns) of size `n_point` (in rows).
#' @param stat character string or vector of character string, name of the 
#' statistics for which the p-values will be computed, among `"mo"`, `"med"`,
#' `"wmw"`, `"hkr"`, `"cff"`.
#' 
#' @return list of named numeric value corresponding to the statistic values
#' listed in `stat` input.
#' 
#' @inherit funStatTest_authors author
#' 
#' @references
#' Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
#' A median test for functional data, Journal of Nonparametric Statistics, 
#' 34:2, 520-553,  
#' \doi{10.1080/10485252.2022.2064997}, 
#' [hal-03658578](https://hal.science/hal-03658578)
#' 
#' @seealso [funStatTest::stat_mo()], [funStatTest::stat_med()], 
#' [funStatTest::stat_wmw()], [funStatTest::stat_hkr()], 
#' [funStatTest::stat_cff()]
#' 
#' @export
#' 
#' @examples
#' simu_data <- simul_data(
#'     n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
#'     delta_shape = "constant", distrib = "normal"
#' )
#'
#' MatX <- simu_data$mat_sample1
#' MatY <- simu_data$mat_sample2
#'
#' res <- comp_stat(MatX, MatY, stat = c("mo", "med", "wmw", "hkr", "cff"))
#' res
comp_stat <- function(MatX, MatY, stat = c("mo", "med")) {
    
    assert_matrix(MatX)
    assert_numeric(MatX)
    assert_matrix(MatY)
    assert_numeric(MatY)
    assert_subset(
        stat, c("mo", "med", "wmw", "hkr", "cff"), empty.ok = FALSE)
    
    # original values on non-permuted samples
    stat_values <- lapply(
        stat, 
        function(stat_name) {
            tmp <- switch (
                stat_name,
                mo = stat_mo(MatX, MatY),
                med = stat_med(MatX, MatY),
                wmw = stat_wmw(MatX, MatY),
                hkr = as.matrix(unlist(stat_hkr(MatX, MatY)[1:2])),
                cff = stat_cff(MatX, MatY)
            )
            return(tmp)
        }
    )
    names(stat_values) <- stat
    return(stat_values)
}

#' Cuevas-Febrero-Fraiman statistic
#' 
#'
#' 
#' @description The Cuevas-Febrero-Fraiman statistics defined in 
#' Cuevas et al (2004) (and noted CFF in Smida et al 2022) 
#' is computed to compare two sets of functional trajectories.
#' 
#' @param MatX numeric matrix of dimension `n_point x n` containing `n` 
#' trajectories (in columns) of size `n_point` (in rows).
#' @param MatY numeric matrix of dimension `n_point x m` containing `m` 
#' trajectories (in columns) of size `n_point` (in rows).
#' 
#' @return numeric value corresponding to the WMW statistic value
#' 
#' @inherit funStatTest_authors author
#' 
#' @references
#' Cuevas, A, Febrero, M, and Fraiman, R (2004)
#' An anova test for functional data. Computational Statistics & Data 
#' Analysis, 47(1): 111–122. \doi{10.1016/j.csda.2003.10.021}
#' 
#' Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
#' A median test for functional data, Journal of Nonparametric Statistics, 
#' 34:2, 520-553,  
#' \doi{10.1080/10485252.2022.2064997}, 
#' [hal-03658578](https://hal.science/hal-03658578)
#' 
#' @seealso [funStatTest::comp_stat()], [funStatTest::permut_pval()]
#' 
#' @export
#' 
#' @examples
#' simu_data <- simul_data(
#'     n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
#'     delta_shape = "constant", distrib = "normal"
#' )
#'
#' MatX <- simu_data$mat_sample1
#' MatY <- simu_data$mat_sample2
#'
#' stat_cff(MatX, MatY)
stat_cff <- function(MatX, MatY) {
    m <- ncol(MatX)
    n <- ncol(MatY)
    N <- n+m
    npoints <- nrow(MatX)
    X_bar <- apply(MatX, 1, mean)
    Y_bar <- apply(MatY, 1, mean)
    stat <- m * (norm2(X_bar - Y_bar)^2)
    return(stat)
}

#' Horváth-Kokoszka-Reeder statistics
#' 
#'
#' 
#' @description The Horváth-Kokoszka-Reeder statistics defined in 
#' Chakraborty & Chaudhuri (2015) (and noted HKR1 and HKR2 in Smida et al 2022) 
#' are computed to compare two sets of functional trajectories.
#' 
#' @param MatX numeric matrix of dimension `n_point x n` containing `n` 
#' trajectories (in columns) of size `n_point` (in rows).
#' @param MatY numeric matrix of dimension `n_point x m` containing `m` 
#' trajectories (in columns) of size `n_point` (in rows).
#' 
#' @return A list with the following elements
#' - `T1`: numeric value corresponding to the HKR1 statistic value
#' - `T2`: numeric value corresponding to the HKR2 statistic value
#' - `eigenval`: numeric vector of eigen values from the empirical
#' pooled covariance matrix of `MatX` and `MatY` (see Smida et al, 2022, for
#' more details)
#' 
#' @importFrom Matrix rankMatrix
#' @importFrom tibble lst
#' @importFrom stats prcomp
#' 
#' @inherit funStatTest_authors author
#' 
#' @references
#' Horváth, L., Kokoszka, P., & Reeder, R. (2013). Estimation of the mean of 
#' functional time series and a two-sample problem. Journal of the Royal 
#' Statistical Society. Series B (Statistical Methodology), 75(1), 103–122. 
#' \doi{10.1111/j.1467-9868.2012.01032.x}
#' 
#' Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
#' A median test for functional data, Journal of Nonparametric Statistics, 
#' 34:2, 520-553,  
#' \doi{10.1080/10485252.2022.2064997}, 
#' [hal-03658578](https://hal.science/hal-03658578)
#' 
#' @seealso [funStatTest::comp_stat()], [funStatTest::permut_pval()]
#' 
#' @export
#' 
#' @examples
#' simu_data <- simul_data(
#'     n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
#'     delta_shape = "constant", distrib = "normal"
#' )
#'
#' MatX <- simu_data$mat_sample1
#' MatY <- simu_data$mat_sample2
#'
#' stat_hkr(MatX, MatY)
stat_hkr <- function(MatX, MatY) {
    X <- MatX
    Y <- MatY

    M <- ncol(X)
    N <- ncol(Y)

    npoints <- nrow(X)
    X_bar = matrix(0, npoints, 1)   #initializing sum
    for (index in 1:M) {
        X_bar = X_bar+X[,index]
    }
    X_bar = X_bar/M
    X_bar_matrix = matrix(X_bar, npoints, M)

    Y_bar = matrix(0, npoints, 1)   #initializing sum
    for (index in 1:N) {
        Y_bar = Y_bar+Y[,index]
    }
    Y_bar = Y_bar/N
    Y_bar_matrix = matrix(Y_bar, npoints, N)

    Z = matrix(0, npoints, N+M)
    Z[,1:M] = sqrt(N/M)*(X-X_bar_matrix)
    Z[,(M+1):(N+M)] = sqrt(M/N)*(Y-Y_bar_matrix)
    Z = sqrt((N+M-1)/(N+M))*Z

    # pca on Z
    tmp <- prcomp(t(Z), center = FALSE, scale. = FALSE)

    #proportion de la variance 85% pour obtenir les valeurs propres.
    #exp_var <- cumsum(tmp$sdev^2)/sum(tmp$sdev^2)
    #k <- head(which(exp_var >= 0.85), 1)

    #D <- (1/(N+M-1)) * Z %*% t(Z)
    #rk_D <- rankMatrix(D) #package{Matrix}
    #rk_D
    rk_Z <- rankMatrix(Z)
    rk_Z

    eigenvec <- as.matrix(tmp$rotation[,1:rk_Z])
    eigenval <- tmp$sdev[1:rk_Z]^2
    #eigenvec <- as.matrix(tmp$rotation[,1:k])
    #eigenval <- tmp$sdev[1:k]^2

    XY_bar_mat <- matrix(X_bar - Y_bar, nrow = 1, ncol = npoints, byrow = TRUE)
    a <- XY_bar_mat %*% eigenvec

    T1 = N*M / (N+M) * sum(a^2/eigenval)
    T2 = N*M / (N+M) * sum(a^2)

    return(lst(T1, T2, eigenval))
}

#' MED median statistic
#' 
#'
#' 
#' @description The MED median statistics defined in Smida et al (2022) is 
#' computed to compare two sets of functional trajectories.
#' 
#' @param MatX numeric matrix of dimension `n_point x n` containing `n` 
#' trajectories (in columns) of size `n_point` (in rows).
#' @param MatY numeric matrix of dimension `n_point x m` containing `m` 
#' trajectories (in columns) of size `n_point` (in rows).
#' 
#' @return numeric value corresponding to the MED median statistic value
#' 
#' @inherit funStatTest_authors author
#' 
#' @references Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
#' A median test for functional data, Journal of Nonparametric Statistics, 
#' 34:2, 520-553,  
#' \doi{10.1080/10485252.2022.2064997}, 
#' [hal-03658578](https://hal.science/hal-03658578)
#' 
#' @seealso [funStatTest::comp_stat()], [funStatTest::permut_pval()]
#' 
#' @export
#' 
#' @examples
#' simu_data <- simul_data(
#'     n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
#'     delta_shape = "constant", distrib = "normal"
#' )
#'
#' MatX <- simu_data$mat_sample1
#' MatY <- simu_data$mat_sample2
#'
#' stat_med(MatX, MatY)
stat_med <- function(MatX, MatY) {
    n <- ncol(MatY)
    m <- ncol(MatX)
    npoints <- nrow(MatX)
    M <- rep(0,npoints)
    #num<-rep(0,npoints)
    for(i in 1:n){
        num <- rep(0,npoints)
        for(j in 1:m) {
            diff <- MatY[,i]-MatX[,j]
            norm_diff <- norm2(diff)
            if(norm_diff > 0) {
                num <- num + diff/norm_diff
            }
        }
        #num<-(1/m)*num
        denom <- norm2(num)
        if(denom > 0) {
            M <- M+(num/denom)
        }
    }
    M <- M/n
    return(norm2(M))
}

#' MO median statistic
#' 
#'
#' 
#' @description The MO median statistics defined in Smida et al (2022) is 
#' computed to compare two sets of functional trajectories.
#' 
#' @param MatX numeric matrix of dimension `n_point x n` containing `n` 
#' trajectories (in columns) of size `n_point` (in rows).
#' @param MatY numeric matrix of dimension `n_point x m` containing `m` 
#' trajectories (in columns) of size `n_point` (in rows).
#' 
#' @return numeric value corresponding to the MO median statistic value
#' 
#' @inherit funStatTest_authors author
#' 
#' @references Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
#' A median test for functional data, Journal of Nonparametric Statistics, 
#' 34:2, 520-553,  
#' \doi{10.1080/10485252.2022.2064997}, 
#' [hal-03658578](https://hal.science/hal-03658578)
#' 
#' @seealso [funStatTest::comp_stat()], [funStatTest::permut_pval()]
#' 
#' @export
#' 
#' @examples
#' simu_data <- simul_data(
#'     n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
#'     delta_shape = "constant", distrib = "normal"
#' )
#'
#' MatX <- simu_data$mat_sample1
#' MatY <- simu_data$mat_sample2
#'
#' stat_mo(MatX, MatY)
stat_mo <- function(MatX, MatY) {
    n <- ncol(MatY)
    m <- ncol(MatX)
    n_point <- nrow(MatX)
    numM <- numeric(n_point)
    Med <- numeric(n_point)
    v <- numeric(n_point)
    
    for(i in 1:n){
        num1 <- numeric(n_point)
        num2 <- numeric(n_point)
        for(k in 1:n){
            if(k!=i){
                v <- MatY[,i]-MatY[,k]
                norm_v <- norm2(v)
                if(norm_v != 0) {
                    num1 <- num1+v/norm_v
                }
            }
            
            for(j in 1:m){
                v <- MatY[,i]-MatX[,j]
                norm_v <- norm2(v)
                if(norm_v != 0) {
                    num2 <- num2+v/norm_v
                }
            }
        }
        numM <- num1+num2
        denomM <- norm2(numM)
        if(denomM != 0) {
            Med <- Med+(numM/denomM)
        }
    }
    
    Med <- Med/n
    return(norm2(Med))
}

#' Wilcoxon-Mann-Whitney (WMW) statistic
#' 
#'
#' 
#' @description The Wilcoxon-Mann-Whitney statistic defined in 
#' Chakraborty & Chaudhuri (2015) (and noted WMW in Smida et al 2022) is 
#' computed to compare two sets of functional trajectories.
#' 
#' @param MatX numeric matrix of dimension `n_point x n` containing `n` 
#' trajectories (in columns) of size `n_point` (in rows).
#' @param MatY numeric matrix of dimension `n_point x m` containing `m` 
#' trajectories (in columns) of size `n_point` (in rows).
#' 
#' @return numeric value corresponding to the WMW statistic value
#' 
#' @inherit funStatTest_authors author
#' 
#' @references
#' Anirvan Chakraborty, Probal Chaudhuri, 
#' A Wilcoxon–Mann–Whitney-type test for infinite-dimensional data, 
#' Biometrika, Volume 102, Issue 1, March 2015, Pages 239–246,
#' \doi{10.1093/biomet/asu072}
#' 
#' Zaineb Smida, Lionel Cucala, Ali Gannoun & Ghislain Durif (2022) 
#' A median test for functional data, Journal of Nonparametric Statistics, 
#' 34:2, 520-553,  
#' \doi{10.1080/10485252.2022.2064997}, 
#' [hal-03658578](https://hal.science/hal-03658578)
#' 
#' @seealso [funStatTest::comp_stat()], [funStatTest::permut_pval()]
#' 
#' @export
#' 
#' @examples
#' simu_data <- simul_data(
#'     n_point = 100, n_obs1 = 50, n_obs2 = 75, c_val = 10, 
#'     delta_shape = "constant", distrib = "normal"
#' )
#'
#' MatX <- simu_data$mat_sample1
#' MatY <- simu_data$mat_sample2
#'
#' stat_wmw(MatX, MatY)
stat_wmw <- function(MatX, MatY) {
    n <- ncol(MatY)
    m <- ncol(MatX)
    npoints <- nrow(MatX)
    vecWMW <- rep(0,npoints)
    for (i in 1:m) {
        for(j in 1:n){
            diff <- MatY[,j]-MatX[,i]
            norm_diff <- norm2(diff)
            if(norm_diff > 0) {
                vecWMW <- vecWMW + diff/norm_diff
            }
        }
    }

    vecWMW <- vecWMW/(n*m)
    return(norm2(vecWMW))
}