R/nonparTrendR.R
In nonparTrendR: A Nonparametric Trend Test for Independent and Dependent Samples

Documented in nonparTrendR_test

# File: nonparTrendR/R/nonparTrendR.R

#' Nonparametric Trend Test (Bathke, 2009)
#'
#' Performs a nonparametric trend test for independent or dependent samples
#' based on Bathke (2009).
#'
#' @param data For `type = "I"` (independent samples), a list of numeric vectors,
#'   where each vector represents a group. For `type = "D"` (dependent samples),
#'   a numeric matrix where rows are subjects and columns are
#'   conditions/time points (ordered by the expected trend).
#' @param type A character string specifying the type of data:
#'   `"I"` for independent samples (uses Formula 2 from Bathke, 2009).
#'   `"D"` for dependent samples (uses Formula 4 from Bathke, 2009, for an increasing trend).
#' @param alternative A character string specifying the alternative hypothesis,
#'   must be one of `"two.sided"` (default for independent), `"increasing"`
#'   (default for dependent), or `"decreasing"`.
#'
#' @return A list with class `"htest"` containing the following components:
#'   \item{statistic}{The value of the test statistic (\eqn{\nu}).}
#'   \item{p.value}{The p-value for the test.}
#'   \item{alternative}{A character string describing the alternative hypothesis.}
#'   \item{method}{A character string indicating the type of test performed.}
#'   \item{data.name}{A character string giving the name(s) of the data.}
#'
#' @references
#' Bathke, A. C. (2009). A unified approach to nonparametric trend tests
#' for dependent and independent samples. *Metrika*, 69(1), 17-29.
#'
#' @examples
#' # --- Independent Samples Example (Table 2 from paper) ---
#' group1_indep <- c(6.62, 6.65, 5.78, 5.63, 6.05, 6.48, 5.50, 5.37)
#' group2_indep <- c(6.25, 6.95, 5.61, 5.40, 6.89, 6.24, 5.85)
#' group3_indep <- c(7.11, 5.68, 6.23, 7.11, 5.55, 5.90, 5.98, 7.14)
#' group4_indep <- c(6.93, 7.17, 7.12, 6.43, 6.96, 7.08, 7.93)
#' group5_indep <- c(7.26, 6.45, 6.37, 6.54, 6.93, 6.40, 7.01, 7.74, 7.63, 7.62, 7.38)
#' data_independent <- list(group1_indep, group2_indep, group3_indep, group4_indep, group5_indep)
#' nonparTrendR_test(data_independent, type = "I", alternative = "increasing")
#'
#' # --- Dependent Samples Example (Panic Data from paper) ---
#' panic_data_dep <- matrix(c(
#'   8, 6, 5, 5, 4,  8, 6, 5, 4, 2,  6, 5, 5, 4, 2,  6, 6, 6, 5, 5,
#'   7, 6, 6, 6, 6,  8, 7, 3, 2, 2,  7, 6, 7, 3, 3,  6, 4, 5, 3, 3,
#'   5, 4, 3, 3, 2,  8, 6, 5, 5, 4,  7, 6, 5, 4, 2,  6, 5, 5, 4, 2,
#'   6, 6, 6, 5, 5,  8, 6, 6, 6, 6,  8, 7, 4, 2, 2,  7, 6, 7, 3, 3
#' ), nrow = 16, byrow = TRUE)
#' # For increasing trend test, data should be ordered such that higher values are expected later.
#' # If testing for decreasing trend as in your example, 
#' # reverse the columns or use alternative="decreasing"
#' # Example using original order, testing for decreasing:
#' nonparTrendR_test(panic_data_dep, type = "D", alternative = "decreasing")
#' # Example reversing columns to test for increasing trend of "improvement"
#' nonparTrendR_test(panic_data_dep[, 5:1], type = "D", alternative = "increasing")
#'
#' @importFrom stats pnorm
#' @export

nonparTrendR_test <- function(data, type = c("I", "D"), alternative = NULL) {
    type <- match.arg(type)
    data_name <- deparse(substitute(data))

    if (type == "I") {
        # Use exact logic from your working script — no modifications
        if (!is.list(data) || !all(sapply(data, is.numeric))) {
            stop("'data' must be a list of numeric vectors for type = 'I'.")
        }
        if (length(data) < 2) {
            stop("At least two groups are required for type = 'I'.")
        }
        if (is.null(alternative)) {
            alternative <- "increasing"  # One-sided by default
        }
        alternative <- match.arg(alternative, c("two.sided", "increasing", "decreasing"))

        group_sizes <- sapply(data, length)
        all_obs <- unlist(data)
        N <- length(all_obs)
        a <- length(data)
        group_ids <- rep(1:a, times = group_sizes)
        group_f <- factor(group_ids)

        ranks <- rank(all_obs)
        Rik_list <- split(ranks, group_f)
        Ri_dot <- sapply(Rik_list, sum)

        # Your original weight logic
        weights <- numeric(a)
        for (i in 1:a) {
            left <- if (i > 1) sum(group_sizes[1:(i - 1)]) else 0
            right <- if (i < a) sum(group_sizes[(i + 1):a]) else 0
            weights[i] <- left - right
        }

        numerator <- sum(weights * Ri_dot)

        # Your original denominator logic
        denominator_sum <- 0
        for (i in 1:a) {
            ni <- group_sizes[i]
            Rik <- Rik_list[[i]]
            deviations <- Rik - (N + 1) / 2
            term <- (ni / (ni - 1)) * (weights[i]^2) * sum(deviations^2)
            denominator_sum <- denominator_sum + term
        }

        if (denominator_sum <= 1e-10) {
            nu_hat <- 0
        } else {
            nu_hat <- numerator / sqrt(denominator_sum)
        }

        method_str <- "Bathke Nonparametric Trend Test (Independent Samples)"
    }

    else if (type == "D") {
        # --- Dependent Samples Logic (Formula 4, for increasing trend) ---
        if (!is.matrix(data) || !is.numeric(data)) {
            stop("'data' must be a numeric matrix for type = 'D' (subjects in rows, conditions in columns).")
        }
        n_subjects <- nrow(data)
        b_conditions <- ncol(data)
        if (n_subjects <= 1 || b_conditions <= 1) {
            stop("Matrix must have at least 2 subjects and 2 conditions for type = 'D'.")
        }
        if (is.null(alternative)) {
            alternative <- "increasing" # Default for dependent
        }
        alternative <- match.arg(alternative, c("two.sided", "increasing", "decreasing"))

        N_total_obs <- n_subjects * b_conditions

        all_ranks_matrix <- matrix(rank(data, ties.method = "average"),
                                   nrow = n_subjects, ncol = b_conditions)
        R_j_dot <- colSums(all_ranks_matrix)
        condition_scores <- (1:b_conditions) - (b_conditions + 1) / 2

        if (alternative == "decreasing") {
            condition_scores <- rev(condition_scores)
        }

        numerator <- sum(condition_scores * R_j_dot)

        s_v_hat_squared <- 0
        mean_overall_rank <- (N_total_obs + 1) / 2
        for (j in 1:b_conditions) {
            sum_sq_dev_j <- sum((all_ranks_matrix[, j] - mean_overall_rank)^2)
            s_v_hat_squared <- s_v_hat_squared + (condition_scores[j]^2 * sum_sq_dev_j)
        }
        for (j in 1:b_conditions) {
            for (l in 1:b_conditions) {
                if (j == l) next
                sum_prod_dev_jl <- sum((all_ranks_matrix[, j] - mean_overall_rank) *
                                       (all_ranks_matrix[, l] - mean_overall_rank))
                s_v_hat_squared <- s_v_hat_squared + (condition_scores[j] * condition_scores[l] * sum_prod_dev_jl)
            }
        }

        if (s_v_hat_squared <= 1e-10) {
            nu_hat <- 0
        } else {
            s_nu <- sqrt(s_v_hat_squared)
            nu_hat <- numerator / s_nu
        }

        method_str <- "Bathke Nonparametric Trend Test (Dependent Samples)"
    }

    # --- P-value Calculation ---
    if (alternative == "two.sided") {
        p_value <- 2 * (1 - pnorm(abs(nu_hat)))
    } else if (alternative == "increasing") {
        p_value <- 1 - pnorm(nu_hat)
    } else {
        p_value <- pnorm(nu_hat)
    }

    # Final output
    names(nu_hat) <- "nu_hat"
    return(structure(list(statistic = nu_hat, p.value = p_value, alternative = alternative,
                          method = method_str, data.name = data_name), class = "htest"))
}