#########
# To-do #
#########
# Add a flag to also run the sign.test
# Have nice output processing of the returned object
###Trinomial Test Function
#' Trinomial Test
#'
#' Performs the trinomial test on either two data vectors or a vector of containing
#' the counts of positive, tied, and negative differences.
#'
#' @param col1 first data vector or a vector of (positive, tie, negative) counts of differences.
#' @param col2 second data vector.
#' @param alternative the type of hypothesis test to be performed.
#' @param p_tie theoretical proportion of ties; calculated from the data by default.
#' @param print_info logical. If \code{TRUE} the hypotheses and p-value of the test are returned.
#'
#' @return An output object containing the number of observations; test statistic;
#' p-value; hypotheses; number of positive, tied, or negative differences;
#' critical value; and reverse flag notice for the trinomial test.
#' @export
#'
#' @examples
trinomial_test <- function(col1,
col2 = NULL,
alternative = c("two.sided", "greater", "less"),
p_tie = NULL,
print_info = TRUE) {
if (length(alternative) > 1) {
alternative <- alternative[1]
}
##############################################################################
#### Section 1: process the data
if (is.null(col2)) { # user has entered a vector of counts; the sum of the values is n
ns <- list(n_pos = col1[1],
n_tie = col1[2],
n_neg = col1[3])
# order determined by the Bian et al. (2011) article
n <- sum(col1)
} else { # user has entered two vectors of raw data to be subtracted; the length of the vectors is n
columns <- cbind(col1, col2) #Merge the two columns
dat <- data.frame(columns) #Create data frame with the columns
numrow1 <- nrow(dat) #Find the length of the data frame to see number of observations
dat <- dat[stats::complete.cases(dat), ] #Remove empty/NA columns
numrow2 <- nrow(dat) #Find the length of the data frame after clearing any rows with missing data values
if (numrow1 > numrow2) {
print(paste((numrow1 - numrow2), " observations have been removed due to missing values."))
col1 <- dat$col1 #Makes new versions of col1 and col2 without NA values,
col2 <- dat$col2 # but the observations are still paired properly.
}
#Create variables
n <- numrow2
ns <- calculate_ns(col1, col2)
}
##############################################################################
#### Section 2: Compute summary statistic n_diff and do sanity checks
n_diff <- ns$n_pos - ns$n_neg #This is n_d in the paper
reverse_flag <- FALSE
if (n_diff < 0) {
print("Calculated test statistic is negative.")
print("Continuing with the roles of col1 and col2 switched.") #Different statement for when col2=NULL?
print("Test statistic must be positive; provided data had negative differences count exceeding positive differences count")
reverse_flag <- TRUE
n_diff <- ns$n_neg - ns$n_pos
}
if (is.null(p_tie)){
p_tie <- ns$n_tie / n #This is p_o in the paper - we can consider making this adjustable further on if the user has any theoretical knowledge of p_o
}
p_value <- 0 #Create p-value variable
pivot_prob <- 0
##############################################################################
#### Section 3: Compute the initial p-value
# Note that prob_nd_cumsum is not used to account for greater, less, or two.sided tests
#Loop to compute the p-value
for (j in n_diff:n) {
for (k in 0:((n - j) / 2)) {
prob <- prob_nd(n = n, nd = j, k = k, p_tie = p_tie)
p_value <- p_value + prob
}
if (j == n_diff) {
pivot_prob <- p_value # store the value of the probability that is included in both greater and less
}
}
# This line should do nothing, so comment it out
#prob_nd(n=n,nd=n_diff,k=k,p_tie=p_tie)
##############################################################################
#### Section 4: Make adjustments based on the sidedness of the test and sanity checks, return the result
if (alternative == "greater") { #One-sided test
if (reverse_flag) {
true_p_val <- 1 - p_value + pivot_prob
} else {
true_p_val <- p_value
}
} else if (alternative == "less") {
if (reverse_flag) {
true_p_val <- p_value
} else {
true_p_val <- 1 - p_value + pivot_prob
}
} else if (alternative == "two.sided") {
true_p_val <- p_value * 2
if (n_diff == 0) {
true_p_val <- true_p_val - pivot_prob
}
}
critval <- generate_trinomial_info(n = n, P0s = p_tie, find_RR = FALSE)$critvals
# TO DO: make this look nicer (i.e., mimic the output of, say, t.test)
if (print_info) {
if (alternative == "greater") {
alternative_output <- c("Alternative hypothesis: prob_pos > prob_neg")
} else if (alternative == "less") {
alternative_output <- c("Alternative hypothesis: prob_pos < prob_neg")
} else if (alternative == "two.sided") {
alternative_output <- c("Alternative hypothesis: prob_pos != prob_neg")
}
# if (reverse_flag){
# print("")
# print("NOTE: Sanity check of data and alternative triggered.")
# print("reported p-value is for the OTHER sided test")
# }
cat("",
" Trinomial Test (Ganesalingam, 1994; Bian et al., 2011)",
"",
"Null hypothesis: prob_pos = prob_neg",
alternative_output,
"",
paste("statistic:", n_diff), paste("p-value:", true_p_val), sep = "\n")
}
# Work on a better return value
invisible(list(N = n,
statistic = n_diff,
p_value = true_p_val,
alternative = alternative,
Ns = ns,
critval = critval,
reverse_flag = reverse_flag))
}
# We can probably move this to internal for now.
################################################################################
# Generate information about the trinomial test under various settings
#' Show Trinomial Test Information
#'
#' Returns a range of information relevant to a trinomial test subject to various settings.
#'
#' @param n number of paired observations.
#' @param alpha significance level.
#' @param include_one logical. If \code{FALSE} the probability that all pairs are tied is excluded from \code{P0s}.
#' @param find_RR logical. If \code{TRUE} the output will include the rejection region.
#' @param digits number of significant digits to be returned.
#' @param P0s numeric vector of all possible probabilities of a tie.
#'
#' @return A list containing possible probability distributions, numbers of ties,
#' probabilities of a tie, critical values, and rejection regions for a given
#' number of observations and alpha level of a theoretical trinomial test.
#' @export
#'
#' @examples
generate_trinomial_info <- function(n,
alpha = 0.05,
include_one = TRUE,
find_RR = TRUE,
digits = NULL,
P0s = NULL) {
# Set up basic storage vectors
tmp_sum <- c()
probs_table <- c()
xs <- 0:n
# set up possible probabilities of a tie
# P0 is the probability of a tie; an unbiased estimate is n_tie / n (see last paragraph of section 3 p. 1156)
# So, for each possible number of ties, compute the estimate P0
# This currently makes no provision for the user to specify the P0s
if (is.null(P0s)) {
if (include_one) {
P0s <- (0:(n)) / n
} else {
P0s <- (0:(n - 1)) / n
}
}
# For each possible probability of a tie, compute the probability of observing 0 to n successes
# This uses the distribution function given by Bian et al. (2011) on page 1156
for (p0 in P0s) {
for (i in 1:length(xs)) {
tmp_sum[i] <- prob_nd_cumsum(n = n, nd = xs[i], p_tie = p0)
}
probs_table <- rbind(probs_table, tmp_sum[(n + 1):1])
}
rownames(probs_table) <- paste("p0=", P0s, sep = "")
colnames(probs_table) <- paste("nd=", n:0, sep = "")
if (!is.null(digits)) {
probs_table <- round(probs_table, digits)
}
probs_obj <- list(probs_table = probs_table, nd = n:0, P0s = P0s)
# try replacing with next line to get rid of $critvals$critvals in find_rejection_region
#probs_obj <- c(probs_obj, find_critical_values(probs_obj, alpha = alpha))
probs_obj$critvals <- find_critical_values(probs_obj, alpha = alpha)
if (find_RR) {
probs_obj$RejectionRegion <- find_rejection_region(probs_obj)
}
return(probs_obj)
}
## This seems to only work for n <= 170
#
# If n > 170, then the following error occurs:
# Error in if (any(x < 0)) stop("'x' must be non-negative") :
# missing value where TRUE/FALSE needed
# Called from: FUN(newX[, i], ...)
#
## The problem seems to be due to prob_nd_cumsum
#
# > GridItemTools:::prob_nd_cumsum(n=171,nd=0,p_tie=0)
# [1] NaN
#
## Which in turn is because of factorial(n)
#
# > factorial(170)
# [1] 7.257416e+306
# > factorial(171)
# [1] Inf
#
## The answer might be to cleverly re-write that code using the choose function (multiplied by a value to make it work right)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.