R/multiple_binary_test.R

#' Perform goodness-of-fit tests on multiple binary chains.
#'
#' \code{multiple_binary_test} is used to preform goodness-of-fit tests on
#' multiple binary chains of data of the same length to see if a Markov chain
#' model is appropriate.
#'
#' \code{multiple_binary_test} works by taking the supplied \code{binary_chains}
#' parameter, counting the transitions between different elements, and then
#' generating \code{n} new sets of chains with the same number of transitions.
#' It generates these new sets of chains by attempting to swap random elements
#' of the chains \code{swaps} times, only doing so if the attempted swap
#' preserves the number of transitions between the two unique elements of the
#' chains. \code{multiple_binary_test} then saves the chain generated by this
#' process, then preforms a number of swaps equivalent to the value of
#' \code{swaps} on that chain again, then recording the result in a new entry in
#' a list of data. \code{multiple_binary_test} does this \code{n} times to
#' generate the \code{n} new sets of chains. These new sets of chains are
#' effectively independent of the original one.
#'
#' Once \code{multiple_binary_test} has generated new data, it preforms various
#' tests of that data. included in the function are the likelihood ratio test,
#' the Pearson's chi square test, and a run test for a run of length specified
#' by the argument \code{run}.
#'
#' @param binary_chains A two dimensional matrix, in which there are two unique
#' values.
#' @param swaps A positive nonzero integer value for the number of swaps to be
#' attempted on the chain. Larger values will tend to yield "more independent"
#' data. Generally, the number of swaps should be much larger then the number of
#' elements in the matrix \code{binary_chains}.
#' @param n A positive nonzero integer representing the number of new sets of
#' chains to be generated.
#' @param run The length of the run to test for if one is interested in run
#' test statistics.
#' @param bins The number of bins to be displayed in histograms of test
#' statistics when one plots objects generated by \code{multiple_binary_test}.
#' @param success Denotes the data entry to be counted for run statistics.
#'
#' @examples
#' data <- as.matrix(maRkov::snoqualmie)
#' foo <- multiple_binary_test(binary_chains = data, swaps = 10000, n = 10000,
#'                             run = 3, bins = 50)
#'
#' @return  \code{multiple_binary_test} returns a list of \link{class}
#' \code{"multiple_binary_test"} with the following elements:
#'
#' \code{data}, a list of matrices, the first of which is binary_chains, and
#' the rest of which are the generated data.
#'
#' \code{test_stats_lrt}, vector of likelihood ratio test statistics for each
#' element of list \code{data}.
#'
#' \code{test_stats_chi_sq}, a vector of Pearson's chi square test statistics
#' for each element of list \code{data}.
#'
#' \code{test_stats_run}, a vector of run test statistics for a run of length
#' \code{run} for each element of list \code{data}.
#'
#' \code{p_value_lrt}, the p-value of \code{binary_chain}, calculated exactly
#' from the distribution of \code{test_stats_lrt}.
#'
#' \code{p_value_chi_sq}, the p-value of \code{binary_chain}, calculated exactly
#' from the distribution of \code{test_stats_chi_sq}.
#'
#' \code{p_value_run}, the p-value of \code{binary_chain}, calculated exactly
#' from the distribution of \code{test_stats_run}.
#'
#' \code{call}, the function call.
#'
#' \code{bins}, the number of bins specified in the function call.
#'
#' \code{run}, the length of run specified in the function call.
#' @export
multiple_binary_test <- function(binary_chains, swaps = 1000, n = 1000,
                                 run = 4, bins = 30, success = NULL) {
    # Block checks binary chain, alters it if nessecary.

    if (check_false_binary_multiple(binary_chains) == FALSE) {
        print("Error: the argument binary_chain must be a set of binary data")
        return(NULL)
    }

    if (check_true_binary_multiple(binary_chains) == FALSE) {
        binary_chain <- alter_to_true_binary_multiple(binary_chain, success)
    }

    # End block.

    # Block generates data, p-values, returns list of all info with class
    # 'single.binary.test'.

    uniques <- c()
    for (i in 1:nrow(binary_chains)) {
        uniques <- union(uniques, unique(binary_chains[i, ]))
    }
    n_chain_uniques <- length(uniques)
    temp_data <- multiple_metropolis(binary_chains, swaps, n)
    test_stats_lrt <- u6_test_stat_array(temp_data, n_chain_uniques)
    test_stats_chi_sq <- multiple_chi_sq_test_stat_array(temp_data, n_chain_uniques)
    test_stats_run <- multiple_run_test_stat_array(temp_data, run)

    p_value_lrt <- vec_greater_than(test_stats_lrt) / ( n + 1)
    p_value_chi_sq <- vec_greater_than(test_stats_chi_sq) / ( n + 1)
    p_value_run <- vec_greater_than(test_stats_run) / ( n + 1)

    out <- list(data = lapply(temp_data,
                             function(x) matrix(unlist(x),
                                                ncol = ncol(binary_chains),
                                                byrow = TRUE)),
               test_stats_lrt = test_stats_lrt,
               test_stats_chi_sq = test_stats_chi_sq,
               test_stats_run = test_stats_run,
               p_value_lrt = p_value_lrt,
               p_value_chi_sq = p_value_chi_sq,
               p_value_run = p_value_run,
               call = match.call(),
               bins = bins,
               run = run)

    class(out) <- "multiple_binary_test"

    return(out)

    # End block.
}
cwcartmell/maRkov documentation built on May 14, 2019, 1:37 p.m.