R/single_binary_test.R

#' Perform goodness-of-fit tests on a single binary chain.
#'
#' \code{single_binary_test} is used to preform goodness-of-fit tests on single
#' binary chains of data to see if a Markov chain model is appropriate.
#'
#' \code{single_binary_test} works by taking the supplied \code{binary_chain}
#' parameter, counting the transitions between different elements, and then
#' generating \code{n} new chains with the same number of transitions. It
#' generates these new chains by attempting to swap random elements of the chain
#' \code{swaps} times, only doing so if the attempted swap preserves the number
#' of transitions between the two unique elements of the chain.
#' \code{single_binary_test} then saves the chain generated by this process,
#' then preforms a number of swaps equivalent to the value of \code{swaps} on
#' that chain again, then recording the result in a matrix of new data.
#' \code{single_binary_test} does this \code{n} times to generate the \code{n}
#' new chains. These new chains are effectively independent of the original one.
#'
#' Once \code{single_binary_test} has generated new data, it preforms various
#' tests on that data. Included in the function are the likelihood ratio test,
#' the Pearson's chi square test, and a run test for a run of length specified
#' by the argument \code{run}.
#'
#' @param binary_chain A one dimensional vector with two unique values.
#' @param swaps A positive nonzero integer value for the number of swaps to be
#' attempted on the chain. Larger numbers will tend to yield "more independent"
#' data. Generally, the number of swaps should be far greater than the length
#' of \code{binary_chain}.
#' @param n A positive nonzero integer value representing the number of new
#' chains to be generated.
#' @param run The length of run to test for if one is interested in run test
#' statistics.
#' @param tiles The number of chains to be represented in the tile plot when
#' one plots objects generated by \code{single_binary_test}
#' @param bins The number of bins to be displayed in histograms of test
#' statistics when one plots objects generated by \code{single_binary_test}.
#' @param success Denotes the data entry to be counted for run statistics.
#'
#' @examples
#' data <- c(1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0)
#' foo <- single_binary_test(binary_chain = data, swaps = 10000, n = 10000,
#' run = 4, tiles = 32, bins = 100, success = 1)
#'
#' @return \code{single_binary_test} returns a list of \link{class}
#' \code{"single_binary_test"} with the following elements:
#'
#' \code{data}, a matrix of data with \code{binary_chain} in the first row, and
#' the generated n rows of data in the following columns.
#'
#' \code{test_stats_lrt}, a vector of likelihood ratio test statistics for each
#' row of data in \code{data}.
#'
#' \code{test_stats_chi_sq},  a vector of Pearson's chi square test statistics
#' for each row of data in \code{data}.
#'
#' \code{test_stats_run},  a vector of run test statistics for a run of length
#' \code{run} for each row of data in \code{data}.
#'
#' \code{p_value_lrt}, the p-value of \code{binary_chain} calculated exactly
#' from the distribution of \code{test_stats_lrt}
#'
#' \code{p_value_chi_sq}, the p-value of \code{binary_chain} calculated exactly
#' from the distribution of \code{test_stats_chi_sq}
#'
#' \code{p_value_run}, the p-value of \code{binary_chain} calculated exactly
#' from the distribution of \code{test_stats_run}
#'
#' \code{call}, the function call.
#'
#' \code{run}, the length of run specified in the function call.
#'
#' \code{tiles}, the number of tiles specified in the function call.
#'
#' \code{bins}, the number of bins specified in the function call.
#' @export
single_binary_test <- function(binary_chain, swaps = 1000, n = 1000, run = 4,
                               tiles = 30, bins = 30, success = NULL) {
    # Block checks tiles input against b.

    if (tiles > n + 1) {
        print("Warning: you asked for more tiles than generated data.")
        tiles <- min(n + 1, 30)
    }
    # End Block

    # Block checks binary chain, alters it if necessary.

    if (check_false_binary(binary_chain) == FALSE) {
        print("Error: the argument binary_chain must be a set of binary data")
        return(NULL)
    }

    if (check_true_binary(binary_chain) == FALSE) {
        binary_chain <- alter_to_true_binary(binary_chain, success)
    }

    # End block.
    # Block generates data, p-values, returns list of all info with class
    # 'single_binary_test'.

    n_chain_uniques <- length(unique(binary_chain))
    data <- metropolis(binary_chain, swaps, n)
    test_stats_lrt <- u1_test_stat_array(data, n_chain_uniques)
    test_stats_chi_sq <- chi_sq_test_stat_array(data, n_chain_uniques)
    test_stats_run <- run_test_stat_array(data, run)

    p_value_lrt <- vec_greater_than(test_stats_lrt)/(n + 1)
    p_value_chi_sq <- vec_greater_than(test_stats_chi_sq)/(n + 1)
    p_value_run <- vec_greater_than(test_stats_run)/(n + 1)

    out <- list(data =  matrix(unlist(data), ncol = length(binary_chain),
                               byrow = TRUE),
                test_stats_lrt = test_stats_lrt,
                test_stats_chi_sq = test_stats_chi_sq,
                test_stats_run = test_stats_run,
                p_value_lrt = p_value_lrt,
                p_value_chi_sq = p_value_chi_sq,
                p_value_run = p_value_run,
                call = match.call(),
                run = run,
                tiles = tiles,
                bins = bins)
    class(out) <- "single_binary_test"
    return(out)
    # End block.
}
cwcartmell/maRkov documentation built on May 14, 2019, 1:37 p.m.