R/mantel_comparison.R

Defines functions mantel_comparison

Documented in mantel_comparison

#' @title Ensemble Mantel Tests
#' @description Calculates Mantel test statistics for differences between
#' distance matrices for a list of distance matrices (one per preprocessing
#' method) supplied by the `scaling_comparison()` function.
#'
#' @param distance_matrices A list of document distance matrices generated by
#' the `scaling_comparison()` and returned in the `$distance_matrices` field.
#' @param labels Optional argument giving names for each preprocessing step.
#' This is generated by the `factorial_preprocessing()` function and returned in
#' the `$labels` field.
#' @param permutations The number of permutations to be used in each Mantel
#' test. Defaults to 1000.
#' @return A result list object where the first entry is a matrix summarizing
#' mantel test statistics. The second object in the list is a matrix of the
#' values described above. The third object is a list of all raw mantel results.
#' @examples
#' \dontrun{
#' # load the package
#' library(preText)
#' # load in the data
#' data("UK_Manifestos")
#' # preprocess data
#' preprocessed_documents <- factorial_preprocessing(
#'     UK_Manifestos,
#'     use_ngrams = TRUE,
#'     infrequent_term_threshold = 0.02,
#'     verbose = TRUE)
#' # scale documents
#' scaling_results <- scaling_comparison(preprocessed_documents$dfm_list,
#'                                       dimensions = 2,
#'                                       distance_method = "cosine",
#'                                       verbose = TRUE)
#' # run mantel tests
#' mantel_results <- mantel_comparison(scaling_results$distance_matrices,
#'                                     labels = preprocessed_documents$labels,
#'                                     permutations = 1000)
#' }
#' @export
mantel_comparison <- function(distance_matrices,
                              labels = NULL,
                              permutations = 1000){
    names <- labels
    cur_dm <- test_against <- NULL
    # get the number of distance matrices
    num_dms <- length(distance_matrices)

    # ceate data structures to store information
    result_list <- vector(mode = "list", length = num_dms*(num_dms - 1))
    mantel_matrix <- matrix(0, nrow = num_dms, ncol = num_dms)
    result_summary <- matrix(0, nrow = num_dms*(num_dms - 1)/2,ncol = 5)
    summary_counter <- 1
    list_counter <- 1

    colnames(result_summary) <- c("statistic",
                                  "p_value_one_tail",
                                  "p_value_two_tail",
                                  "lower_limit",
                                  "upper_limit")

    # set default names
    rownames(result_summary) <- as.character(1:(num_dms*(num_dms - 1)/2))
    names(result_list) <- as.character(1:(num_dms*(num_dms - 1)))

    if (!is.null(names)) {
        colnames(mantel_matrix) <- rownames(mantel_matrix) <- names
    }

    for (i in 1:num_dms) {
        cat("Currently working on preprocessing choice",i,"of",num_dms,"\n")
        ptm <- proc.time()
        # get the current focal distance matrix
        cur_dm <<- distance_matrices[[i]]

        # now loop over all of the others
        for (j in 1:num_dms) {
            if (i != j){

                test_against <<- distance_matrices[[j]]
                result <- ecodist::mantel(formula = "cur_dm ~ test_against",
                                          nperm = permutations)

                mantel_matrix[i,j] <- as.numeric(result[1])
                if (j > i) {
                    result_summary[summary_counter,1] <- as.numeric(result[1])
                    result_summary[summary_counter,2] <- as.numeric(result[2])
                    result_summary[summary_counter,3] <- as.numeric(result[4])
                    result_summary[summary_counter,4] <- as.numeric(result[5])
                    result_summary[summary_counter,5] <- as.numeric(result[6])

                    # give things the right row names if they were provided
                    if (!is.null(names)) {
                        rownames(result_summary)[summary_counter] <-
                            paste(names[i],"<->",names[j], sep = "")
                    }

                    summary_counter <- summary_counter + 1
                }
                mantel_matrix[i,j] <- as.numeric(result[1])

                result_list[[list_counter]] <- result

                # give things the right row names if they were provided
                if (!is.null(names)) {
                    names(result_list)[list_counter] <-
                        paste(names[i],"<->",names[j], sep = "")
                }

                list_counter <- list_counter + 1
            }
        }

        t2 <- proc.time() - ptm
        cat("Complete in:",t2[[3]],"seconds...\n")
    }

    return(list(summary = result_summary,
                mantel_matrix = mantel_matrix,
                raw_results = result_list))

}
matthewjdenny/preText documentation built on July 27, 2021, 1:18 a.m.