# R/preText_score_plot.R
#
# Defines functions: preText_score_plot
#
# Documented in: preText_score_plot

#' @title preText specification plot
#' @description Draws a dot plot with one point per preprocessing
#' specification, showing that specification's preText score.
#'
#' @param preText_results The output from the `preText_test()` or
#' `preText()` functions. The scores are read from its `dfm_level_results`
#' element when present, otherwise from `ranked_preText_scores`; the chosen
#' element must provide `preText_score` and `preprocessing_steps`.
#' @param display_raw_rankings Logical indicating whether raw ranking
#' differences should be displayed (as opposed to relative differences).
#' When TRUE, scores are multiplied by `num_docs * (num_docs - 1) / 2`.
#' @param remove_labels Option to remove preprocessing step labels. Defaults to
#' FALSE.
#' @param num_docs If display_raw_rankings = TRUE, the number of documents in
#' the corpus. Required (a single number) in that case, ignored otherwise.
#' @param text_size Relative size multiplier (in the spirit of `cex`) for the
#' axis text of the dot plot. Defaults to 1, which leaves the theme's axis
#' text size unchanged.
#' @return The ggplot object, invisibly. The plot is also printed as a side
#' effect.
#' @examples
#' \dontrun{
#' # load the package
#' library(preText)
#' # load in the data
#' data("UK_Manifestos")
#' # preprocess data
#' preprocessed_documents <- factorial_preprocessing(
#'     UK_Manifestos,
#'     use_ngrams = TRUE,
#'     infrequent_term_threshold = 0.02,
#'     verbose = TRUE)
#' # run preText
#' preText_results <- preText(
#'     preprocessed_documents,
#'     dataset_name = "Inaugural Speeches",
#'     distance_method = "cosine",
#'     num_comparisons = 100,
#'     verbose = TRUE)
#' # generate preText score plot
#' preText_score_plot(preText_results)
#' }
#' @export
preText_score_plot <- function(preText_results,
                               display_raw_rankings = FALSE,
                               remove_labels = FALSE,
                               num_docs = NULL,
                               text_size = 1){

    # Column names referenced inside aes() are bound to NULL so that static
    # code checks do not flag them as undefined global variables.
    Coefficient_Type <- Variable <- Coefficient <- NULL

    # Prefer the dfm-level results when they are available.
    if (!is.null(preText_results$dfm_level_results)) {
        results <- preText_results$dfm_level_results
    } else {
        results <- preText_results$ranked_preText_scores
    }

    # Fail early with a readable message instead of an obscure data.frame()
    # error further down.
    if (is.null(results) ||
        is.null(results$preText_score) ||
        is.null(results$preprocessing_steps)) {
        stop("preText_results must contain 'dfm_level_results' or ",
             "'ranked_preText_scores' with 'preText_score' and ",
             "'preprocessing_steps' entries.",
             call. = FALSE)
    }

    if (!is.numeric(text_size) || length(text_size) != 1 ||
        is.na(text_size)) {
        stop("text_size must be a single number.", call. = FALSE)
    }

    if (display_raw_rankings) {
        # Without num_docs the multiplier would be zero-length and the data
        # frame construction below would fail with an unrelated error.
        if (!is.numeric(num_docs) || length(num_docs) != 1 ||
            is.na(num_docs)) {
            stop("num_docs must be a single number when ",
                 "display_raw_rankings = TRUE.",
                 call. = FALSE)
        }
        multiplier <- (num_docs * (num_docs - 1))/2
    } else {
        multiplier <- 1
    }

    data <- data.frame(Coefficient = results$preText_score*multiplier,
                       Coefficient_Type = "Difference",
                       Variable = results$preprocessing_steps,
                       stringsAsFactors = FALSE)

    # Keep the specifications in the order they were supplied rather than
    # alphabetical order; unique() guards against duplicated labels, which
    # factor() rejects as levels.
    data$Variable <- factor(data$Variable,
                            levels = unique(data$Variable))

    UMASS_BLUE <- grDevices::rgb(51,51,153,195,maxColorValue = 255)

    # NOTE(review): `size = 1` replaces the original `lwd = 1`. Older ggplot2
    # releases translated `lwd` to `size`, while newer ones treat it as an
    # unknown parameter for points and warn -- confirm against the targeted
    # ggplot2 version.
    zp1 <- ggplot2::ggplot(data, ggplot2::aes(colour = Coefficient_Type)) +
        ggplot2::scale_color_manual(values = UMASS_BLUE) +
        ggplot2::geom_hline(yintercept = 0,
                            colour = grDevices::gray(1/2),
                            linetype = 2) +
        ggplot2::geom_point(ggplot2::aes(x = Variable,
                                         y = Coefficient),
                            size = 1,
                            shape = 21,
                            fill = UMASS_BLUE)

    # theme_bw() is a complete theme and discards any theme() settings added
    # before it, so every theme tweak has to come afterwards. The axis text
    # is scaled relative to 0.8 so that text_size = 1 leaves the appearance
    # unchanged.
    # NOTE(review): 0.8 is assumed to be the relative axis text size of
    # ggplot2's built-in themes -- confirm.
    zp1 <- zp1 + ggplot2::theme_bw() +
        ggplot2::coord_flip() +
        ggplot2::theme(legend.position = "none",
                       axis.text = ggplot2::element_text(
                           size = ggplot2::rel(0.8 * text_size))) +
        ggplot2::xlab("Preprocessing Combination")

    if (remove_labels) {
        # x is the specification axis; after coord_flip() it is drawn
        # vertically, hence the *.y theme elements.
        zp1 <- zp1 +
            ggplot2::theme(axis.text.y = ggplot2::element_blank(),
                           axis.ticks.y = ggplot2::element_blank())
    }

    if (display_raw_rankings) {
        zp1 <- zp1 + ggplot2::ylab("Unnormalized preText Score")
    } else {
        zp1 <- zp1 + ggplot2::ylab("preText Score")
    }

    print(zp1)

    invisible(zp1)
}
# for testing
# load("~/Dropbox/Preprocessing_Decisions/Data/Scaling/UK_Manifestos_Scaling_Results.Rdata")
# distance_matrices <- scaling_results$distance_matrices
# load("~/Dropbox/Preprocessing_Decisions/Data/128_Combination_Preprocessing_Labels.Rdata")
# load("~/Dropbox/Preprocessing_Decisions/Data/Scaling/Preprocessing_Choices.Rdata")
# matthewjdenny/preText documentation built on July 27, 2021, 1:18 a.m.