#' @title preText score plot
#' @description Dot plot of the preText score for each preprocessing
#' specification, with one row per specification.
#'
#' @param preText_results The output from the `preText_test()` or
#' `preText()` functions. Scores are read from its `dfm_level_results`
#' element when present and from `ranked_preText_scores` otherwise; the
#' element used must provide `preText_score` and `preprocessing_steps`.
#' @param display_raw_rankings Logical indicating whether raw ranking
#' differences should be displayed (as opposed to relative differences).
#' When TRUE, scores are multiplied by `num_docs * (num_docs - 1) / 2`.
#' @param remove_labels Option to remove preprocessing step labels. Defaults to
#' FALSE.
#' @param num_docs If display_raw_rankings = TRUE, the number of documents in
#' the corpus. Must be a single number in that case; ignored otherwise.
#' @param text_size The `cex` (relative expansion factor) for the axis text in
#' the dot plot generated by function. Defaults to 1, which leaves the axis
#' text at the size `ggplot2::theme_bw()` uses.
#' @return The ggplot object, invisibly. The plot is also printed.
#' @examples
#' \dontrun{
#' # load the package
#' library(preText)
#' # load in the data
#' data("UK_Manifestos")
#' # preprocess data
#' preprocessed_documents <- factorial_preprocessing(
#'     UK_Manifestos,
#'     use_ngrams = TRUE,
#'     infrequent_term_threshold = 0.02,
#'     verbose = TRUE)
#' # run preText
#' preText_results <- preText(
#'     preprocessed_documents,
#'     dataset_name = "UK Manifestos",
#'     distance_method = "cosine",
#'     num_comparisons = 100,
#'     verbose = TRUE)
#' # generate preText score plot
#' preText_score_plot(preText_results)
#' }
#' @export
preText_score_plot <- function(preText_results,
                               display_raw_rankings = FALSE,
                               remove_labels = FALSE,
                               num_docs = NULL,
                               text_size = 1) {
  # Bind the column names referenced inside aes() so that R CMD check does not
  # flag them as undefined global variables.
  Coefficient_Type <- Variable <- Coefficient <- NULL

  # Raw rankings rescale the score by the number of document pairs, which
  # cannot be computed without the corpus size.
  if (display_raw_rankings) {
    if (!is.numeric(num_docs) || length(num_docs) != 1L || is.na(num_docs)) {
      stop("`num_docs` must be a single number when ",
           "`display_raw_rankings = TRUE`.",
           call. = FALSE)
    }
    multiplier <- (num_docs * (num_docs - 1)) / 2
  } else {
    multiplier <- 1
  }

  # Prefer the dfm-level results and fall back to the ranked scores.
  results <- preText_results$dfm_level_results
  if (is.null(results)) {
    results <- preText_results$ranked_preText_scores
  }
  if (is.null(results)) {
    stop("`preText_results` must contain `dfm_level_results` or ",
         "`ranked_preText_scores`.",
         call. = FALSE)
  }
  if (is.null(results$preText_score) ||
      is.null(results$preprocessing_steps)) {
    stop("The preText results must contain `preText_score` and ",
         "`preprocessing_steps`.",
         call. = FALSE)
  }

  data <- data.frame(Coefficient = results$preText_score * multiplier,
                     Coefficient_Type = "Difference",
                     Variable = results$preprocessing_steps,
                     stringsAsFactors = FALSE)
  # Fix the factor levels to the order of appearance so the rows of the plot
  # follow the order of the results rather than alphabetical order.
  data$Variable <- factor(data$Variable, levels = unique(data$Variable))

  UMASS_BLUE <- rgb(51, 51, 153, 195, maxColorValue = 255)

  zp1 <- ggplot2::ggplot(data, ggplot2::aes(colour = Coefficient_Type)) +
    ggplot2::scale_color_manual(values = UMASS_BLUE) +
    ggplot2::geom_hline(yintercept = 0,
                        colour = gray(1 / 2),
                        linetype = 2) +
    # `size = 1` spells out what the former `lwd = 1` presumably intended:
    # `lwd` is not a point parameter, and recent ggplot2 releases discard it
    # with a warning.
    ggplot2::geom_point(ggplot2::aes(x = Variable,
                                     y = Coefficient),
                        size = 1,
                        shape = 21,
                        fill = UMASS_BLUE)

  # theme_bw() is a complete theme and replaces any theme settings added
  # before it, so every customisation has to come after it. theme_bw() draws
  # axis text at rel(0.8); scaling that value keeps the default appearance
  # when text_size = 1.
  zp1 <- zp1 +
    ggplot2::theme_bw() +
    ggplot2::coord_flip() +
    ggplot2::theme(
      legend.position = "none",
      axis.text = ggplot2::element_text(size = ggplot2::rel(0.8 * text_size))
    ) +
    ggplot2::xlab("Preprocessing Combination")

  if (remove_labels) {
    # After coord_flip() the preprocessing labels sit on the vertical axis.
    zp1 <- zp1 +
      ggplot2::theme(axis.text.y = ggplot2::element_blank(),
                     axis.ticks.y = ggplot2::element_blank())
  }

  if (display_raw_rankings) {
    zp1 <- zp1 + ggplot2::ylab("Unnormalized preText Score")
  } else {
    zp1 <- zp1 + ggplot2::ylab("preText Score")
  }

  print(zp1)
  invisible(zp1)
}
# for testing
# load("~/Dropbox/Preprocessing_Decisions/Data/Scaling/UK_Manifestos_Scaling_Results.Rdata")
# distance_matrices <- scaling_results$distance_matrices
# load("~/Dropbox/Preprocessing_Decisions/Data/128_Combination_Preprocessing_Labels.Rdata")
# load("~/Dropbox/Preprocessing_Decisions/Data/Scaling/Preprocessing_Choices.Rdata")
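# NOTE: the two lines below appear to be leftover text from the web page this
# file was copied from; they are not R code.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.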