#' @title preText Test
#' @description Calculates preText scores for each preprocessing specification
#' and regresses those scores on the preprocessing choices.
#'
#' @param preprocessed_documents A list object generated by the
#' `factorial_preprocessing()` function. It must contain the elements
#' `dfm_list`, `choices` and `labels`.
#' @param dataset_name A string indicating the name to be associated with the
#' results. Defaults to "Documents".
#' @param distance_method The method that should be used for calculating
#' document distances. Defaults to "cosine".
#' @param num_comparisons The number of ranks to use in calculating average
#' difference (forwarded to `preText_test()`). Defaults to 50.
#' @param parallel Logical forwarded to `preText_test()`, indicating whether
#' preText scores should be calculated in parallel. Defaults to FALSE.
#' @param cores Defaults to 1, can be set to any number less than or equal to
#' the number of cores on one's computer. Forwarded to both
#' `scaling_comparison()` and `preText_test()`.
#' @param verbose Logical indicating whether more information should be printed
#' to the screen to let the user know about progress. Defaults to TRUE. The
#' top-level stage messages and the regression table are always printed; this
#' flag is forwarded to `scaling_comparison()` and `preText_test()`.
#' @return A list with four elements: `preText_scores` (the
#' `dfm_level_results_unordered` element returned by `preText_test()`),
#' `ranked_preText_scores` (its `dfm_level_results` element), `choices` (taken
#' unchanged from `preprocessed_documents`) and `regression_results` (the
#' unrounded output of `preprocessing_choice_regression()`).
#' @examples
#' \dontrun{
#' # load the package
#' library(preText)
#' # load in the data
#' data("UK_Manifestos")
#' # preprocess data
#' preprocessed_documents <- factorial_preprocessing(
#'     UK_Manifestos,
#'     use_ngrams = TRUE,
#'     infrequent_term_threshold = 0.02,
#'     verbose = TRUE)
#' # run preText
#' preText_results <- preText(
#'     preprocessed_documents,
#'     dataset_name = "UK Manifestos",
#'     distance_method = "cosine",
#'     num_comparisons = 100,
#'     verbose = TRUE)
#' }
#' @export
preText <- function(preprocessed_documents,
                    dataset_name = "Documents",
                    distance_method = "cosine",
                    num_comparisons = 50,
                    parallel = FALSE,
                    cores = 1,
                    verbose = TRUE) {
  # Fail fast with a readable message instead of passing NULLs to the helpers.
  if (!is.list(preprocessed_documents)) {
    stop("`preprocessed_documents` must be a list as returned by ",
         "factorial_preprocessing().", call. = FALSE)
  }
  required <- c("dfm_list", "choices", "labels")
  # `[[` matches names exactly, unlike `$`, which partially matches on lists.
  absent <- vapply(
    required,
    function(nm) is.null(preprocessed_documents[[nm]]),
    logical(1)
  )
  if (any(absent)) {
    stop("`preprocessed_documents` is missing required element(s): ",
         paste(required[absent], collapse = ", "),
         ". It should be the output of factorial_preprocessing().",
         call. = FALSE)
  }

  ptm <- proc.time()

  # extract the dfm object list and metadata from preprocessed_documents
  dfm_object_list <- preprocessed_documents[["dfm_list"]]
  choices <- preprocessed_documents[["choices"]]
  labels <- preprocessed_documents[["labels"]]

  if (length(labels) == 0L) {
    stop("`preprocessed_documents$labels` is empty, so there is no baseline ",
         "specification to compare against.", call. = FALSE)
  }
  # The last specification is used as the baseline for both the preText test
  # and the regression, so compute its index once.
  baseline_index <- length(labels)

  cat("Generating document distances...\n")
  # get document distances
  scaling_results <- scaling_comparison(dfm_object_list,
                                        dimensions = 2,
                                        distance_method = distance_method,
                                        verbose = verbose,
                                        cores = cores)
  # extract distance matrices
  distance_matrices <- scaling_results$distance_matrices

  cat("Generating preText Scores...\n")
  preText_results <- preText_test(
    distance_matrices,
    choices = choices,
    labels = labels,
    baseline_index = baseline_index,
    text_size = 1,
    num_comparisons = num_comparisons,
    parallel = parallel,
    cores = cores,
    verbose = verbose)
  preText_scores <- preText_results$dfm_level_results_unordered

  cat("Generating regression results..\n")
  reg_results <- preprocessing_choice_regression(
    Y = preText_scores$preText_score,
    choices = choices,
    dataset = dataset_name,
    base_case_index = baseline_index)

  cat("Regression results (negative coefficients imply less risk):\n")
  # create temporary results so we can round the first two columns for
  # display only; the unrounded regression results are what gets returned
  reg_results2 <- reg_results
  reg_results2[, 1] <- round(reg_results2[, 1], 3)
  reg_results2[, 2] <- round(reg_results2[, 2], 3)
  # display the third column first, followed by the two rounded columns
  print(reg_results2[, c(3, 1, 2)])

  t2 <- proc.time() - ptm
  cat("Complete in:", t2[["elapsed"]], "seconds...\n")

  # extract relevant info
  list(preText_scores = preText_scores,
       ranked_preText_scores = preText_results$dfm_level_results,
       choices = choices,
       regression_results = reg_results)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.