# R/compute_number_of_paths.R

# Defines functions plot_number_of_paths compute_number_of_paths

# Documented in compute_number_of_paths plot_number_of_paths

#' Computes the number of paths
#'
#' Summarises the \code{topics} slot of an \code{alignment} object per model.
#' For every model \code{m}, it counts how many distinct values of
#' \code{path} and how many distinct topics \code{k} appear in that model.
#'
#' When \code{plot = TRUE}, the summary is additionally passed to
#' \code{plot_number_of_paths} and the resulting figure is printed; the
#' returned value is the same either way.
#'
#' @param aligned_topics (required) An \code{alignment} class with the
#' alignment results. Its \code{topics} slot must provide the columns
#' \code{m}, \code{k} and \code{path}.
#' @param plot (optional, default = FALSE) whether to also print a plot of
#' the number of paths against the number of topics.
#'
#' @seealso align_topics
#' @return a \code{data.frame} with one row per model \code{m}. The
#' \code{n_paths} column gives the number of distinct paths in that model and
#' the \code{n_topics} column gives the number of distinct topics in it.
#'
#' @examples
#' library(purrr)
#' data <- rmultinom(10, 20, rep(0.1, 20))
#' lda_params <- setNames(map(1:5, ~ list(k = .)), 1:5)
#' lda_models <- run_lda_models(data, lda_params)
#' alignment <- align_topics(lda_models)
#' compute_number_of_paths(alignment, plot = TRUE)
#'
#' @importFrom ggplot2 ggplot geom_line geom_point aes guides theme_minimal
#' scale_x_continuous scale_y_continuous labs %+%
#' @importFrom dplyr group_by summarise n n_distinct
#' @importFrom magrittr %>%
#' @export
compute_number_of_paths <- function(aligned_topics, plot = FALSE) {
  # Group the topic table by model so the counts below are per model.
  topics_by_model <- group_by(aligned_topics@topics, m)

  # `.groups = "drop"` returns an ungrouped table with one row per model.
  path_counts <- summarise(
    topics_by_model,
    n_paths = n_distinct(path),
    n_topics = n_distinct(k),
    .groups = "drop"
  )

  # Plotting is a side effect only; the summary is returned regardless.
  if (plot) {
    print(plot_number_of_paths(path_counts))
  }

  path_counts
}


#' Plots the number of paths
#'
#' This function plots the number of paths versus the number of
#' topics in each model. A thick gray reference line shows where the number
#' of paths would equal the number of topics; the observed number of paths is
#' drawn on top as a line with points. Points are coloured by whether the
#' model has fewer paths than topics (the colour legend is hidden).
#'
#' @param n_paths A \code{data.frame} with columns \code{m}, \code{n_topics}
#' and \code{n_paths}, as generated by \code{compute_number_of_paths}.
#'
#' @return A \code{ggplot} object.
#'
#' @seealso \code{compute_number_of_paths}
#'
#' @importFrom ggplot2 ggplot aes geom_line geom_point guides theme_minimal
#'  scale_x_continuous scale_y_continuous labs
#' @export
plot_number_of_paths <- function(n_paths) {
  ggplot(n_paths, aes(x = n_topics)) +
    # Reference line y = n_topics. Line thickness is set with `linewidth`;
    # using `size` for lines is deprecated since ggplot2 3.4.0 and warns.
    geom_line(aes(y = n_topics), col = "gray80", linewidth = 3) +
    geom_line(aes(y = n_paths)) +
    # Colour distinguishes models with fewer paths than topics.
    geom_point(aes(y = n_paths, col = n_paths < n_topics),
               size = 3) +
    guides(col = "none") +
    theme_minimal() +
    # Ticks sit at each model's topic count but are labelled with the model.
    scale_x_continuous(
      breaks = n_paths$n_topics, labels = n_paths$m,
      minor_breaks = NULL) +
    scale_y_continuous(
      breaks = n_paths$n_topics,
      minor_breaks = NULL) +
    labs(x = "models", y = "# of paths")
}
# lasy/alto documentation built on June 23, 2024, 6:45 a.m.