R/log_analysis.R

Defines functions extract_user_proxy_mapping read_log_file analyze_logs parse_log_filename

Documented in analyze_logs extract_user_proxy_mapping parse_log_filename read_log_file

#' Parse ShinyProxy Log File Name
#'
#' Parses the name of a container log file generated by ShinyProxy and
#' extracts the metadata encoded in it: `specId`, `proxyId`, `startupTime`,
#' and `logType`. Only the base name is inspected; leading directories are
#' ignored. The expected form is
#' `<specId>_<proxyId>_<startupTime>_<stdout|stderr>.log`.
#'
#' @param filename Character vector. The full path(s) to the log file(s).
#' @return A data frame with one row per element of `filename` and the
#'   character columns `specId`, `proxyId`, `startupTime`, and `logType`.
#'   A zero-length input yields a zero-row data frame. An error is raised if
#'   any file name does not match the expected pattern.
#' @examples
#' \dontrun{
#'   parse_log_filename(
#'     paste0(
#'       "path/to/containersLogs/",
#'       "my_app_220c8b25-691d-4922-8c55-3d69bdecb7a0",
#'       "_31_Jan_2025_04_02_35_stdout.log"
#'     )
#'   )
#' }
#' @export
#' @importFrom stringr str_match
parse_log_filename <- function(filename) {
  base_name <- basename(filename)

  # The pattern is assembled piecewise so each capture group is readable:
  # - specId: anything, matched non-greedily so it may contain underscores
  # - proxyId: a UUID (e.g., 220c8b25-691d-4922-8c55-3d69bdecb7a0)
  # - startupTime: a timestamp in the format 31_Jan_2025_04_02_35
  # - logType: either "stdout" or "stderr"
  pattern <- paste0(
    "^(.*?)",
    "_([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})",
    "_([0-9]{1,2}_[A-Za-z]{3}_[0-9]{4}_[0-9]{2}_[0-9]{2}_[0-9]{2})",
    "_(stdout|stderr)\\.log$"
  )

  matches <- stringr::str_match(base_name, pattern)

  # Check every element, not just the first, so that vector input is never
  # silently truncated and empty input does not hit an out-of-bounds index.
  unmatched <- is.na(matches[, 1])
  if (any(unmatched)) {
    stop(
      sprintf(
        "The file name '%s' does not match the expected pattern.",
        paste(filename[unmatched], collapse = "', '")
      ),
      call. = FALSE
    )
  }

  data.frame(
    specId      = matches[, 2],
    proxyId     = matches[, 3],
    startupTime = matches[, 4],
    logType     = matches[, 5],
    stringsAsFactors = FALSE
  )
}

#' Analyze ShinyProxy Log Files in a Directory
#'
#' Scans a directory for container log files (names ending in `_stdout.log`
#' or `_stderr.log`), extracts metadata from each file name with
#' [parse_log_filename()], reads each file, and counts its lines in total and
#' per category. Categories are case-insensitive regular-expression matches
#' on each line, so one line may be counted in several categories:
#' errors (`error|exception`), warnings (`warning`), fatals (`fatal`), and
#' informational lines
#' (`info|debug|started|listening|connected|released`).
#'
#' If `path_shinylogs` is supplied, the ShinyProxy identity logs found there
#' (files ending in `.log` or `.gz`) are read to map each container to the
#' user who launched it, and a `user` column is added to the summary.
#'
#' A directory that exists but holds no matching files yields zero-row
#' results rather than an error.
#'
#' @param path_container_logs Character. The path to the directory containing
#'   the container log files.
#' @param path_shinylogs Character or `NULL` (default). The path to the
#'   directory containing ShinyProxy identity logs. When `NULL`, no user
#'   information is added.
#' @return A list containing two data frames:
#' \describe{
#'   \item{file_info}{One row per log file: `specId`, `proxyId`,
#'     `startupTime`, `logType`, `filePath`, `numLines`, `numErrorLines`,
#'     `numWarningLines`, `numFatalLines`, and `numInfoLines`.}
#'   \item{summary}{One row per container (grouped by `specId`, `proxyId`,
#'     and `startupTime`) with `stdout_lines`, `stderr_lines`,
#'     `total_error_lines`, `total_warning_lines`, `total_fatal_lines`, and
#'     `total_info_lines`, plus `user` when `path_shinylogs` is given.}
#' }
#' @export
#' @importFrom dplyr rowwise mutate ungroup select group_by summarise left_join bind_rows
#' @importFrom purrr map_df
#' @importFrom stringr str_detect regex
#' @importFrom tibble tibble
#' @examples
#' \dontrun{
#'   result <- analyze_logs("path/to/containersLogs", "path/to/shinylogs")
#'   print(result$summary)
#' }
analyze_logs <- function(path_container_logs, path_shinylogs = NULL) {
  # Fail early with a clear message; list.files() would silently return
  # nothing for a missing directory.
  if (!any(dir.exists(path_container_logs))) {
    stop(
      sprintf(
        "The directory '%s' does not exist.",
        paste(path_container_logs, collapse = "', '")
      ),
      call. = FALSE
    )
  }
  if (!is.null(path_shinylogs) && !any(dir.exists(path_shinylogs))) {
    stop(
      sprintf(
        "The directory '%s' does not exist.",
        paste(path_shinylogs, collapse = "', '")
      ),
      call. = FALSE
    )
  }

  # List all files with full paths and keep only the container log files
  files <- list.files(path_container_logs, full.names = TRUE)
  files <- files[stringr::str_detect(files, "_(stdout|stderr)\\.log$")]

  # Regex patterns (case-insensitive) for the different log categories
  pattern_error   <- stringr::regex("error|exception", ignore_case = TRUE)
  pattern_warning <- stringr::regex("warning", ignore_case = TRUE)
  pattern_fatal   <- stringr::regex("fatal", ignore_case = TRUE)
  pattern_info    <- stringr::regex(
    "info|debug|started|listening|connected|released",
    ignore_case = TRUE
  )

  count_matching <- function(lines, pattern) {
    sum(stringr::str_detect(lines, pattern))
  }

  # Typed zero-row template: guarantees every column exists (with the right
  # type) even when no log file is found, so the steps below never fail on
  # missing columns.
  empty_file_info <- tibble::tibble(
    specId          = character(),
    proxyId         = character(),
    startupTime     = character(),
    logType         = character(),
    filePath        = character(),
    numLines        = integer(),
    numErrorLines   = integer(),
    numWarningLines = integer(),
    numFatalLines   = integer(),
    numInfoLines    = integer()
  )

  # Extract metadata and compute statistics, reading each file exactly once.
  # parse_log_filename() raises an error for names that do not follow the
  # expected ShinyProxy pattern.
  file_info <- purrr::map_df(files, function(file) {
    info  <- parse_log_filename(file)
    lines <- readLines(file, warn = FALSE)

    info$filePath        <- file
    info$numLines        <- length(lines)
    info$numErrorLines   <- count_matching(lines, pattern_error)
    info$numWarningLines <- count_matching(lines, pattern_warning)
    info$numFatalLines   <- count_matching(lines, pattern_fatal)
    info$numInfoLines    <- count_matching(lines, pattern_info)
    info
  })
  file_info <- dplyr::bind_rows(empty_file_info, file_info)

  # Summarize statistics by container (specId, proxyId, and startupTime)
  summary_df <- file_info |>
    dplyr::group_by(specId, proxyId, startupTime) |>
    dplyr::summarise(
      stdout_lines        = sum(numLines[logType == "stdout"], na.rm = TRUE),
      stderr_lines        = sum(numLines[logType == "stderr"], na.rm = TRUE),
      total_error_lines   = sum(numErrorLines, na.rm = TRUE),
      total_warning_lines = sum(numWarningLines, na.rm = TRUE),
      total_fatal_lines   = sum(numFatalLines, na.rm = TRUE),
      total_info_lines    = sum(numInfoLines, na.rm = TRUE),
      .groups = "drop"
    )

  # If path_shinylogs is provided, retrieve ShinyProxy users and their
  # associated containers
  if (!is.null(path_shinylogs)) {
    files_identity <- list.files(path_shinylogs, full.names = TRUE)
    files_identity <- files_identity[
      stringr::str_detect(files_identity, "\\.(log|gz)$")
    ]

    # Same template trick as above: the join keys must exist even when no
    # identity file is found.
    empty_mapping <- tibble::tibble(
      user    = character(),
      proxyId = character(),
      specId  = character()
    )
    user_proxy_mapping <- purrr::map_df(files_identity, function(file) {
      log_lines <- read_log_file(file)
      extract_user_proxy_mapping(log_lines)
    })
    user_proxy_mapping <- unique(
      dplyr::bind_rows(empty_mapping, user_proxy_mapping)
    )

    summary_df <- summary_df |>
      dplyr::left_join(user_proxy_mapping, by = c("proxyId", "specId")) |>
      dplyr::select(
        specId, proxyId, user, startupTime,
        stdout_lines, stderr_lines,
        total_error_lines, total_warning_lines,
        total_fatal_lines, total_info_lines
      )
  }

  list(file_info = file_info, summary = summary_df)
}

#' Read a Log File
#'
#' Internal function to read a log file, supporting both plain text and
#' gzipped formats. A file is treated as gzipped when its path ends in `.gz`.
#'
#' @param file Character. The file path to read.
#' @return A character vector of log lines.
#' @keywords internal
read_log_file <- function(file) {
  if (!grepl("\\.gz$", file)) {
    return(readLines(file, warn = FALSE))
  }

  # Open the connection explicitly and close it on exit. Passing an unopened
  # gzfile() straight to readLines() leaves the connection object registered
  # until garbage collection, which leaks a connection slot and produces
  # "closing unused connection" warnings.
  con <- gzfile(file, open = "rt")
  on.exit(close(con), add = TRUE)
  readLines(con, warn = FALSE)
}


#' Extract User and Proxy Mappings
#'
#' Internal helper that pulls user/container associations out of raw log
#' lines. A line contributes a row when it contains a bracketed tag of the
#' form `[user=<user> proxyId=<proxyId> specId=<specId>]`, where each value
#' is a run of non-space characters. Lines without such a tag are dropped.
#'
#' @param log_lines A character vector containing log file lines.
#' @return A tibble with the columns `user`, `proxyId`, and `specId`, one row
#'   per matching line.
#' @keywords internal
#' @importFrom tibble tibble
extract_user_proxy_mapping <- function(log_lines) {
  # Column 1 of the match matrix is the full match; columns 2-4 are the
  # captured user, proxyId and specId. Non-matching lines yield NA rows.
  captured <- stringr::str_match(
    log_lines,
    "\\[user=([^ ]+) proxyId=([^ ]+) specId=([^ ]+)\\]"
  )

  # Keep only the lines where a user was actually captured
  has_user <- !is.na(captured[, 2])

  tibble::tibble(
    user    = captured[has_user, 2],
    proxyId = captured[has_user, 3],
    specId  = captured[has_user, 4]
  )
}

Try the shinyproxyLogs package in your browser

Any scripts or data that you put into this service are public.

shinyproxyLogs documentation built on April 12, 2025, 1:48 a.m.