R/gutenberg_mirrors.R
In gutenbergr: Download and Process Public Domain Works from Project Gutenberg

Documented in gutenberg_get_all_mirrors gutenberg_get_mirror is_working_gutenberg_mirror

#' Get the recommended mirror for Gutenberg files
#'
#' Get the recommended mirror for Gutenberg files and set the global
#' `gutenberg_mirror` options.
#'
#' @param verbose Whether to show messages about the Project Gutenberg mirror
#'   that was chosen
#'
#' @return A character vector with the url for the chosen mirror.
#'
#' @examplesIf interactive()
#'
#' gutenberg_get_mirror()
#'
#' @export
gutenberg_get_mirror <- function(verbose = TRUE) {
  mirror <- getOption("gutenberg_mirror")
  if (!is.null(mirror)) {
    if (is_working_gutenberg_mirror(mirror)) {
      return(mirror)
    } else {
      maybe_message(
        verbose,
        paste0(
          "Mirror {mirror} set by options(gutenberg_mirror = {mirror}) is not ",
          "accessible. It may not be a Gutenberg mirror or may longer be ",
          "maintained. Checking for new mirror."
        ),
        class = "mirror-refresh"
      )
    }
  }

  # figure out the mirror for this location
  harvest_base_url <- "https://www.gutenberg.org/robot/harvest"
  maybe_message(
    verbose,
    "Determining mirror for Project Gutenberg from {harvest_base_url}.",
    class = "mirror-finding"
  )
  harvest_url <- glue::glue("{harvest_base_url}?filetypes[]=txt")
  lines <- read_url(harvest_url)
  a <- stringr::str_subset(lines, stringr::fixed("<a href="))[1]
  mirror_full_url <- stringr::str_match(a, "href=\"(.*?)\"")[2]

  # parse and leave out the path
  parsed <- urltools::url_parse(mirror_full_url)
  mirror <- unclass(glue::glue_data(parsed, "{scheme}://{domain}"))
  maybe_message(
    verbose,
    "Using mirror {mirror}.",
    class = "mirror-found"
  )

  # set option for next time
  options(gutenberg_mirror = mirror)
  return(mirror)
}


#' Get all mirror data from Project Gutenberg
#'
#' Get all mirror data from \url{https://www.gutenberg.org/MIRRORS.ALL}. This
#' only includes mirrors reported to Project Gutenberg and verified to be
#' relatively stable. For more information on mirroring and getting your own
#' mirror listed, see \url{https://www.gutenberg.org/help/mirroring.html}.
#'
#' @return A tbl_df of Project Gutenberg mirrors and related data
#' \describe{
#'
#'   \item{continent}{Continent where the mirror is located}
#'
#'   \item{nation}{Nation where the mirror is located}
#'
#'   \item{location}{Location of the mirror}
#'
#'   \item{provider}{Provider of the mirror}
#'
#'   \item{url}{URL of the mirror}
#'
#'   \item{note}{Special notes}
#' }
#' @examplesIf interactive()
#'
#' gutenberg_get_all_mirrors()
#'
#' @export
gutenberg_get_all_mirrors <- function() {
  mirrors_url <- "https://www.gutenberg.org/MIRRORS.ALL"
  mirrors <- purrr::quietly(read_md_table)(
    mirrors_url,
    warn = FALSE,
    force = TRUE,
    show_col_types = FALSE
  )
  if (length(mirrors$warnings) && !(
    length(mirrors$warnings) == 1 &&
    all(stringr::str_detect(mirrors$warnings, "One or more parsing issues"))
  )) {
    cli::cli_abort(
      "Unexpected warning in {.code read_md_table()}.",
      class = "gutenbergr-error-mirror_table_reading"
    )
  }
  mirrors <-  dplyr::slice(mirrors$result, 2:(dplyr::n() - 1))

  return(mirrors)
}

#' Check if an http(s) or ftp(s) `url` resolves to a working Gutenberg mirror.
#'
#' Checks for a root level `README` file at `url` with reference to
#' `GUTINDEX.ALL`. If this exists, `url` is most likely a working
#' Gutenberg mirror.
#'
#' @return Boolean: whether the `url` resolves to a mirror.
#'
#' @keywords internal
is_working_gutenberg_mirror <- function(url) {
  base_url <- sub("/+$", "", url)
  readme_url <- paste0(base_url, "/README")
  readme <- read_url(readme_url)
  contains_pg_string <- any(grepl("GUTINDEX.ALL", readme))
  contains_pg_string
}

Any scripts or data that you put into this service are public.

gutenbergr documentation built on June 8, 2025, 11:26 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

gutenbergr
Download and Process Public Domain Works from Project Gutenberg

R/gutenberg_mirrors.R
In gutenbergr: Download and Process Public Domain Works from Project Gutenberg

Defines functions is_working_gutenberg_mirror gutenberg_get_all_mirrors gutenberg_get_mirror

Documented in gutenberg_get_all_mirrors gutenberg_get_mirror is_working_gutenberg_mirror

Try the gutenbergr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gutenbergr Download and Process Public Domain Works from Project Gutenberg

R/gutenberg_mirrors.R In gutenbergr: Download and Process Public Domain Works from Project Gutenberg

Defines functions is_working_gutenberg_mirror gutenberg_get_all_mirrors gutenberg_get_mirror

Documented in gutenberg_get_all_mirrors gutenberg_get_mirror is_working_gutenberg_mirror

Try the gutenbergr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gutenbergr
Download and Process Public Domain Works from Project Gutenberg

R/gutenberg_mirrors.R
In gutenbergr: Download and Process Public Domain Works from Project Gutenberg