# R/s3_transfer_bulk.R

#' @title Prepare S3 content dataframe to be used by bulk download/read/upload
#'   functions
#' 
#' @description `cloud_s3_ls` returns a dataframe describing the contents of an
#'   S3 folder (for uploads, [cloud_local_ls] plays the same role for the local
#'   project folder). This dataframe has `name` and `type` columns. `name` may
#'   contain either full or short names (depending on the `full_names` parameter
#'   of `cloud_s3_ls`), but `names(name)` always contains full names. This
#'   function:
#'   1. filters out folders,
#'   2. extracts `names(name)` into a `path` column,
#'   3. reports the total size of the files to be transferred and asks for
#'     confirmation.
#'   
#' @param content (data.frame) Output of `cloud_s3_ls()`
#' @param what What will be done with the content: "read", "upload" or
#'   "download". This affects only the wording of the messages.
#' @param safe_size Size in bytes that is considered safe to transfer in bulk.
#'   If the total size exceeds this threshold, an additional caution message is
#'   shown, in case you accidentally run a bulk operation on a folder with
#'   gigabytes of data.
#' @param quiet Set to `TRUE` to turn off all caution messages and confirmation
#'   prompts.
#'   
#' @return Transformed `content` dataframe.
#'   
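#' @examplesIf interactive()
#' # a minimal sketch (assuming the project's S3 folder contains a "data"
#' # folder with some files in it): list the folder, then prepare the listing
#' # for bulk reading; the result gains `path` and printable size columns
#' cloud_s3_ls("data") |>
#'   cloud_s3_prep_bulk(what = "read")
#'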
#' @keywords internal
cloud_s3_prep_bulk <- function(content, what = c("read", "upload", "download"),
                               safe_size = 5e7, quiet = FALSE) {
  
  check_class(content, "data.frame")
  stopifnot(all(c("name", "type", "size_b") %in% names(content)))
  check_class(content$name, "character")
  check_class(content$type, "character")
  check_numeric(content$size_b)
  check_bool(quiet)
  what <- rlang::arg_match(what)
  cont <- 
    content %>% 
    filter(.data$type != "folder", !is.na(.data$type)) %>% 
    mutate(path = names(.data$name)) %>% 
    mutate(size_to_print = sapply(
      .data$size_b,
      function(x) format(structure(x, class = "object_size"), units = "auto")
    )) %>% 
    mutate(label = glue::glue("{path} ({size_to_print})")) %>% 
    relocate("name", "path")
  if (nrow(cont) == 0) cli::cli_abort("Nothing to {what}.")
  cli::cli_text("Attempting to {what} the following files:")
  cli::cli_text()
  cli::cli_ul()
  for (i in seq_len(nrow(cont))) {
    cli::cli_li("{.path {cont$path[[i]]}} ({cont$size_to_print[[i]]})")
  }
  cli::cli_end()
  total_size <- structure(sum(cont$size_b), class = "object_size")
  safe_size <- structure(safe_size, class = "object_size")
  cli::cli_text()
  cli::cli_text(
    "... with total size of {.field {format(total_size, units = 'auto')}}"
  )
  cli::cli_text()

  if (!quiet) {
    if (total_size > safe_size) {
      cli::cli_warn("This is quite a lot.")
      yeah <- cli_yeah("Do you really wish to continue?")
    } else {
      yeah <- cli_yeah("Do you wish to continue?", straight = TRUE)
    } 
    if (!yeah) cli::cli_abort("Aborting.")
  }
  cont
}

#' @title Bulk Upload Files to S3
#' 
#' @description This function facilitates the bulk uploading of multiple files
#'   from the local project folder to the project's designated S3 folder. By
#'   using [cloud_local_ls], you can obtain a dataframe detailing the contents
#'   of the local folder. Applying `cloud_s3_upload_bulk` to this dataframe
#'   allows you to upload all listed files to S3.
#' 
#' @inheritParams cloud_s3_upload
#' @inheritParams cloud_s3_prep_bulk
#' 
#' @return Invisibly returns the input `content` dataframe.
#' 
#' @examplesIf interactive()
#' # create toy plots: two PNGs and one JPEG
#' dir.create("toy_plots")
#' png("toy_plots/plot1.png"); plot(rnorm(100)); dev.off()
#' png("toy_plots/plot2.png"); plot(hist(rnorm(100))); dev.off()
#' jpeg("toy_plots/plot3.jpeg"); plot(hclust(dist(USArrests), "ave")); dev.off()
#' 
#' # upload only the two PNGs
#' cloud_local_ls("toy_plots")  |> 
#'   dplyr::filter(type == "png")  |> 
#'   cloud_s3_upload_bulk()
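#'
#' # a sketch: upload everything listed in the folder, regardless of file type
#' cloud_local_ls("toy_plots") |>
#'   cloud_s3_upload_bulk()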
#' 
#' # clean up
#' unlink("toy_plots", recursive = TRUE)
#'   
#' @export
cloud_s3_upload_bulk <- function(content, quiet = FALSE, root = NULL) {
  check_string(root, alt_null = TRUE)
  if (is.null(root)) root <- cloud_s3_get_root()
  
  cont <- cloud_s3_prep_bulk(content, what = "upload", quiet = quiet)
  n <- nrow(cont)
  cli::cli_progress_bar(
    format = "Uploading {cli::pb_bar} [{cli::pb_current}/{cli::pb_total}]",
    total = n
  )
  for (i in seq_along(cont$name)) {
    cli::cli_progress_update()
    cloud_s3_upload(cont$path[[i]], root = root)
  }
  cli::cli_alert_success("Done!")
  invisible(content)
}

#' @title Bulk Download Contents from S3
#' 
#' @description Downloads multiple files from an S3 folder based on the output 
#'   dataframe from [cloud_s3_ls]. This function streamlines the process of 
#'   downloading multiple files by allowing you to filter and select specific 
#'   files from the S3 listing and then download them in bulk.
#' 
#' @inheritParams cloud_s3_download
#' @inheritParams cloud_s3_prep_bulk
#' 
#' @return Invisibly returns the input `content` dataframe.
#' 
#' @examplesIf interactive()
#' # provided there's a folder called "toy_data" in the root of your project's
#' # S3 folder, and this folder contains csv files
#' cloud_s3_ls("toy_data") |> 
#'   filter(type == "csv") |> 
#'   cloud_s3_download_bulk()
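#'
#' # a sketch: download only files whose names start with "report_"
#' # (hypothetical file names; only the `name` column from cloud_s3_ls is used)
#' cloud_s3_ls("toy_data") |>
#'   dplyr::filter(grepl("^report_", name)) |>
#'   cloud_s3_download_bulk()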
#'   
#' # clean up
#' unlink("toy_data", recursive = TRUE)
#'   
#' @export
cloud_s3_download_bulk <- function(content, quiet = FALSE, root = NULL) {
  check_string(root, alt_null = TRUE)
  if (is.null(root)) root <- cloud_s3_get_root()
  
  cont <- cloud_s3_prep_bulk(content, what = "download", quiet = quiet)
  n <- nrow(cont)
  cli::cli_progress_bar(
    format = "Downloading {cli::pb_bar} [{cli::pb_current}/{cli::pb_total}]",
    total = n
  )
  for (i in seq_along(cont$name)) {
    cli::cli_progress_update()
    cloud_s3_download(cont$path[[i]], root = root)
  }
  cli::cli_alert_success("Done!")
  invisible(content)
}

#' @title Write multiple objects to S3 in bulk
#'
#' @description This function allows for the bulk writing of multiple R objects
#'   to the project's designated S3 folder. To prepare a list of objects for
#'   writing, use [cloud_object_ls], which generates a dataframe listing the
#'   objects and their intended destinations in a format akin to the output of
#'   [cloud_s3_ls]. By default, the function determines the appropriate writing
#'   method based on each file's extension. However, if a specific writing
#'   function is provided via the `fun` parameter, it will be applied to all
#'   files, which may not be ideal if dealing with a variety of file types.
#' 
#' @inheritParams cloud_s3_write  
#' @inheritParams cloud_object_prep_bulk
#'
#' @return Invisibly returns the input `content` dataframe.
#' 
#' @examplesIf interactive() 
#' # write two csv files: data/df_mtcars.csv and data/df_iris.csv
#' cloud_object_ls(
#'   dplyr::lst(mtcars = mtcars, iris = iris),
#'   path = "data",
#'   extension = "csv",
#'   prefix = "df_"
#' ) |>
#'   cloud_s3_write_bulk()
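#'
#' # a sketch with a single writer applied to every object; assumes readr is
#' # installed and that `fun` takes the object and the destination path, in
#' # that order
#' cloud_object_ls(
#'   dplyr::lst(mtcars = mtcars, iris = iris),
#'   path = "data",
#'   extension = "csv",
#'   prefix = "df_"
#' ) |>
#'   cloud_s3_write_bulk(fun = readr::write_csv)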
#'   
#' @export
cloud_s3_write_bulk <- function(content, fun = NULL, ..., local = FALSE,
                                quiet = FALSE, root = NULL) {
  check_string(root, alt_null = TRUE)
  if (is.null(root)) root <- cloud_s3_get_root()
  
  cont <- cloud_object_prep_bulk(content, quiet = quiet)
  n <- nrow(cont)
  cli::cli_progress_bar(
    format = "Writing {cli::pb_bar} [{cli::pb_current}/{cli::pb_total}]",
    total = n
  )
  for (i in seq_along(cont$name)) {
    cli::cli_progress_update()
    cloud_s3_write(
      x = cont$object[[i]],
      file = cont$path[[i]],
      fun = fun, ...,
      local = local,
      root = root
    )
  }
  cli::cli_alert_success("Done!")
  invisible(content)
}

#' @title Bulk Read Contents from S3
#' 
#' @description This function facilitates the bulk reading of multiple files
#'   from the project's designated S3 folder. By using [cloud_s3_ls], you can
#'   obtain a dataframe detailing the contents of the S3 folder. Applying
#'   `cloud_s3_read_bulk` to this dataframe allows you to read all listed files
#'   into a named list. The function will, by default, infer the appropriate
#'   reading method based on each file's extension. However, if a specific
#'   reading function is provided via the `fun` parameter, it will be applied
#'   uniformly to all files, which may not be suitable for diverse file types.
#' 
#' @inheritParams cloud_s3_read  
#' @inheritParams cloud_s3_prep_bulk
#' 
#' @return A named list where each element corresponds to the content of a file
#'   from S3. The names of the list elements are derived from the file names.
#' 
#' @examplesIf interactive() 
#' # provided there's a folder called "data" in the root of the project's main
#' # S3 folder, and it contains csv files
#' data_lst <-
#'   cloud_s3_ls("data") |>
#'   dplyr::filter(type == "csv") |>
#'   cloud_s3_read_bulk()
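#'
#' # a sketch with one reader applied to every file; assumes readr is installed
#' # and that `fun` receives the path of the downloaded file
#' data_lst <-
#'   cloud_s3_ls("data") |>
#'   dplyr::filter(type == "csv") |>
#'   cloud_s3_read_bulk(fun = readr::read_csv)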
#'   
#' @export
cloud_s3_read_bulk <- function(content, fun = NULL, ..., quiet = FALSE,
                               root = NULL) {
  check_string(root, alt_null = TRUE)
  if (is.null(root)) root <- cloud_s3_get_root()
  
  cont <- cloud_s3_prep_bulk(content, what = "read", quiet = quiet)
  n <- nrow(cont)
  res <- list()
  cli::cli_progress_bar(
    format = "Reading {cli::pb_bar} [{cli::pb_current}/{cli::pb_total}]",
    total = n
  )
  for (i in seq_along(cont$name)) {
    cli::cli_progress_update()
    res[[cont$name[[i]]]] <- 
      cloud_s3_read(cont$path[[i]], fun, ..., root = root)
  }
  cli::cli_alert_success("Done!")
  res
}
