Nothing
utils::globalVariables(c(
"datatype", "expectedFile", "method", "needBuffer", "objName",
"rasterToMatch", "studyArea", "targetCRS", "to", "touches", "useSAcrs", "V1"
))
#' Download and optionally post-process files
#'
#' \if{html}{\figure{lifecycle-maturing.svg}{options: alt="maturing"}}
#'
#' This function can be used to prepare R objects from remote or local data sources.
#' The object of this function is to provide a reproducible version of
#' a series of commonly used steps for getting, loading, and processing data.
#' This function has two stages: Getting data (download, extracting from archives,
#' loading into R) and post-processing (for `Spatial*` and `Raster*`
#' objects, this is crop, reproject, mask/intersect).
#' To trigger the first stage, provide `url` or `archive`.
#' To trigger the second stage, provide `studyArea` or `rasterToMatch`.
#' See examples.
#'
#' @note This function is still experimental: use with caution.
#'
#' @section Stage 1 - Getting data:
#'
#' See [preProcess()] for combinations of arguments.
#'
#' \enumerate{
#' \item Download from the web via either `googledrive::drive_download()`,
#' [utils::download.file()];
#' \item Extract from archive using [unzip()] or [untar()];
#' \item Load into R using `terra::rast`,
#' `sf::st_read`, or any other function passed in with `fun`;
#' \item Checksumming of all files during this process. This is put into a
#' \file{CHECKSUMS.txt} file in the `destinationPath`, appending if it is
#' already there, overwriting the entries for same files if entries already exist.
#' }
#'
#' @section Stage 2 - Post processing:
#'
#' This will be triggered if either `rasterToMatch` or `studyArea`
#' is supplied.
#'
#' \enumerate{
#' \item Fix errors. Currently only errors fixed are for `SpatialPolygons`
#' using `buffer(..., width = 0)`;
#' \item Crop using [cropTo()];
#' \item Project using [projectTo()];
#' \item Mask using [maskTo()];
#' \item Determine file name [determineFilename()] via `filename2`;
#' \item Optionally, write that file name to disk via [writeTo()].
#' }
#'
#' NOTE: checksumming does not occur during the post-processing stage, as
#' there are no file downloads. To achieve fast results, wrap
#' `prepInputs` with `Cache`.
#'
#' NOTE: `sf` objects are still very experimental.
#'
#' \subsection{postProcessing of `Spat*`, `sf`, `Raster*` and `Spatial*` objects:}{
#'
#' The following has been DEPRECATED because there are a sufficient number of
#' ambiguities that this has been changed in favour of `from` and the `*to` family.
#' See [postProcessTo()].
#'
#' DEPRECATED: If `rasterToMatch` or `studyArea` are used, then this will
#' trigger several subsequent functions, specifically the sequence,
#' *Crop, reproject, mask*, which appears to be a common sequence while
#' preparing spatial data from diverse sources.
#' See [postProcess()] documentation section on
#' *Backwards compatibility with `rasterToMatch` and/or `studyArea` arguments*
#' to understand various combinations of `rasterToMatch` and/or `studyArea`.
#' }
#'
#'
#' @section `fun`:
#'
#' `fun` offers the ability to pass any custom function with which to load
#' the file obtained by `preProcess` into the session. There are two cases that are
#' dealt with: when the `preProcess` downloads a file (including via `dlFun`),
#' `fun` must deal with a file; and, when `preProcess` creates an R object
#' (e.g., raster::getData returns an object), `fun` must deal with an object.
#'
#' `fun` can be supplied in three ways: a function, a character string
#' (i.e., a function name as a string), or a quoted expression.
#' If a character string or function, is should have the package name e.g.,
#' `"terra::rast"` or as an actual function, e.g., `base::readRDS`.
#' In these cases, it will evaluate this function call while passing `targetFile`
#' as the first argument. These will only work in the simplest of cases.
#'
#' When more precision is required, the full call can be written, surrounded by
#' `quote`, and where the filename can be referred to as `targetFile` if the function
#' is loading a file. If `preProcess` returns an object, `fun` should be set to
#' `fun = NA`.
#'
#' If there is a custom function call, is not in a package, `prepInputs` may not find it. In such
#' cases, simply pass the function as a named argument (with same name as function) to `prepInputs`.
#' See examples.
#' NOTE: passing `fun = NA` will skip loading object into R. Note this will essentially
#' replicate the functionality of simply calling `preProcess` directly.
#'
#' @section `purge`:
#'
#' In options for control of purging the `CHECKSUMS.txt` file are:
#'
#' \describe{
#' \item{`0`}{keep file}
#' \item{`1`}{delete file in `destinationPath`, all records of downloads need to be rebuilt}
#' \item{`2`}{delete entry with same `targetFile`}
#' \item{`4`}{delete entry with same `alsoExtract`}
#' \item{`3`}{delete entry with same `archive`}
#' \item{`5`}{delete entry with same `targetFile` & `alsoExtract`}
#' \item{`6`}{delete entry with same `targetFile`, `alsoExtract` & `archive`}
#' \item{`7`}{delete entry that same `targetFile`, `alsoExtract` & `archive` & `url`}
#' }
#' will only remove entries in the `CHECKSUMS.txt` that are associated with
#' `targetFile`, `alsoExtract` or `archive` When `prepInputs` is called,
#' it will write or append to a (if already exists) `CHECKSUMS.txt` file.
#' If the `CHECKSUMS.txt` is not correct, use this argument to remove it.
#'
#' @param targetFile Character string giving the filename (without relative or
#' absolute path) to the eventual file
#' (raster, shapefile, csv, etc.) after downloading and extracting from a zip
#' or tar archive. This is the file *before* it is passed to
#' `postProcess`. The internal checksumming does not checksum
#' the file after it is `postProcess`ed (e.g., cropped/reprojected/masked).
#' Using `Cache` around `prepInputs` will do a sufficient job in these cases.
#' See table in [preProcess()].
#'
#' @param archive Optional character string giving the path of an archive
#' containing `targetFile`, or a vector giving a set of nested archives
#' (e.g., `c("xxx.tar", "inner.zip", "inner.rar")`). If there is/are (an) inner
#' archive(s), but they are unknown, the function will try all until it finds
#' the `targetFile`. See table in [preProcess()]. If it is `NA`,
#' then it will *not* attempt to see it as an archive, even if it has archive-like
#' file extension (e.g., `.zip`). This may be useful when an R function
#' is expecting an archive directly.
#'
#' @param url Optional character string indicating the URL to download from.
#' If not specified, then no download will be attempted. If not entry
#' exists in the `CHECKSUMS.txt` (in `destinationPath`), an entry
#' will be created or appended to. This `CHECKSUMS.txt` entry will be used
#' in subsequent calls to
#' `prepInputs` or `preProcess`, comparing the file on hand with the ad hoc
#' `CHECKSUMS.txt`. See table in [preProcess()].
#'
#' @param alsoExtract Optional character string naming files other than
#' `targetFile` that must be extracted from the `archive`. If
#' `NULL`, the default, then it will extract all files. Other options:
#' `"similar"` will extract all files with the same filename without
#' file extension as `targetFile`. `NA` will extract nothing other
#' than `targetFile`. A character string of specific file names will cause
#' only those to be extracted. See table in [preProcess()].
#'
#' @param destinationPath Character string of a directory in which to download
#' and save the file that comes from `url` and is also where the function
#' will look for `archive` or `targetFile`. NOTE (still experimental):
#' To prevent repeated downloads in different locations, the user can also set
#' `options("reproducible.inputPaths")` to one or more local file paths to
#' search for the file before attempting to download. Default for that option is
#' `NULL` meaning do not search locally.
#'
#' @param fun Optional. If specified, this will attempt to load whatever
#' file was downloaded during `preProcess` via `dlFun`. This can be either a
#' function (e.g., sf::st_read), character string (e.g., "base::load"),
#' NA (for no loading, useful if `dlFun` already loaded the file) or
#' if extra arguments are required
#' in the function call, it must be a quoted call naming
#' `targetFile` (e.g., `quote(sf::st_read(targetFile, quiet = TRUE))`)
#' as the file path to the file to load. See details and examples below.
#'
#' @param quick Logical. This is passed internally to [Checksums()]
#' (the quickCheck argument), and to
#' [Cache()] (the quick argument). This results in faster, though
#' less robust checking of inputs. See the respective functions.
#'
#' @param purge Logical or Integer. `0/FALSE` (default) keeps existing
#' `CHECKSUMS.txt` file and
#' `prepInputs` will write or append to it. `1/TRUE` will deleted the entire
#' `CHECKSUMS.txt` file. Other options, see details.
#'
#' @param overwrite Logical. Should downloading and all the other actions occur
#' even if they pass the checksums or the files are all there.
#'
#' @param ... Additional arguments passed to
#' [postProcess()] and [reproducible::Cache()].
#' Since `...` is passed to [postProcess()], these will
#' `...` will also be passed into the inner
#' functions, e.g., [cropInputs()]. Possibly useful other arguments include
#' `dlFun` which is passed to `preProcess`. See details and examples.
#'
#' @param useCache Passed to `Cache` in various places.
#' Defaults to `getOption("reproducible.useCache", 2L)` in `prepInputs`, and
#' `getOption("reproducible.useCache", FALSE)` if calling any of the inner
#' functions manually. For `prepInputs`, this mean it will use `Cache`
#' only up to 2 nested levels, which will generally including `postProcess` and
#' the first level of `*Input` functions, e.g., `cropInputs`, `projectInputs`,
#' `maskInputs`, but not `fixErrors`.
#'
#' @param .tempPath Optional temporary path for internal file intermediate steps.
#' Will be cleared on.exit from this function.
#'
#' @inheritParams Cache
#' @author Eliot McIntire, Jean Marchal, and Tati Micheletti
#' @export
#' @return
#' This is an omnibus function that will return an R object that will have resulted from
#' the running of [preProcess()] and [postProcess()] or [postProcessTo()]. Thus,
#' if it is a GIS object, it may have been cropped, reprojected, "fixed", masked, and
#' written to disk.
#'
#' @importFrom data.table data.table
#' @importFrom digest digest
#' @importFrom methods is
#' @importFrom utils methods modifyList
#' @include checksums.R download.R postProcess.R
#' @rdname prepInputs
#' @seealso [postProcessTo()], [downloadFile()], [extractFromArchive()],
#' [postProcess()].
#' @examples
#' \donttest{
#' if (requireNamespace("terra", quietly = TRUE) &&
#' requireNamespace("sf", quietly = TRUE)) {
#' library(reproducible)
#' # Make a dummy study area map -- user would supply this normally
#' coords <- structure(c(-122.9, -116.1, -99.2, -106, -122.9, 59.9, 65.7, 63.6, 54.8, 59.9),
#' .Dim = c(5L, 2L)
#' )
#' studyArea <- terra::vect(coords, "polygons")
#' terra::crs(studyArea) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
#' # Make dummy "large" map that must be cropped to the study area
#' outerSA <- terra::buffer(studyArea, 50000)
#' terra::crs(outerSA) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
#' tf <- normPath(file.path(tempdir2("prepInputsEx"), "prepInputs2.shp"))
#' terra::writeVector(outerSA, tf, overwrite = TRUE)
#'
#' # run prepInputs -- load file, postProcess it to the studyArea
#'
#' studyArea2 <- prepInputs(
#' targetFile = tf, to = studyArea,
#' fun = "terra::vect",
#' destinationPath = tempdir2()
#' ) |>
#' suppressWarnings() # not relevant warning here
#'
#' # clean up
#' unlink("CHECKSUMS.txt")
#'
#' ##########################################
#' # Remote file using `url`
#' ##########################################
#' if (internetExists()) {
#' data.table::setDTthreads(2)
#' origDir <- getwd()
#' # download a zip file from internet, unzip all files, load as shapefile, Cache the call
#' # First time: don't know all files - prepInputs will guess, if download file is an archive,
#' # then extract all files, then if there is a .shp, it will load with sf::st_read
#' dPath <- file.path(tempdir(), "ecozones")
#' shpUrl <- "http://sis.agr.gc.ca/cansis/nsdb/ecostrat/zone/ecozone_shp.zip"
#'
#' # Wrapped in a try because this particular url can be flaky
#' shpEcozone <- try(prepInputs(
#' destinationPath = dPath,
#' url = shpUrl
#' ))
#' if (!is(shpEcozone, "try-error")) {
#' # Robust to partial file deletions:
#' unlink(dir(dPath, full.names = TRUE)[1:3])
#' shpEcozone <- prepInputs(
#' destinationPath = dPath,
#' url = shpUrl
#' )
#' unlink(dPath, recursive = TRUE)
#'
#' # Once this is done, can be more precise in operational code:
#' # specify targetFile, alsoExtract, and fun, wrap with Cache
#' ecozoneFilename <- file.path(dPath, "ecozones.shp")
#' ecozoneFiles <- c(
#' "ecozones.dbf", "ecozones.prj",
#' "ecozones.sbn", "ecozones.sbx", "ecozones.shp", "ecozones.shx"
#' )
#' shpEcozone <- prepInputs(
#' targetFile = ecozoneFilename,
#' url = shpUrl,
#' fun = "terra::vect",
#' alsoExtract = ecozoneFiles,
#' destinationPath = dPath
#' )
#' unlink(dPath, recursive = TRUE)
#'
#' # Add a study area to Crop and Mask to
#' # Create a "study area"
#' coords <- structure(c(-122.98, -116.1, -99.2, -106, -122.98, 59.9, 65.73, 63.58, 54.79, 59.9),
#' .Dim = c(5L, 2L)
#' )
#' studyArea <- terra::vect(coords, "polygons")
#' terra::crs(studyArea) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
#'
#' # specify targetFile, alsoExtract, and fun, wrap with Cache
#' ecozoneFilename <- file.path(dPath, "ecozones.shp")
#' # Note, you don't need to "alsoExtract" the archive... if the archive is not there, but the
#' # targetFile is there, it will not redownload the archive.
#' ecozoneFiles <- c(
#' "ecozones.dbf", "ecozones.prj",
#' "ecozones.sbn", "ecozones.sbx", "ecozones.shp", "ecozones.shx"
#' )
#' shpEcozoneSm <- Cache(prepInputs,
#' url = shpUrl,
#' targetFile = reproducible::asPath(ecozoneFilename),
#' alsoExtract = reproducible::asPath(ecozoneFiles),
#' studyArea = studyArea,
#' fun = "terra::vect",
#' destinationPath = dPath,
#' filename2 = "EcozoneFile.shp"
#' ) # passed to determineFilename
#'
#' terra::plot(shpEcozone[, 1])
#' terra::plot(shpEcozoneSm[, 1], add = TRUE, col = "red")
#' unlink(dPath)
#' }
#' }
#' }
#' }
#'
#' ## Using quoted dlFun and fun -- this is not intended to be run but used as a template
#' ## prepInputs(..., fun = quote(customFun(x = targetFile)), customFun = customFun)
#' ## # or more complex
#' ## test5 <- prepInputs(
#' ## targetFile = targetFileLuxRDS,
#' ## dlFun = quote({
#' ## getDataFn(name = "GADM", country = "LUX", level = 0) # preProcess keeps file from this!
#' ## }),
#' ## fun = quote({
#' ## out <- readRDS(targetFile)
#' ## sf::st_as_sf(out)})
#' ## )
prepInputs <- function(targetFile = NULL, url = NULL, archive = NULL, alsoExtract = NULL,
destinationPath = getOption("reproducible.destinationPath", "."),
fun = NULL,
quick = getOption("reproducible.quick"),
overwrite = getOption("reproducible.overwrite", FALSE),
purge = FALSE,
useCache = getOption("reproducible.useCache", 2),
.tempPath,
verbose = getOption("reproducible.verbose", 1),
...) {
# Download, Checksum, Extract from Archive
if (missing(.tempPath)) {
.tempPath <- tempdir2(rndstr(1, 6))
on.exit(
{
unlink(.tempPath, recursive = TRUE)
},
add = TRUE
)
}
funCaptured <- substitute(fun)
prepInputsAssertions(environment())
mess <- character(0)
##################################################################
# preProcess
##################################################################
messagePrepInputs("Running preProcess", verbose = verbose, verboseLevel = 0)
out <- preProcess(
targetFile = targetFile,
url = url,
archive = archive,
alsoExtract = alsoExtract,
destinationPath = destinationPath,
fun = fun,
quick = quick,
overwrite = overwrite,
purge = purge,
useCache = useCache,
.tempPath = .tempPath,
verbose = verbose,
...
)
##################################################################
# Load object to R
##################################################################
if (!is.null(out$targetFilePath)) {
messagePrepInputs("targetFile located at ", out$targetFilePath, verbose = verbose)
}
x <- process(out,
funCaptured = funCaptured,
useCache = useCache, verbose = verbose, ...
)
##################################################################
# postProcess
##################################################################
needPostProcess <- ...names() %in% c(
"studyArea", "rasterToMatch", "targetCRS", "to", "cropTo",
"maskTo", "projectTo", "fixErrorsIn", "useSAcrs", "writeTo"
)
if (any(needPostProcess)) {
TopoErrors <- list() # eventually to update a Google ID #TODO
x <- withCallingHandlers(
postProcessTo(from = x, ..., destinationPath = destinationPath, overwrite = overwrite),
message = function(m) {
hasTopoExcError <- grepl("TopologyException: Input geom 0 is invalid", m$message)
if (any(hasTopoExcError)) {
TopoErrors <<- append(TopoErrors, list(m$message))
}
}
)
}
return(x)
}
#' Extract files from archive
#'
#' Extract zip or tar archive files, possibly nested in other zip or tar archives.
#'
#' @param archive Character string giving the path of the archive
#' containing the `file` to be extracted. This path must exist or be `NULL`
#'
#' @param destinationPath Character string giving the path where `neededFiles` will be
#' extracted. Defaults to the archive directory.
#'
#' @param neededFiles Character string giving the name of the file(s) to be extracted.
#'
#' @param extractedArchives Used internally to track archives that have been extracted from.
#' @param filesExtracted Used internally to track files that have been extracted.
#' @param checkSums A checksums file, e.g., created by Checksums(..., write = TRUE)
#' @param needChecksums A numeric, with `0` indicating do not write a new checksums,
#' `1` write a new one,
#' `2` append new information to existing one.
#' @param checkSumFilePath The full path to the checksum.txt file
#' @param quick Passed to `Checksums`
#' @param ... Passed to `unzip` or `untar`, e.g., `overwrite`
#' @inheritParams prepInputs
#'
#' @return A character vector listing the paths of the extracted archives.
#'
#' @author Jean Marchal and Eliot McIntire
#'
extractFromArchive <- function(archive,
destinationPath = getOption("reproducible.destinationPath", dirname(archive)),
neededFiles = NULL, extractedArchives = NULL, checkSums = NULL,
needChecksums = 0, filesExtracted = character(),
checkSumFilePath = character(), quick = FALSE,
verbose = getOption("reproducible.verbose", 1),
.tempPath, ...) {
if (!is.null(archive)) {
if (!(any(c(knownInternalArchiveExtensions, knownSystemArchiveExtensions) %in% fileExt(archive)))) {
stop(
"Archives of type ", fileExt(archive), " are not currently supported. ",
"Try extracting manually then placing extracted files in ", destinationPath
)
}
}
if (!is.null(archive) && !is.null(neededFiles)) {
neededFiles <- setdiff(neededFiles, archive)
}
# neededFiles <- setdiff(basename2(neededFiles), basename2(archive))
if (length(neededFiles) == 0) neededFiles <- NULL
result <- if (!is.null(neededFiles)) {
checkSums[checkSums$expectedFile %in% makeRelative(neededFiles, destinationPath), ]$result
} else {
"NotOK"
}
extractedObjs <- list(filesExtracted = character())
# needs to pass checkSums & have all neededFiles files
neededFilesRel <- makeRelative(neededFiles, destinationPath)
hasAllFiles <- if (NROW(checkSums)) {
all(neededFilesRel %in% checkSums$expectedFile) # need basename2 for comparison with checkSums
} else {
FALSE
}
if (!(all(compareNA(result, "OK")) && hasAllFiles)) {
if (!is.null(archive)) {
if (!file.exists(archive[1])) {
stop(
"No archive exists with filename: ", archive[1],
". Please pass an archive name to a path that exists"
)
}
args <- list(archive[1], exdir = destinationPath[1])
funWArgs <- .whichExtractFn(archive[1], args)
# need to deal with \\ vs. / and also needs to stay relative
filesInArchive <- makeRelative(.listFilesInArchive(archive), destinationPath)
if (is.null(neededFiles)) {
neededFiles <- filesInArchive
}
neededFiles <- checkRelative(neededFiles, absolutePrefix = destinationPath, filesInArchive)
neededFilesRel <- makeRelative(neededFiles, destinationPath) # neededFiles may have been changed
neededFiles <- makeAbsolute(neededFiles, destinationPath)
result <- if (NROW(checkSums)) {
checkSums[checkSums$expectedFile %in% neededFilesRel, ]$result
} else {
logical(0)
}
# need to re-Checksums because
checkSums <- .checkSumsUpdate(
destinationPath = destinationPath,
newFilesToCheck = neededFiles,
checkSums = checkSums,
checkSumFilePath = checkSumFilePath
)
# isOK will have "directories" so it will be longer than neededFiles
isOK <- if (!is.null(checkSums)) {
.compareChecksumsAndFilesAddDirs(checkSums, neededFiles, destinationPath)
} else {
FALSE
}
# recheck, now that we have the whole file list
if (!(all(isOK)) || NROW(result) == 0) {
# don't extract if we already have all files and they are fine
# use binary addition -- 1 is new file, 2 is append
if (needChecksums == 0) needChecksums <- 2
filesInArchiveAbs <- makeAbsolute(filesInArchive, destinationPath)
if (length(archive) > 1) {
filesExtracted <- c(
filesExtracted,
.callArchiveExtractFn(funWArgs$fun, funWArgs$args,
absolutePrefix = destinationPath,
files = basename2(archive[2]), .tempPath = .tempPath
)
)
# recursion, removing one archive
extractedObjs <- extractFromArchive(
archive[-1],
destinationPath = destinationPath,
neededFiles = neededFiles,
extractedArchives = extractedArchives,
checkSums = checkSums,
quick = quick,
needChecksums = needChecksums,
checkSumFilePath = checkSumFilePath,
filesExtracted = filesExtracted,
verbose = verbose,
.tempPath = .tempPath
)
} else if (any(neededFiles %in% filesInArchiveAbs) || is.null(neededFiles)) {
possibleFolders <- dir.exists(filesInArchive)
if (sum(possibleFolders)) {
filesInArchive <- setdiff(filesInArchive, possibleFolders)
}
neededFilesRel <- if (is.null(neededFiles)) {
NULL
} else {
if (!is.null(names(isOK))) {
names(isOK)[!isOK]
} else {
makeRelative(neededFiles[!isOK], destinationPath)
}
}
filesToExtractNow <- intersect(filesInArchive, neededFilesRel)
dt <- data.table(files = filesToExtractNow)
# extractingTheseFiles <- paste(filesToExtractNow, collapse = "\n")
# extractingTheseFiles <- paste(basename2(filesInArchive[basename2(filesInArchive) %in%
# neededFiles]), collapse = ", ")
# if (!any(nzchar(filesToExtractNow)))
# extractingTheseFiles <- paste0("all files: ",
# paste(filesInArchive, collapse = "\n"))
messagePrepInputs("From:\n", archive[1], " \n", "Extracting", verbose = verbose)
messageDF(dt, verbose = verbose, colour = getOption("reproducible.messageColourPrepInputs"))
filesExtracted <- c(
filesExtracted,
.callArchiveExtractFn(funWArgs$fun,
funWArgs$args,
absolutePrefix = destinationPath,
files = filesToExtractNow,
.tempPath = .tempPath
)
)
} else {
# don't have a 2nd archive, and don't have our neededFiles file
# isArchive <- grepl(fileExt(filesInArchive), pattern = "(zip|tar|rar)", ignore.case = TRUE)
isArchive <- grepl(fileExt(filesInArchive),
pattern = paste0("(", paste(knownArchiveExtensions, collapse = "|"), ")"), ignore.case = TRUE
)
if (any(isArchive)) {
arch <- makeRelative(filesInArchive[isArchive], destinationPath)
filesExtracted <- c(
filesExtracted,
.callArchiveExtractFn(funWArgs$fun, funWArgs$args,
files = arch,
absolutePrefix = destinationPath,
.tempPath = .tempPath
)
)
filesExtracted <- unique(filesExtracted) # maybe unnecessary
prevExtract <- lapply(makeAbsolute(arch, destinationPath), function(ap) {
extractFromArchive(
archive = ap, destinationPath = destinationPath,
neededFiles = neededFiles,
extractedArchives = extractedArchives,
filesExtracted = filesExtracted,
checkSums = checkSums,
needChecksums = needChecksums,
checkSumFilePath = checkSumFilePath,
quick = quick,
.tempPath = .tempPath
)
})
extractedArchives <- c(prevExtract[[1]]$extractedArchives, extractedArchives)
filesExtracted <- unique(c(prevExtract[[1]]$filesExtracted, filesExtracted))
}
}
} else {
messagePrepInputs(" Skipping extractFromArchive: all files already present", verbose = verbose)
filesExtracted <- checkSums[checkSums$expectedFile %in%
makeRelative(filesInArchive, destinationPath), ]$expectedFile
filesExtracted <- makeAbsolute(filesInArchive, destinationPath)
}
}
} else {
if (!is.null(archive)) { # if archive is null, it means there was no archive passed
messagePrepInputs(" Skipping extractFromArchive: all needed ",
"files now present",
verbose = verbose
)
}
filesExtracted <- setdiff(neededFiles, if (!is.null(archive)) makeRelative(archive, destinationPath))
}
list(
extractedArchives = c(extractedArchives, archive),
neededFiles = neededFiles, # these may have been corrected for user supplying incorrect basename path
filesExtracted = unique(c(filesExtracted, extractedObjs$filesExtracted)),
needChecksums = needChecksums,
checkSums = checkSums
)
}
#' Try to pick a file to load
#'
#' @keywords internal
#' @rdname guessAtTarget
#' @name guessAtTarget
#' @importFrom utils unzip untar
#' @inheritParams postProcess
#' @param filesExtracted A character vector of all files that have been extracted (e.g.,
#' from an archive)
#' @param destinationPath Full path of the directory where the target file should be
#' @keywords internal
.guessAtTargetAndFun <- function(targetFilePath,
destinationPath = getOption("reproducible.destinationPath", "."),
filesExtracted, fun = NULL, verbose = getOption("reproducible.verbose", 1)) {
possibleFiles <- unique(c(targetFilePath, filesExtracted))
whichPossFile <- possibleFiles %in% targetFilePath
if (isTRUE(any(whichPossFile))) {
possibleFiles <- possibleFiles[whichPossFile]
}
isShapefile <- FALSE
isRaster <- FALSE
isRDS <- FALSE
fileExt <- fileExt(possibleFiles)
feKnown <- .fileExtsKnown() # An object in helpers.R
funPoss <- lapply(fileExt, function(fe) feKnown[startsWith(prefix = feKnown[[1]], fe), ])
funPoss <- do.call(rbind, funPoss)
if (length(funPoss)) {
isShapefile <- fileExt %in% funPoss[funPoss[, "type"] == vectorType(), "extension"]
isRaster <- fileExt %in% funPoss[funPoss[, "type"] == rasterType(), "extension"]
isRDS <- fileExt %in% funPoss[funPoss[, "extension"] == "rds", "extension"]
if (any(isShapefile)) {
if (is.null(fun)) {
if (requireNamespace("sf", quietly = TRUE)) {
if (!isTRUE(grepl("st_read", fun))) {
messagePrepInputs(
"Using sf::st_read on shapefile because sf package is available; to force old ",
"behaviour with 'raster::shapefile' use fun = 'raster::shapefile' or ",
"options('reproducible.shapefileRead' = 'raster::shapefile')"
)
}
}
}
}
}
if (is.null(fun)) {
fun <- unique(funPoss[, "fun"])
if (length(fun) > 1) {
if (sum(isRaster) > 0 && sum(isShapefile) > 0) {
isRaster[isRaster] <- FALSE
funPoss <- funPoss[funPoss$type == vectorType(), ]
fun <- unique(funPoss[, "fun"])
message("The archive has both a shapefile and a raster; selecting the shapefile. If this is incorrect, specify targetFile")
} else {
stop(
"more than one file; can't guess at function to load with; ",
"please supply 'fun' or 'targetFile' argument to reduce ambiguity"
)
}
}
if (length(fun) == 0) stop("Can't guess at which function to use to read in the object; please supply 'fun'")
}
if (is.null(targetFilePath) || length(targetFilePath) == 0) {
secondPartOfMess <- if (any(isShapefile)) {
c(
" Trying ", fun, " on ", paste(possibleFiles[isShapefile], collapse = ", "), ".",
" If that is not correct, please specify a different targetFile",
" and/or fun."
)
} else if (is.null(fun)) {
c(" Also, file extension does not unambiguously specify how it should be loaded. Please specify fun.")
} else {
c(
" Trying ", fun, ".\n",
" If that is not correct, please specify a targetFile",
" and/or different fun. The current files in the destinationPath",
" are: \n",
paste(possibleFiles, collapse = "\n")
)
}
messagePrepInputs(c(" targetFile was not specified.", secondPartOfMess), verbose = verbose)
targetFilePath <- if (is.null(fun)) {
NULL
} else if (length(possibleFiles[isShapefile]) > 0) {
possibleFiles[isShapefile]
} else {
if (any(isRaster)) {
possibleFiles[isRaster]
} else if (any(isRDS)) {
possibleFiles[isRDS]
} else {
messagePrepInputs(" Don't know which file to load. Please specify targetFile.", verbose = verbose)
}
}
if (length(targetFilePath) > 1) {
messagePrepInputs(" More than one possible files to load:\n",
paste(targetFilePath, collapse = "\n"),
"\nPicking the last one. If not correct, specify a targetFile.",
verbose = verbose
)
targetFilePath <- targetFilePath[length(targetFilePath)]
}
}
list(targetFilePath = targetFilePath, fun = fun)
}
#' @importFrom utils untar unzip
.whichExtractFn <- function(archive, args) {
out <- NULL
if (!(is.null(archive))) {
if (!is.na(archive)) {
ext <- tolower(fileExt(archive))
if (!ext %in% knownArchiveExtensions) {
stop(
"preProcess can only deal with archives with following extensions:\n",
paste(knownArchiveExtensions, collapse = ", ")
)
}
if (ext == "zip") {
fun <- unzip
args <- c(args, list(junkpaths = FALSE))
} else if (ext %in% c("tar", "tar.gz", "gz")) {
fun <- untar
} else if (ext == "rar") {
fun <- "unrar"
} else if (ext == "7z") {
fun <- "7z"
}
out <- list(fun = fun, args = args)
}
}
return(out)
}
#' @keywords internal
#' @importFrom utils capture.output
.callArchiveExtractFn <- function(fun, args, files, overwrite = TRUE,
absolutePrefix = getOption("reproducible.destinationPath", "."),
verbose = getOption("reproducible.verbose", 1), .tempPath) {
argList <- list(files = files)
argList$files <- makeRelative(argList$files, absolutePrefix)
if (missing(.tempPath)) {
.tempPath <- tempdir2(rndstr(1, 6))
on.exit(
{
unlink(.tempPath, recursive = TRUE)
},
add = TRUE
)
}
if (is.character(fun)) {
if (!fun %in% knownSystemArchiveExtensions) {
fun <- eval(fun)
}
}
origExdir <- args$exdir
if (!is.null(args$exdir)) {
args$exdir <- .tempPath
}
if (is.character(fun)) {
messagePrepInputs(paste0("The archive appears to be not a .zip. Trying a system call to ", fun), verbose = verbose)
extractSystemCallPath <- .testForArchiveExtract()
if (grepl(x = extractSystemCallPath, pattern = "7z")) {
prependPath <- if (isWindows()) {
paste0("\"", extractSystemCallPath, "\"")
} else {
extractSystemCallPath
}
# This spits out a message on non-Windows about arguments that are ignored
suppressMessages({
output <- system(paste0(prependPath, " x -aoa -o\"", .tempPath, "\" \"", args[[1]], "\""),
wait = TRUE,
ignore.stdout = FALSE,
ignore.stderr = FALSE,
invisible = TRUE,
show.output.on.console = FALSE, intern = TRUE
)
})
} else {
system(paste0("unrar x ", args[[1]], " ", .tempPath), wait = TRUE, ignore.stdout = TRUE)
}
# list of full paths of all extracted files!
# extractedFiles <- list.files(path = .tempPath, recursive = TRUE, include.dirs = TRUE)
# internalFolders <- extractedFiles[fileExt(extractedFiles) == ""]
# extractedFiles <- setdiff(x = extractedFiles, y = internalFolders)
} else {
# Try the direct, then indirect
isUnzip <- if (identical(unzip, fun)) TRUE else ("overwrite" %in% names(formals(fun)))
argList <- if (isUnzip) {
c(argList, overwrite = overwrite)
} else {
c(argList)
}
opt <- options("warn")$warn
on.exit(options(warn = opt), add = TRUE)
options(warn = 1)
tooBig <- file.size(args[[1]]) > 2e9
worked <- FALSE
if (isUnzip && !tooBig) {
fattrs <- unzip(args[[1]], list = TRUE)
ids <- which(fattrs[["Name"]] %in% argList$files)
tooBig <- any(fattrs[ids, ]["Length"][[1]] >= 4294967295) ## files >= 4GB are truncated; see ?unzip
}
if (!tooBig) {
mess <- capture.output(
{
extractedFiles <- do.call(fun, c(args, argList))
},
type = "message"
)
worked <- if (isUnzip) {
all(normPath(file.path(args$exdir, argList[[1]])) %in% normPath(extractedFiles))
} else {
isTRUE(extractedFiles == 0)
}
}
if (!isTRUE(worked) || isTRUE(tooBig)) {
unz <- Sys.which("unzip")
sZip <- Sys.which("7z")
if (!isTRUE(tooBig)) {
messagePrepInputs("File unzipping using R does not appear to have worked.",
" Trying a system call of unzip...",
verbose = verbose
)
} else {
messPart1 <- "R's unzip utility cannot handle a zip file this size.\n"
if (nchar(sZip) > 0) {
messagePrepInputs(messPart1, verbose = verbose)
} else {
messagePrepInputs(
paste(
messPart1,
"Install 7zip and add it to your PATH (see https://www.7-zip.org/)."
),
verbose = verbose
)
}
}
if (file.exists(args[[1]])) {
pathToFile <- normPath(args[[1]])
} else {
if (file.exists(file.path(args$exdir, args[[1]]))) {
pathToFile <- normPath(file.path(args$exdir, args[[1]]))
} else {
warning(mess)
stop(
"prepInputs cannot find the file ", basename2(args[[1]]), ".",
" The file might have been moved during unzipping or is corrupted."
)
}
}
if (nchar(sZip) > 0) {
messagePrepInputs("Using '7zip'")
op <- setwd(.tempPath)
on.exit(
{
setwd(op)
},
add = TRUE
)
lstFiles <- system(paste0(sZip, " l ", pathToFile), intern = TRUE, wait = TRUE)
startAndEnd <- grep("-----------", lstFiles)
if (diff(startAndEnd) > 1) {
lstFiles <- lstFiles[(startAndEnd[1] + 1):(startAndEnd[2] - 1)]
}
if (length(files)) {
filesAreInArch <- unlist(lapply(files, function(x) any(grepl(x, lstFiles))))
if (all(filesAreInArch)) {
arg22 <- paste("e", pathToFile, paste(files, collapse = " "))
} else {
stop(paste(files, collapse = ", "), " not in ", basename2(pathToFile))
}
} else {
arg22 <- paste0(" e ", pathToFile)
}
system2(sZip,
args = arg22,
wait = TRUE,
stdout = NULL
)
} else if (nchar(unz) > 0) {
messagePrepInputs("Using 'unzip'")
system2(unz,
args = paste0(pathToFile, " -d ", .tempPath),
wait = TRUE,
stdout = NULL
)
} else {
if (nchar(unz) == 0) {
stop(
"unzip command cannot be found.",
" Please try reinstalling Rtools if on Windows, and/or add unzip to system path",
" (e.g., see 'https://cran.r-project.org/bin/windows/Rtools/'.)"
)
}
stop(
"There was no way to unzip all files; try manually. The file is located at: \n",
pathToFile
)
}
}
if (!isUnzip) {
extractedFiles <- files
}
}
extractedFiles <- list.files(
path = .tempPath,
# list of full paths of all extracted files!
recursive = TRUE,
include.dirs = TRUE
)
from <- makeAbsolute(extractedFiles, .tempPath)
on.exit(
{
if (any(file.exists(from))) {
suppressWarnings(try(unlink(from), silent = TRUE))
}
},
add = TRUE
)
args$exdir <- origExdir
to <- file.path(args$exdir, extractedFiles)
suppressWarnings({
out <- hardLinkOrCopy(from, to, verbose = 0)
})
if (!isTRUE(all(file.exists(to)))) {
stop(paste("Could not move extractedfiles from", .tempPath, "to", args$exdir))
}
extractedFiles <- to
unlink(.tempPath, recursive = TRUE)
if (length(extractedFiles) == 0) {
stop(
"preProcess could not extract the files from the archive ", args[[1]], ".",
"Please try to extract it manually to the destinationPath"
)
}
return(extractedFiles)
}
#' @keywords internal
.isArchive <- function(filename) {
if (!is.null(filename)) {
filename <- if (length(filename)) {
isArchive <- fileExt(filename) %in% knownArchiveExtensions
if (any(isArchive)) {
filename[isArchive]
} else {
NULL
}
} else {
NULL
}
}
return(filename)
}
#' @keywords internal
#' @importFrom utils capture.output
#' @importFrom data.table rbindlist as.data.table setDT setDF
appendChecksumsTable <- function(checkSumFilePath, filesToChecksum,
destinationPath = getOption("reproducible.destinationPath", "."),
append = TRUE, verbose = getOption("reproducible.verbose", 1)) {
if (append) {
# a checksums file already existed, need to keep some of it
cs <- suppressWarnings(try(read.table(checkSumFilePath, header = TRUE), silent = TRUE))
if (is(cs, "try-error")) {
# meant that it was an empty CHECKSUMS.txt file -- rebuild it
append <- FALSE
} else {
setDT(cs)
nonCurrentFiles <- cs[!makeRelative(file, destinationPath) %in%
makeRelative(filesToChecksum, destinationPath)]
setDF(cs)
}
messStart <- "Appending "
} else {
messStart <- "Writing "
}
csf <- if (append) tempfile(fileext = ".TXT") else checkSumFilePath
areAbs <- isAbsolutePath(filesToChecksum)
if (any(!areAbs)) {
filesToChecksum[!areAbs] <- file.path(destinationPath, filesToChecksum[!areAbs])
}
capture.output(type = "message", {
currentFiles <- Checksums(
path = destinationPath, write = TRUE, # write = !append || NROW(nonCurrentFiles) == 0,
files = filesToChecksum,
checksumFile = csf,
verbose = verbose
)
})
if (append) { # a checksums file already existed, need to keep some of it
messagePrepInputs(messStart, "checksums to CHECKSUMS.txt. If you see this messagePrepInputs repeatedly, ", verbose = verbose)
messagePrepInputs(" you can specify targetFile (and optionally alsoExtract) so it knows", verbose = verbose)
messagePrepInputs(" what to look for.", verbose = verbose)
currentFilesToRbind <- data.table::as.data.table(currentFiles)
keepCols <- c("expectedFile", "checksum.x", "algorithm.x", "filesize.x")
currentFilesToRbind <- currentFilesToRbind[, keepCols, with = FALSE]
data.table::setnames(currentFilesToRbind,
old = keepCols,
new = c("file", "checksum", "algorithm", "filesize")
)
currentFilesToRbind <- rbindlist(list(nonCurrentFiles, currentFilesToRbind), fill = TRUE)
# Attempt to not change CHECKSUMS.txt file if nothing new occurred
currentFilesToRbind <- unique(currentFilesToRbind)
anyDuplicates <- duplicated(currentFilesToRbind)
if (any(anyDuplicates)) {
messagePrepInputs("The current targetFile is not the same as the expected targetFile in the ",
"CHECKSUMS.txt; appending new entry in CHECKSUMS.txt. If this is not ",
"desired, please check files for discrepancies",
verbose = verbose
)
}
# Sometimes a checksums file doesn't have filesize
if (!is.null(cs$filesize)) {
if (!is.character(cs$filesize)) {
cs$filesize <- as.character(cs$filesize)
}
}
if (!identical(cs, as.data.frame(currentFilesToRbind))) {
writeChecksumsTable(as.data.frame(currentFilesToRbind), checkSumFilePath, dots = list())
}
}
return(currentFiles)
}
#' List files in either a `.zip` or or `.tar` file
#'
#' Makes the outputs from`.tar``.zip` the same, which they aren't by default.
#'
#' @param archive A character string of a single file name to list files in.
#'
#' @return A character string of all files in the archive.
#'
#' @keywords internal
#' @rdname listFilesInArchive
.listFilesInArchive <- function(archive) {
needSystemCall <- (length(archive) > 0 && fileExt(archive[1]) %in% knownSystemArchiveExtensions)
if (length(archive) > 0) {
if (file.exists(archive[1])) {
needSystemCall <- needSystemCall || file.size(archive[1]) > 2e9
}
}
if (needSystemCall) {
extractSystemCallPath <- .testForArchiveExtract()
funWArgs <- list(fun = extractSystemCallPath)
} else {
funWArgs <- .whichExtractFn(archive[1], NULL)
}
filesInArchive <- NULL
if (!is.null(funWArgs$fun)) {
if (file.exists(archive[1])) {
if (!needSystemCall) {
filesInArchive <- funWArgs$fun(archive[1], list = TRUE)
if ("Name" %in% names(filesInArchive)) {
# for zips, rm directories (length = 0)
filesInArchive <- filesInArchive[filesInArchive$Length != 0, ]$Name
} else {
# untar
filesInArchive
}
} else {
if (grepl(x = extractSystemCallPath, pattern = "7z")) {
extractSystemCall <- paste0("\"", extractSystemCallPath, "\"", " l \"", path.expand(archive[1]), "\"")
if (isWindows()) {
filesOutput <- captureWarningsToAttr(
system(extractSystemCall, show.output.on.console = FALSE, intern = TRUE)
)
warn <- attr(filesOutput, "warning")
attr(filesOutput, "warning") <- NULL
} else {
# On Linux/MacOS
filesOutput <- captureWarningsToAttr(
system(extractSystemCall, intern = TRUE, ignore.stderr = TRUE)
)
warn <- attr(filesOutput, "warning")
attr(filesOutput, "warning") <- NULL
}
} else {
archiveExtractBinary <- .archiveExtractBinary()
if (is.null(archiveExtractBinary)) {
stop("unrar is not on this system; please install it")
}
filesOutput <- system(paste0("unrar l ", archive[1]), intern = TRUE)
}
if (exists("warn", inherits = FALSE) && isTRUE(any(grepl("had status 2", warn)))) {
stop(warn)
}
if (isTRUE(any(grepl("(Can not open the file as archive)|(Errors: 1)", filesOutput)))) {
stop("archive appears defective")
}
filesInBetween <- grep(pattern = "----", filesOutput)
filesLines <- filesOutput[(min(filesInBetween) + 1):(max(filesInBetween) - 1)]
filesInArchive <- unlist(lapply(X = seq_along(filesLines), FUN = function(line) {
fullString <- unlist(strsplit(filesLines[[line]], split = " "))
return(fullString[length(fullString)])
}))
if (length(filesInArchive) == 0) {
stop("preProcess could not find any files in the archive ", archive)
}
}
}
}
return(filesInArchive)
}
.compareChecksumsAndFilesAddDirs <- function(checkSums, files, destinationPath) {
isOK <- NULL
if (!is.null(files)) {
checkSumsDT <- data.table(checkSums)
if (NCOL(checkSumsDT) == 0) {
checkSumsDT <- Copy(.emptyChecksumsResult)
}
dirs <- makeRelative(dirname(files), destinationPath) # basename2(unique(dirname(files)))
dirs <- dirs[nzchar(dirs)]
filesDT <- data.table(files = unique(makeRelative(c(files, dirs), destinationPath)))
isOKDT <- checkSumsDT[filesDT, on = c(expectedFile = "files")]
isOKDT2 <- checkSumsDT[filesDT, on = c(actualFile = "files"), nomatch = NA]
# fill in any OKs from "actualFile" into the isOKDT
isOKDT[compareNA(isOKDT2$result, "OK"), "result"] <- "OK"
if (!all(compareNA(isOKDT$result, "OK"))) {
isOKDT <- checksumsDirsOk(isOKDT)
}
isOK <- compareNA(isOKDT$result, "OK")
names(isOK) <- makeRelative(filesDT$files, destinationPath)
}
isOK
}
#' Tests if unrar or 7zip exist
#'
#' @return
#' unrar or 7zip path if exist, or NULL
#'
#' @author Tati Micheletti
#'
#' @keywords internal
#' @rdname archiveExtractBinary
#' @name archiveExtractBinary
.archiveExtractBinary <- function(verbose = getOption("reproducible.verbose", 1)) {
possPrograms <- unique(unlist(lapply(c("7z", "unrar"), Sys.which)))
extractSystemCallPath <- if (!all(possPrograms == "")) {
possPrograms[nzchar(possPrograms)][1] # take first one if there are more than one
} else {
""
}
if (!(isWindows())) { ## TODO: macOS ?? #266
if (grepl("7z", extractSystemCallPath)) {
SevenZrarExists <- system("apt -qq list p7zip-rar", intern = TRUE, ignore.stderr = TRUE)
SevenZrarExists <- grepl(SevenZrarExists, pattern = "installed")
if (isFALSE(SevenZrarExists)) {
messagePrepInputs("To extract .rar files, you will need p7zip-rar, not just p7zip-full. Try: \n",
"--------------------------\n",
"apt install p7zip-rar\n",
"--------------------------\n",
verbose = verbose
)
}
}
}
if (identical(extractSystemCallPath, "")) {
if (isWindows()) {
extractSystemCallPath <- Sys.which("7z.exe")
if (extractSystemCallPath == "") {
messagePrepInputs("prepInputs is looking for 'unrar' or '7z' in your system...", verbose = verbose)
extractSystemCallPath <- list.files("C:/Program Files",
pattern = "unrar.exe|7z.exe",
recursive = TRUE,
full.names = TRUE
)
if (extractSystemCallPath == "" || length(extractSystemCallPath) == 0) {
extractSystemCallPath <- list.files(dirname(Sys.getenv("SystemRoot")),
pattern = "unrar.exe|7z.exe",
recursive = TRUE,
full.names = TRUE
)
if (extractSystemCallPath == "" || length(extractSystemCallPath) == 0) {
extractSystemCallPath <- NULL
messagePrepInputs(missingUnrarMess, verbose = verbose)
} else {
messagePrepInputs("The extracting software was found in an unusual location: ",
extractSystemCallPath, ".",
"If you receive an error when extracting the archive, please install ",
"'7zip' or 'unrar' in 'Program Files' directory.",
verbose = verbose
)
}
}
extractSystemCallPath <- extractSystemCallPath[1]
}
} else {
messagePrepInputs(missingUnrarMess,
"Try installing with, e.g.,: \n",
"--------------------------\n",
"apt install p7zip p7zip-rar p7zip-full -y\n",
"yum install p7zip p7zip-plugins -y\n",
"--------------------------",
verbose = verbose
)
}
}
if (!exists("extractSystemCallPath", inherits = FALSE)) extractSystemCallPath <- NULL
if (!nzchar(extractSystemCallPath)) extractSystemCallPath <- NULL
return(extractSystemCallPath)
}
#' Returns unrar path and creates a shortcut as .unrarPath
#' Was not incorporated in previous function so it can be
#' used in the tests
#'
#' @return
#' unrar or 7zip path if exist, and assign it to .unrarPath
#' Stops and advise user to install it if unrar doesn't exist
#'
#' @author Tati Micheletti
#'
#' @keywords internal
#' @rdname testForArchiveExtract
#' @name testForArchiveExtract
.testForArchiveExtract <- function() {
if (!is.null(.unrarPath)) {
extractSystemCallPath <- .unrarPath
} else {
# Find the path to unrar and assign to a package-stored object
usrTg <- paste(sample(x = LETTERS, size = 15), collapse = "")
# Cache for project-level persistence
extractSystemCallPath <- Cache(.archiveExtractBinary, userTags = usrTg)
utils::assignInMyNamespace(".unrarPath", extractSystemCallPath) # assign in namespace for pkg
}
if (is.null(extractSystemCallPath)) {
clearCache(userTags = usrTg, ask = FALSE)
stop(
"prepInputs did not find '7-Zip' nor 'unrar' installed.",
" Please install it before running prepInputs for a '.rar' archive"
)
}
return(extractSystemCallPath)
}
#' The known path for unrar or 7z
#' @rdname unrarPath
#' @name unrarPath
.unrarPath <- NULL
missingUnrarMess <- "The archive is a 'rar' archive; your system does not have unrar or 7zip;\n"
proj6Warn <- "NOT UPDATED FOR PROJ"
knownInternalArchiveExtensions <- c("zip", "tar", "tar.gz", "gz")
knownSystemArchiveExtensions <- c("rar", "7z")
knownArchiveExtensions <- c(knownInternalArchiveExtensions, knownSystemArchiveExtensions)
prepInputsAssertions <- function(env) {
noisy <- nullOr(c("character", "logical"), c("alsoExtract"), env = env)
noisy <- nullOr(c("character", "logical"), "useCache", env)
noisy <- nullOr(c("numeric", "logical"), c("purge", "verbose"), env)
noisy <- nullOr("character", c(
"destinationPath", "targetFile", "url", "archive",
".tempPath"
), env = env)
noisy <- nullOr("logical", c("quick", "overwrite"), env = env)
}
nullOr <- function(clses, vals, env) {
vapply(vals, function(val) {
out <- is.null(env[[val]])
if (out) { # pull out fast if NULL
return(out)
}
out <- inherits(env[[val]], clses)
if (isFALSE(out)) {
stop(
val, " must be of class", "(es)"[(length(clses) > 1)], " ",
paste(clses, collapse = ", "),
" or set to its default. It is currently ", class(env[[val]]), "."
)
}
out
}, logical(1))
}
is.nulls <- function(x) lapply(x, is.null)
process <- function(out, funCaptured,
useCache = getOption("reproducible.useCache"),
verbose = getOption("reproducible.verbose"),
...) {
theFun <- out$fun
suppressWarnings({
naFun <- all(is.na(theFun))
})
funChar <- if (is.character(out$funChar)) out$funChar else NULL
out <- modifyList(out, list(...))
argsFromPrepInputsFamily <- unique(c(
.namesPostProcessFormals(), formalArgs(prepInputs), formalArgs(preProcess),
"checkSums", "dots", "object"
))
args <- NULL
# keep the ones for theFun
if (naFun %in% FALSE && !is.call(theFun)) {
formsForTheFun <- names(formals3(theFun))
argsFromPrepInputsFamily <- setdiff(argsFromPrepInputsFamily, names(formals3(theFun)))
argsPassingToTheFun <- out[!names(out) %in% argsFromPrepInputsFamily]
args <- argsPassingToTheFun[names(argsPassingToTheFun) %in% formsForTheFun]
}
otherFiles <- out$checkSums[result == "OK"]
.cacheExtra <- NULL
if (NROW(otherFiles)) {
.cacheExtra <- .robustDigest(sort(otherFiles$checksum.x))
}
if (!(naFun || is.null(theFun))) {
x <- if (is.null(out$object)) {
messagePrepInputs("Loading object into R", verbose = verbose)
needRaster <- any(grepl("raster$|stack$|brick$", funCaptured))
needTerra <- any(grepl("terra|rast$", funCaptured))
if (needRaster) {
.requireNamespace("raster", stopOnFALSE = TRUE)
}
if (needRaster || needTerra) {
## Don't cache the reading of a raster
## -- normal reading of raster on disk is fast b/c only reads metadata
do.call(theFun, append(list(asPath(out$targetFilePath)), args))
} else {
if (identical(theFun, base::load)) {
if (is.null(args$envir)) {
messagePrepInputs(" Running base::load, returning objects as a list. Pass envir = anEnvir ",
"if you would like it loaded to a specific environment",
verbose = verbose
)
tmpEnv <- new.env(parent = emptyenv())
returnAsList <- TRUE
} else {
tmpEnv <- args$envir
args$envir <- NULL
returnAsList <- FALSE
}
args2 <- append(list(file = out$targetFilePath, envir = tmpEnv), args)
objs <- do.call(theFun, args2)
if (returnAsList) {
as.list(tmpEnv, all.names = TRUE)
}
} else {
useCache2 <- useCache
if (fileExt(out$targetFilePath) %in% c("qs", "rds") &&
!isTRUE(getOption("reproducible.useMemoise"))) {
useCache2 <- FALSE
messagePrepInputs("targetFile is already a binary; skipping Cache while loading")
}
withCallingHandlers(
if (is.call(theFun)) { # an actual call, not just captured function name
# put `targetFilePath` in the first position -- allows quoted call to use first arg
out <- append(
append(
list(targetFilePath = out[["targetFilePath"]]),
out[-which(names(out) == "targetFilePath")]
),
args
)
out[["targetFile"]] <- out[["targetFilePath"]] # handle both
obj <- Cache(eval(theFun, envir = out),
useCache = useCache2, .cacheExtra = .cacheExtra,
.functionName = funChar
)
} else {
args2 <- append(list(asPath(out$targetFilePath)), args)
obj <- Cache(do.call, theFun, args2,
useCache = useCache2, .cacheExtra = .cacheExtra,
.functionName = funChar
)
},
message = function(m) {
m$message <- grep("No cachePath supplied|useCache is FALSE", m$message, invert = TRUE, value = TRUE)
if (length(m$message)) {
mm <- gsub("(.*)\n$", "\\1", m$message)
messagePrepInputs(mm)
}
tryInvokeRestart("muffleMessage")
}
)
obj
}
}
} else {
# if (is.null(fun) || is.na(fun)) {
x <- out$object
# } else {
# # x <- out$object
# env1 <- new.env()
# list2env(list(...), envir = env1)
# eval(theFun, envir = env1)
# }
}
} else {
x <- if ((is.null(theFun) || is.na(theFun)) && !is.null(out$object)) {
out$object
} else {
messagePrepInputs("No loading of object into R; fun = ", theFun, verbose = verbose)
out
}
}
x
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.