Nothing
utils::globalVariables(c(
"reproducible.inputPaths", "successfulCheckSumFilePath", "successfulDir"
))
#' @param n Number of non-null arguments passed to `preProcess`.
#' E.g., passing `n = 1` returns combinations with only a single non-NULL parameter.
#' If `NULL` (default), all parameter combinations are returned.
#'
#' @export
#' @rdname preProcess
preProcessParams <- function(n = NULL) {
p1 <- data.frame(
url = c("char", "NULL", "NULL", "NULL"),
targetFile = c("NULL", "char", "NULL", "NULL"),
archive = c("NULL", "NULL", "char", "NULL"),
alsoExtract = c("NULL", "NULL", "NULL", "char"),
Result = c(
"Download, extract all files if an archive, guess at 'targetFile', load into R.",
"Load 'targetFile' into R.",
"Extract all files, guess at 'targetFile', load into R.",
"Guess at 'targetFile' from files in 'alsoExtract', load into R."
),
FirstChecksum = c(
"Write or append all new files.",
"Write or append 'targetFile'.",
"Write or append all new files.",
"Write or append all new files."
),
SecondChecksum = c(
"Same as first; no 'targetFile'.*",
"No downloading, so no checksums used.",
"No downloading, so no checksums used.",
"No downloading, so no checksums used."
),
stringsAsFactors = FALSE
)
p2 <- data.frame(
url = c("char", "char", "char", "NULL", "NULL", "NULL"),
targetFile = c("char", "NULL", "NULL", "char", "char", "NULL"),
archive = c("NULL", "char", "NULL", "NULL", "char", "char"),
alsoExtract = c("NULL", "NULL", "char", "char", "NULL", "char"),
Result = c(
"Download, extract all files if an archive, load 'targetFile' into R.",
"Download, extract all files, guess at 'targetFile', load into R.",
"Download, extract only named files in 'alsoExtract', guess at 'targetFile', load into R.",
"Load 'targetFile' into R.",
"Extract all files, load 'targetFile' into R.",
" Extract only named files in 'alsoExtract', guess at 'targetFile', load into R."
),
FirstChecksum = c(
"Write or append all new files.",
"Write or append all new files.",
"Write or append all new files.",
"Write or append all new files.",
"Write or append all new files.",
"Write or append all new files."
),
SecondChecksum = c(
"Use Checksums, skip downloading.",
"Same as first; no 'targetFile'.*",
"Same as first; no 'targetFile'.*",
"No downloading, so no checksums used.",
"No downloading, so no checksums used.",
"No downloading, so no checksums used."
),
stringsAsFactors = FALSE
)
p3 <- data.frame(
url = c("char", "char", "char", "char", "char", "char", "NULL"),
targetFile = c("char", "NULL", "NULL", "char", "char", "char", "char"),
archive = c("char", "char", "char", "NULL", "NULL", "char", "char"),
alsoExtract = c("NULL", "char", "'similar'", "char", "'similar", "NULL", "char"),
Result = c(
"Download, extract all files, load 'targetFile' into R.",
"Download, extract files named in 'alsoExtract', guess at 'targetFile', load into R.",
"Download, extract all files (can't understand \"similar\"), guess at 'targetFile', load into R.",
"Download, if an archive, extract files named in 'targetFile' and 'alsoExtract', load 'targetFile' into R.",
"Download, if an archive, extract files with same base as 'targetFile', load 'targetFile' into R.",
"Download, extract all files from archive, load 'targetFile' into R.",
"Extract files named in 'alsoExtract' from archive, load 'targetFile' into R."
),
FirstChecksum = c(
"Write or append all new files.",
"Write or append all new files.",
"Write or append all new files.",
"Write or append all new files.",
"Write or append all new files.",
"Write or append all new files.",
"Write or append all new files."
),
SecondChecksum = c(
"Use Checksums, skip downloading.",
"Use Checksums, skip downloading.",
"Same as first; no 'targetFile'.",
"Use Checksums, skip downloading.",
"Use Checksums, skip downloading.",
"Use Checksums, skip downloading.",
"No downloading, so no checksums used."
),
stringsAsFactors = FALSE
)
p4 <- data.frame(
url = c("char", "char"),
targetFile = c("char", "char"),
archive = c("char", "char"),
alsoExtract = c("char", "'similar'"),
Result = c(
"Download, extract files named in 'targetFile' and 'alsoExtract', load 'targetFile' into R.",
"Download, extract all files with same basename as 'targetFile', load 'targetFile' into R."
),
FirstChecksum = c(
"Write or append all new files.",
"Write or append all new files."
),
SecondChecksum = c(
"Use Checksums, skip downloading.",
"Use Checksums, skip downloading."
),
stringsAsFactors = FALSE
)
if (is.null(n)) {
rbind(p1, p2, p3, p4)
} else if (n == 1) {
p1
} else if (n == 2) {
p2
} else if (n == 3) {
p3
} else if (n == 4) {
p4
}
}
#' Download, Checksum, Extract files
#'
#' This does downloading (via `downloadFile`), checksumming (`Checksums`),
#' and extracting from archives (`extractFromArchive`), plus cleaning up of input
#' arguments (e.g., paths, function names).
#' This is the first stage of three used in `prepInputs`.
#'
#' @return
#' A list with 5 elements: `checkSums` (the result of a `Checksums`
#' after downloading), `dots` (cleaned up `...`, including deprecated argument checks),
#' `fun` (the function to be used to load the `preProcess`ed object from disk),
#' and `targetFilePath` (the fully qualified path to the `targetFile`).
#'
#' @section Combinations of `targetFile`, `url`, `archive`, `alsoExtract`:
#'
#' Use `preProcessParams()` for a table describing various parameter combinations and their
#' outcomes.
#'
#' `*` If the `url` is a file on Google Drive, checksumming will work
#' even without a `targetFile` specified because there is an initial attempt
#' to get the remove file information (e.g., file name). With that, the connection
#' between the `url` and the filename used in the \file{CHECKSUMS.txt} file can be made.
#'
#' @inheritParams prepInputs
#' @inheritParams downloadFile
#'
#' @author Eliot McIntire
#' @export
#' @importFrom data.table fread setDT
preProcess <- function(targetFile = NULL, url = NULL, archive = NULL, alsoExtract = NULL,
destinationPath = getOption("reproducible.destinationPath", "."),
fun = NULL, dlFun = NULL,
quick = getOption("reproducible.quick"),
overwrite = getOption("reproducible.overwrite", FALSE),
purge = FALSE,
verbose = getOption("reproducible.verbose", 1),
.tempPath, ...) {
st <- Sys.time()
messagePreProcess("Running `preProcess`", verbose = verbose, verboseLevel = 0)
.message$IndentUpdate()
if (missing(.tempPath)) {
.tempPath <- tempdir2(rndstr(1, 6))
on.exit(
{
unlink(.tempPath, recursive = TRUE)
},
add = TRUE
)
}
dlFunCaptured <- substitute(dlFun)
prepInputsAssertions(environment())
verboseCFS <- verbose
isAlreadyQuoted <- any(grepl("quote", dlFunCaptured))
if (isAlreadyQuoted) {
dlFunCaptured <- eval(dlFunCaptured)
}
dots <- list(...)
fun <- .checkFunInDots(fun = fun, dots = dots)
dots <- .checkDeprecated(dots, verbose = verbose)
teamDrive <- getTeamDrive(dots)
# remove trailing slash -- causes unzip fail if it is there
# A user could pass `NULL` to destinationPath -- overriding the argument default -- reset default here
if (is.null(destinationPath))
destinationPath <- getOption("reproducible.destinationPath", ".")
destinationPath <- normPath(destinationPath)
checkSumFilePath <- identifyCHECKSUMStxtFile(destinationPath)
if (!is.null(archive)) {
if (any(is.na(archive))) {
if (all(!is.character(archive))) {
archive <- as.character(archive)
}
}
}
targetFileGuess <- NULL
if (is.null(targetFile) || is.null(archive)) {
targetFileGuess <- .guessAtFile(
url = url, archive = archive, targetFile = targetFile,
destinationPath = destinationPath, verbose = verbose,
team_drive = teamDrive
)
if (is.null(archive)) {
archive <- .isArchive(targetFileGuess)
}
}
if (is.logical(alsoExtract)) {
alsoExtract <- c("none", "all")[alsoExtract + 1]
}
targetFilePath <- getTargetFilePath(
targetFile, archive, targetFileGuess, verbose,
destinationPath, alsoExtract, checkSumFilePath
)
targetFile <- makeRelative(targetFilePath, destinationPath)
alsoExtract <- guessAlsoExtract(targetFile, alsoExtract, checkSumFilePath)
if (!dir.exists(destinationPath)) {
if (isFile(destinationPath)) {
stop("destinationPath must be a directory")
}
checkPath(destinationPath, create = TRUE)
}
if (isTRUE(!is.na(targetFile)))
messagePreProcess("Preparing: ", targetFile, verbose = verbose)
needChecksums <- 0
archive <- setupArchive(archive, destinationPath)
filesToCheck <- na.omit(unique(c(archive, targetFilePath, alsoExtract)))
# Need to run checksums on all files in destinationPath because we may not know what files we
# want if targetFile, archive, alsoExtract not specified
# This will switch destinationPath to be same as reproducible.inputPaths
# This means that we need to modify some of the paths that were already absolute to destinationPath
inputPaths <- runChecksums(destinationPath, checkSumFilePath, filesToCheck, verbose)
list2env(inputPaths, environment()) # reproducible.inputPaths, destinationPathUser, destinationPath, checkSums
if (!is.null(inputPaths$destinationPathUser)) { # i.e., it changed
targetFilePath <- makeRelative(targetFilePath, inputPaths$destinationPathUser)
targetFilePath <- makeAbsolute(targetFilePath, destinationPath)
filesToCheck <- makeRelative(filesToCheck, inputPaths$destinationPathUser)
filesToCheck <- makeAbsolute(filesToCheck, destinationPath)
if (!is.null(archive)) {
archive <- makeRelative(archive, inputPaths$destinationPathUser)
archive <- makeAbsolute(archive, destinationPath)
}
}
if (is(checkSums, "try-error")) {
needChecksums <- 1
checkSums <- .emptyChecksumsResult
}
# This will populate a NULL archive if archive is local or
archiveOut <- dealWithArchive(
archive, url, targetFile, checkSums, alsoExtract,
destinationPath, teamDrive, verbose
)
list2env(archiveOut, envir = environment()) # checkSums, archive, fileGuess
# NOW -- archive will exist if it didn't before
# Double check that targetFile and alsoExtract are correct paths, given that in archive, they may be in sub-folders
needEmptyChecksums <- FALSE
if (is.logical(purge)) purge <- as.numeric(purge)
if (purge == 1) {
unlink(checkSumFilePath)
needChecksums <- 1
}
if (purge > 1) {
checkSums <- .purge(
checkSums = checkSums, purge = purge, targetFile = targetFile,
archive = archive, url = url, alsoExtract = alsoExtract
)
needChecksums <- 2
}
neededFiles <- c(targetFile, makeAbsolute(alsoExtract, destinationPath)) # if (!is.null(alsoExtract)) basename2(alsoExtract))
if (is.null(neededFiles)) neededFiles <- makeAbsolute(archive)
# alsoExtract can be set to NA to say "don't try to extract anything else"; these would be in neededFiles now
# --> remove them
if (any(is.na(neededFiles))) neededFiles <- na.omit(neededFiles)
# remove "similar" from needed files. It is for extracting.
neededFiles <- grep("similar$", neededFiles, value = TRUE, invert = TRUE)
# Deal with "similar" in alsoExtract -- maybe this is obsolete with new feature that uses file_name_sans_ext
neededFilesOrig <- NULL
if (is.null(alsoExtract)) {
filesInsideArchive <- .listFilesInArchive(archive) # will be relative
neededFiles <- checkRelative(neededFiles, destinationPath, filesInsideArchive)
if (isTRUE(length(filesInsideArchive) > 0)) {
checkSums <- .checkSumsUpdate(destinationPath, makeAbsolute(filesInsideArchive, destinationPath),
checkSums = checkSums
)
}
neededFiles <- unique(c(neededFiles, filesInsideArchive))
} else {
outFromSimilar <- .checkForSimilar(neededFiles, alsoExtract, archive, targetFile,
destinationPath = destinationPath, checkSums,
checkSumFilePath = checkSumFilePath, url, verbose = verboseCFS
)
verboseCFS <- verbose - 1
neededFilesOrig <- unique(makeAbsolute(neededFiles, destinationPath))
list2env(outFromSimilar, environment()) # neededFiles, checkSums
}
neededFiles <- unique(makeAbsolute(neededFiles, destinationPath))
neededFiles <- unique(c(neededFilesOrig, neededFiles))
neededFiles <- grep("none$", neededFiles, value = TRUE, invert = TRUE)
# alsoExtract <- grep("none$", alsoExtract, value = TRUE, invert = TRUE)
filesToChecksum <- if (is.null(archive) || isTRUE(is.na(archive))) {
NULL
} else {
archive
}
filesToChecksum <- unique(c(filesToChecksum, neededFiles))
isOK <- .compareChecksumsAndFilesAddDirs(checkSums, filesToChecksum, destinationPath) # also checks dirs, so adds lines
if (isTRUE(!all(isOK))) {
results <- .tryExtractFromArchive(
archive = if (isTRUE(is.na(archive))) NULL else archive,
neededFiles = neededFiles,
alsoExtract = alsoExtract, destinationPath = destinationPath,
checkSums = checkSums, needChecksums = needChecksums,
checkSumFilePath = checkSumFilePath,
filesToChecksum = filesToChecksum,
targetFilePath = targetFilePath, quick = quick,
verbose = verbose, .tempPath = .tempPath
)
list2env(results, environment()) # neededFiles, checkSums, filesExtr, targetFilePath, filesToChecksum, needChecksums
if (needChecksums > 0) {
checkSums <- appendChecksumsTable(
checkSumFilePath = checkSumFilePath,
filesToChecksum = unique(filesToChecksum),
destinationPath = destinationPath,
append = results$needChecksums >= 2
)
needChecksums <- 0
}
} else {
# May need to update the guessed "targetFilePath"
targetFilePoss <- makeRelative(targetFilePath, destinationPath)
if (isTRUE(!targetFilePoss %in% names(isOK))) {
whNewTargetFilePath <- grep(targetFilePoss, names(isOK))
if (length(whNewTargetFilePath)) {
targetFilePath <- names(isOK)[whNewTargetFilePath]
}
}
}
# Check for local copies in all values of reproducible.inputPaths
# At the end of this function, the files will be present in destinationPath, if they existed
# in options("reproducible.inputPaths")
localChecks <- .checkLocalSources(neededFiles,
checkSums = checkSums,
checkSumFilePath = checkSumFilePath,
otherPaths = reproducible.inputPaths,
destinationPath, needChecksums = needChecksums, verbose = verbose
)
list2env(localChecks, environment()) # neededFiles, checkSums, needChecksums, successfulDir, successfulCheckSumFilePath
# Change the destinationPath to the reproducible.inputPaths temporarily, so
# download happens there. Later it will be linked to the user destinationPath
if (!is.null(reproducible.inputPaths)) {
# may already have been changed above
outCheck <- if (!is.null(targetFilePath)) {
!file.exists(targetFilePath)
} else {
TRUE
} ## if NULL, it doesn't exist and we want to proceed
if (isTRUE(outCheck)) { # skip if it already existed locally
if (is.null(destinationPathUser)) {
destinationPathUser <- destinationPath
}
on.exit(
{
destinationPath <- destinationPathUser
},
add = TRUE
)
if (!identical(destinationPath, reproducible.inputPaths)) {
# CHANGE destinationPath FOR REMAINDER OF THIS FUNCTION
neededFilesNew <- makeRelative(neededFiles, destinationPath)
targetFilePathNew <- makeRelative(targetFilePath, destinationPath)
destinationPathNew <- reproducible.inputPaths[1]
archiveExistInDestDir <- if (!isNULLorNA(archive)) {
file.exists(archive)
} else {
FALSE
}
existInDestDir <- if (!isNULLorNA(neededFiles)) {
file.exists(neededFiles)
} else {
FALSE
}
if (any(existInDestDir)) {
from <- neededFiles[existInDestDir]
to <- makeAbsolute(neededFilesNew[existInDestDir],
absoluteBase = destinationPathNew
)
linkOrCopyUpdateOnly(from, to, verbose = verbose)
}
if (any(archiveExistInDestDir)) {
from <- archive[archiveExistInDestDir]
to <- makeAbsolute(makeRelative(archive[archiveExistInDestDir], destinationPath),
absoluteBase = destinationPathNew
)
linkOrCopyUpdateOnly(from, to, verbose = verbose - 1)
}
targetPath <- targetFilePathNew
destinationPath <- destinationPathNew
neededFiles <- neededFilesNew
}
if (isTRUE(any(grepl(archive, pattern = destinationPathUser)))) {
# might have a "." as destinationPath -- messes with grepl
patt <- if (grepl("^\\.", destinationPathUser)) {
gsub("^\\.", "^\\\\.", destinationPathUser)
} else {
destinationPathUser
}
archive <- gsub(archive, pattern = patt, replacement = destinationPath)
}
targetFilePath <- makeAbsolute(targetFilePath, destinationPath)
neededFiles <- makeAbsolute(neededFiles, destinationPath)
}
}
###############################################################
# Download
###############################################################
downloadFileResult <- downloadFile(
archive = if (isTRUE(is.na(archive))) NULL else archive,
targetFile = targetFile, neededFiles = neededFiles, destinationPath = destinationPath,
quick = quick, checkSums = checkSums, dlFun = dlFunCaptured, url = url,
checksumFile = asPath(checkSumFilePath), needChecksums = needChecksums,
overwrite = overwrite, purge = purge, # may need to try purging again if no target,
# archive or alsoExtract were known yet
verbose = verbose, .tempPath = .tempPath, ...
)
downloadFileResult <- .fixNoFileExtension(
downloadFileResult = downloadFileResult,
targetFile = targetFile, archive = archive,
destinationPath = destinationPath, verbose = verbose
)
# Post downloadFile -- put objects into this environment
if (!is.null(downloadFileResult$targetFilePath)) {
targetFilePath <- makeAbsolute(downloadFileResult$neededFiles, destinationPath)
}
checkSums <- downloadFileResult$checkSums
needChecksums <- downloadFileResult$needChecksums
neededFiles <- downloadFileResult$neededFiles
# If the download was of an archive, then it is possible the archive path is wrong
if (!isTRUE(is.na(archive))) {
if (identical(downloadFileResult$downloaded, downloadFileResult$archive)) {
archive <- downloadFileResult$downloaded
}
# archive specified, alsoExtract is NULL --> now means will extract all
if (is.null(archive)) archive <- downloadFileResult$archive
}
###############################################################
# redo "similar" after download
###############################################################
outFromSimilar <- .checkForSimilar(
neededFiles = neededFiles, alsoExtract = alsoExtract,
archive = archive, targetFile = targetFile,
destinationPath = destinationPath, checkSums = checkSums,
checkSumFilePath = checkSumFilePath,
url = url, verbose = verboseCFS
)
verboseCFS <- verbose - 1
list2env(outFromSimilar, environment()) # neededFiles, checkSums
# don't include targetFile in neededFiles -- extractFromArchive deals with it separately
if (length(neededFiles) > 1) alsoExtract <- setdiff(neededFiles, targetFile)
# To this point, we only have the archive in hand -- include this in the list of filesToChecksum
filesToChecksum <- if (isTRUE(is.na(archive)) || (is.null(archive))) {
downloadFileResult$downloaded
} else {
archive
}
on.exit(
{
if (needChecksums > 0) {
appendChecksumsTable(
checkSumFilePath = checkSumFilePath,
filesToChecksum = filesToChecksum,
destinationPath = destinationPath,
append = (needChecksums == 2)
)
}
needChecksums <- 0
},
add = TRUE
)
# Stage 1 - Extract from archive
filesToChecksum <- unique(c(filesToChecksum, neededFiles))
isOK <- .compareChecksumsAndFilesAddDirs(checkSums, filesToChecksum, destinationPath)
if (isTRUE(!all(isOK))) {
filesExtracted <- .tryExtractFromArchive(
archive = archive,
neededFiles = neededFiles,
alsoExtract = alsoExtract,
destinationPath = destinationPath,
checkSums = checkSums,
needChecksums = needChecksums,
checkSumFilePath = checkSumFilePath,
filesToChecksum = filesToChecksum,
targetFilePath = targetFilePath,
quick = quick, verbose = verbose,
.tempPath = .tempPath
)
# this changes targetFilePath to have folder if the extraction included a folder
list2env(filesExtracted, environment()) # neededFiles, checkSums, filesExtr, targetFilePath, filesToChecksum, needChecksums
} else {
if (!is.null(.isArchive(archive))) {
messagePreProcess("Skipping extractFromArchive attempt: no files missing", verbose = verbose)
}
if (!is.null(targetFilePath))
if (isTRUE(!is.na(targetFilePath)))
if (any(!makeAbsolute(targetFilePath, destinationPath) %in%
makeAbsolute(neededFiles, destinationPath))) {
if (!basename2(targetFilePath) %in% makeRelative(neededFiles, destinationPath)) {
targetFilePathPoss <- grep(basename2(targetFilePath), neededFiles, value = TRUE)
if (length(targetFilePath) > 1)
targetFilePath <- targetFilePathPoss
}
}
filesExtr <- c(filesToChecksum, neededFiles)
filesExtr <- setdiff(filesExtr, .isArchive(filesExtr))
}
filesExtr <- unique(c(filesExtr, filesToChecksum))
# link back to destinationPath if options("reproducible.inputPaths") was used.
# destinationPath had been overwritten to be options("reproducible.inputPaths")
if (!is.null(reproducible.inputPaths)) {
if (!is.null(destinationPathUser)) { # retrieved file locally
foundInInputPaths <- grepl(normPath(destinationPath), normPath(filesExtr))
# Make sure they are all in options("reproducible.inputPaths"), accounting for
# the fact that some may have been in sub-folders -- i.e., don't deal with these
to <- targetFilePath
if (isTRUE(any(foundInInputPaths))) {
whFilesExtrInIP <- which(file.exists(filesExtr[foundInInputPaths]))
if (length(whFilesExtrInIP)) {
from <- filesExtr[whFilesExtrInIP]
to <- makeAbsolute(makeRelative(from, destinationPath), destinationPathUser)
if (!isTRUE(all(from %in% to))) {
messagePreProcess("...using file(s) in getOption('reproducible.inputPaths')...",
verbose = verbose)
}
outHLC <- hardLinkOrCopy(from, to, verbose = verbose - 1)
filesExtr[foundInInputPaths] <- to
}
}
# targetFilePath may be already in destinationPathUser, depending on when it was created
if (!is.null(targetFilePath)) {
if (!identical(to, targetFilePath)) {
targetFilePathTmp <- to[basename(to) %in% basename(targetFilePath)]
if (any(file.exists(targetFilePathTmp))) {
targetFilePath <- targetFilePathTmp
} else {
targetFilePath <- makeAbsolute(
makeRelative(targetFilePath, destinationPath),
destinationPathUser
)
}
}
}
destinationPath <- destinationPathUser
} else {
foundInLocalPaths <- grepl(normPath(destinationPath), normPath(filesExtr))
# Make sure they are all in options("reproducible.inputPaths"), accounting for
# the fact that some may have been in sub-folders -- i.e., don't deal with these
if (isTRUE(any(foundInLocalPaths))) {
whFilesExtrInLP <- which(file.exists(filesExtr[foundInLocalPaths]))
if (length(whFilesExtrInLP)) {
from <- filesExtr[whFilesExtrInLP]
for (riP in reproducible.inputPaths) {
to <- makeAbsolute(makeRelative(from, destinationPath), riP)
if (all(from %in% to)) {
break
}
}
# Check that CHECKSUMS.txt in destinationPath has one or more of the files
a <- fread(checkSumFilePath)
common <- checkSums[checkSums$expectedFile %in% a$file]
missingFiles <- common[!a, on = c("expectedFile" = "file", "checksum.x" = "checksum")]
if (NROW(missingFiles)) {
messagePreProcess("... linking to getOption('reproducible.inputPaths')...",
verbose = verbose)
outHLC <- hardLinkOrCopy(from, to, verbose = verbose)
} else {
messagePreProcess("Skipping copy from inputPaths; all files present", verbose = verbose)
}
}
}
}
}
# if it was a nested file
if (any(fileExt(neededFiles) %in% c("zip", "tar", "rar")) && !isTRUE(is.na(archive))) {
nestedArchives <- neededFiles[fileExt(neededFiles) %in% c("zip", "tar", "rar")]
nestedArchives <- makeAbsolute(nestedArchives[1], destinationPath)
messagePreProcess("There are still archives in the extracted files.",
" preProcess will try to extract the files from ", basename2(nestedArchives), ".",
" If this is incorrect, please supply archive.",
verbose = verbose
)
# Guess which files inside the new nested
nestedTargetFile <- .listFilesInArchive(archive = nestedArchives)
outFromSimilar <- .checkForSimilar(
alsoExtract = alsoExtract,
archive = nestedArchives,
neededFiles = nestedTargetFile,
destinationPath = destinationPath,
checkSums = checkSums,
checkSumFilePath = checkSumFilePath,
targetFile = targetFile, verbose = verboseCFS
)
neededFiles <- outFromSimilar$neededFiles
checkSums <- outFromSimilar$checkSums
# don't include targetFile in neededFiles -- extractFromArchive deals with it separately
if (length(neededFiles) > 1) alsoExtract <- setdiff(neededFiles, targetFile)
# To this point, we only have the archive in hand -- include this in the list of filesToChecksum
filesToChecksum <- if (is.null(archive)) downloadFileResult$downloaded else basename2(archive)
on.exit(
{
if (needChecksums > 0) {
# needChecksums 1 --> write a new checksums.txt file
# needChecksums 2 --> append to checksums.txt
appendChecksumsTable(
checkSumFilePath = checkSumFilePath,
filesToChecksum = filesToChecksum,
destinationPath = destinationPath,
append = (needChecksums == 2)
)
}
needChecksums <- 0
},
add = TRUE
)
extractedFiles <- .tryExtractFromArchive(
archive = nestedArchives, neededFiles = neededFiles,
alsoExtract = alsoExtract, destinationPath = destinationPath,
checkSums = checkSums, needChecksums = needChecksums,
checkSumFilePath = checkSumFilePath, filesToChecksum = filesToChecksum,
targetFilePath = targetFilePath, quick = quick,
verbose = verbose, .tempPath = .tempPath
)
filesExtr <- c(filesExtr, extractedFiles$filesExtr)
}
targetParams <- .guessAtTargetAndFun(targetFilePath, destinationPath,
filesExtracted = filesExtr,
fun, verbose = verbose
) # passes through if all known
targetFile <- makeRelative(targetParams$targetFilePath, destinationPath)
targetFilePath <- targetParams$targetFilePath
funChar <- targetParams$fun
## targetFilePath might still be NULL, need destinationPath too
if (is.null(targetFilePath)) {
if (is.null(filesExtr)) {
if (!is.null(downloadFileResult$downloaded)) {
targetFilePath <- downloadFileResult$downloaded
}
} else {
targetFilePath <- filesExtr
}
}
if (is.null(targetFile) && !is.null(targetFilePath)) {
targetFile <- makeRelative(targetFilePath, destinationPath)
}
## Convert the fun as character string to function class, if not already
fun <- .extractFunction(funChar)
if (needChecksums > 0) {
## needChecksums 1 --> write a new CHECKSUMS.txt file
## needChecksums 2 --> append to CHECKSUMS.txt file
## needChecksums 3 --> append to checkSumFilePath file OR successfulCheckSumFilePath, not both
if (needChecksums == 3) {
# successfulCheckSumFilePath we do not need to update. Determine which one this is, and do
# other
if (identical(checkSumFilePath, successfulCheckSumFilePath)) { # if it was in checkSumFilePath
checkSumFilePath <- identifyCHECKSUMStxtFile(successfulDir) # run Checksums in IP
}
}
csps <- destinationPath
if (!is.null(reproducible.inputPaths)) {
csps <- c(csps, reproducible.inputPaths)
}
for (csp in csps) {
checkSumFilePath <- identifyCHECKSUMStxtFile(csp)
checkSums <- appendChecksumsTable(
checkSumFilePath = checkSumFilePath,
filesToChecksum = basename2(unique(filesToChecksum)),
destinationPath = csp,
append = needChecksums >= 2
)
needChecksums <- 0
}
if (!is.null(reproducible.inputPaths) && needChecksums != 3) {
checkSumFilePathInputPaths <- identifyCHECKSUMStxtFile(reproducible.inputPaths[[1]])
#suppressMessages({
checkSums <- appendChecksumsTable(
checkSumFilePath = checkSumFilePathInputPaths,
filesToChecksum = unique(filesToChecksum),
destinationPath = destinationPath,
append = needChecksums == 2,
verbose = verbose - 1
)
#})
needChecksums <- 0
}
on.exit(
{
needChecksums <- 0
},
add = TRUE,
after = FALSE
) # effectively remove appendChecksums in other
# on.exit because it is done here
}
failStop <- FALSE
if (is.null(targetFilePath)) {
failStop <- TRUE
} else if (isTRUE(all(is.na(targetFilePath)))) { # this must come before next; but no need to change failStop
if (length(targetFilePath) > 1)
targetFilePath <- NA
# failStop <- FALSE
} else if (!isTRUE(file.exists(targetFilePath))) {
failStop <- TRUE
}
if (isTRUE(failStop)) {
stop(
"targetFile appears to be misspecified at: ", targetFilePath, ". ",
"Possibly, it does not exist in the specified archive, ",
"or the file doesn't exist in destinationPath"
)
}
archiveInChecksums <- checkSums$actualFile %in% makeRelative(archive, destinationPath) # basename2 is needed in checksums
if (any(archiveInChecksums)) {
checkSums[which(archiveInChecksums), result := "ArchiveOK"]
}
out <- list(
checkSums = checkSums,
dots = dots,
fun = fun,
funChar = funChar,
targetFilePath = targetFilePath,
destinationPath = destinationPath,
object = downloadFileResult$object
)
.message$IndentRevert()
stNext <- reportTime(st, mess = "`preProcess` done; took ", minSeconds = 10)
return(out)
}
#' Purge individual line items from checksums file
#'
#' @inheritParams downloadFile
#' @keywords internal
#' @rdname purge
.purge <- function(checkSums, purge, targetFile, archive, alsoExtract, url, destinationPath) {
purgeChar <- as.character(purge)
checkSums <- tryCatch(
switch(purgeChar,
"2" = checkSums[!(checkSums$expectedFile %in% makeRelative(targetFile, destinationPath)), ],
"3" = checkSums[!(checkSums$expectedFile %in% makeRelative(archive, destinationPath)), ],
"4" = checkSums[!(checkSums$expectedFile %in% makeRelative(alsoExtract, destinationPath)), ],
"5" = checkSums[!(checkSums$expectedFile %in% makeRelative(unique(c(targetFile, alsoExtract)), destinationPath)), ], # nolint
"6" = checkSums[!(checkSums$expectedFile %in%
makeRelative(unique(c(targetFile, alsoExtract, archive)), destinationPath)), ], # nolint
"7" = checkSums[!(checkSums$expectedFile %in%
makeRelative(unique(c(targetFile, alsoExtract, archive, url)), destinationPath)), ] # nolint
),
error = function(x) checkSums
)
checkSums
}
#' @keywords internal
.emptyChecksumsResult <- data.table::data.table(
expectedFile = character(),
actualFile = character(),
result = character()
)
#' @keywords internal
.emptyChecksumsFileContent <- data.frame(
file = character(),
checksum = character(),
filesize = character(),
algorithm = character()
)
#' @keywords internal
#' @importFrom utils getFromNamespace
.extractFunction <- function(fun, envir = parent.frame()) {
if (!is.null(fun)) {
if (is.name(fun)) {
possFun <- get0(fun, envir = envir)
if (is.null(possFun)) {
env <- whereInStack(fun)
fun <- get(fun, envir = env)
}
}
if (is.call(fun)) {
fun
} else {
suppressWarnings(isNAFun <- is.na(fun))
if (!any(isNAFun)) {
if (!is.function(fun)) {
if (any(grepl("::", fun))) {
fun2 <- strsplit(fun, "::")[[1]]
pkg <- fun2[1]
fun <- fun2[2]
fun <- getFromNamespace(fun, pkg)
} else {
fun <- get(fun, envir)
}
}
}
}
}
fun
}
#' @keywords internal
.guessAtFile <- function(url, archive, targetFile, destinationPath,
verbose = getOption("reproducible.verbose", 1), team_drive = NULL) {
# if (is.null(targetFile)) {
guessedFile <- if (!is.null(url)) {
gf <- file.path(destinationPath, basename2(url))
# Test for just Google ID supplied
isGID <- isGoogleID(url)
# isGID <- all(grepl("^[A-Za-z0-9_-]{33}$", url), # Has 33 characters as letters, numbers or - or _
# !grepl("\\.[^\\.]+$", url))
# doesn't have an extension
if (any(isGoogleDriveURL(url), isGID)) {
if (isGID) messagePreProcess("url seems to be a Google Drive ID", verbose = verbose, verboseLevel = 2)
# if (grepl("drive.google.com", url)) {
# ie <- isTRUE(internetExists())
# if (ie) {
gf <- assessGoogle(
url = url, archive = archive, targetFile = targetFile,
destinationPath = destinationPath, verbose = verbose, team_drive = NULL
)
gf <- makeAbsolute(gf, destinationPath)
# }
}
gf
} else {
NULL
}
normPath(guessedFile)
}
isGoogleID <- function(url) {
all(grepl("^[A-Za-z0-9_-]{33}$", url), # Has 33 characters as letters, numbers or - or _
!grepl("\\.[^\\.]+$", url)) ||
grepl("drive.google.com", url)
}
isGoogleDriveURL <- function(url) {
grepl("drive.google.com", url)
}
# COPIED FROM REQUIRE
# urlExists <- function(url) {
# con <- url(url)
# on.exit(try(close(con), silent = TRUE), add = TRUE)
# for (i in 1:5) {
# a <- try(suppressWarnings(readLines(con, n = 1)), silent = TRUE)
# try(close(con), silent = TRUE)
# ret <- if (is(a, "try-error")) FALSE else TRUE
# if (isTRUE(ret)) {
# break
# } else {
# Sys.sleep(0.1)
# }
# }
# ret
# }
#' @keywords internal
.checkSumsUpdate <- function(destinationPath, newFilesToCheck, checkSums,
checkSumFilePath = NULL, verbose = getOption("reproducible.verbose", 1)) {
if (!is.null(newFilesToCheck)) {
if (is.null(checkSumFilePath) || length(checkSumFilePath) == 0) {
checkSumFilePath <- identifyCHECKSUMStxtFile(destinationPath)
}
if (!file.exists(checkSumFilePath)) {
checkSums
} else {
checkSums2 <- try(Checksums(
path = destinationPath, write = FALSE,
files = newFilesToCheck,
checksumFile = checkSumFilePath,
verbose = verbose - 1
), silent = TRUE)
if (!is(checkSums2, "try-error")) {
checkSums <- rbindlist(list(checkSums, checkSums2))
data.table::setkey(checkSums, result)
checkSums <- unique(checkSums, fromLast = TRUE, by = "expectedFile")
checkSums <- rbindlist(list(
checkSums[compareNA("OK", result)],
checkSums[compareNA("FAIL", result)],
checkSums[is.na(result)]
))
} else {
stop("checkSumFilePath is not a CHECKSUMS.txt file")
}
}
}
checkSums
}
#' @keywords internal
.similarFilesInCheckSums <- function(file, checkSums, alsoExtract) {
if (NROW(checkSums)) {
if (!missing(alsoExtract)) {
file <- unique(c(file, alsoExtract))
}
anySimilarInCS <- vapply(filePathSansExt(file), function(fi) {
res <- checkSums[grepl(paste0(fi, "\\."), checkSums$expectedFile), ]$result
res <- na.omit(res)
if (length(res) == 0) res <- "NotOK"
isTRUE(all(compareNA("OK", res)))
}, FUN.VALUE = logical(1))
all(anySimilarInCS)
} else {
FALSE
}
}
#' @keywords internal
.checkForSimilar <- function(neededFiles, alsoExtract, archive, targetFile,
destinationPath, checkSums, checkSumFilePath,
url, verbose = getOption("reproducible.verbose", 1)) {
lookForSimilar <- FALSE
if (is.null(alsoExtract) || length(alsoExtract) == 0) {
messagePreProcess("alsoExtract is unspecified; assuming that all files must be extracted",
verbose = verbose)
lookForSimilar <- "all"
} else {
if (!all(is.na(alsoExtract))) {
if ("similar" %in% basename2(alsoExtract)) {
lookForSimilar <- TRUE
}
}
}
if (isTRUE(lookForSimilar) || ("all" %in% lookForSimilar && !is.null(archive))) {
allFiles <- .listFilesInArchive(archive)
archiveFilesInCS <- allFiles %in% checkSums$expectedFile
rerunChecksums <- TRUE
if (any(archiveFilesInCS)) {
if (all(archiveFilesInCS)) {
isOK <- checkSums[expectedFile %in% allFiles]$result %in% "OK"
if (all(isOK)) {
rerunChecksums <- FALSE
} else { # some not OK, but present
allFiles <- allFiles[!isOK]
}
} else { # some files in the archive are not yet in checkSums -- rerunChecksums on these
allFiles <- allFiles[!archiveFilesInCS]
}
}
if (rerunChecksums) {
neededFiles <- checkRelative(neededFiles, destinationPath, allFiles)
if (is.null(targetFile) || isTRUE(all(is.na(targetFile)))) {
messagePreProcess("No targetFile supplied. ", messageChecksummingAllFiles,
verbose = verbose)
neededFiles <- allFiles
} else if ("all" %in% lookForSimilar) {
messagePreProcess(messageChecksummingAllFiles, verbose = verbose)
neededFiles <- allFiles
} else {
allOK <- .similarFilesInCheckSums(targetFile, checkSums, alsoExtract)
if (!allOK) {
filePatternToKeep <- gsub(basename2(targetFile),
pattern = fileExt(basename2(targetFile)), replacement = ""
)
filesToGet <- grep(allFiles, pattern = filePatternToKeep, value = TRUE)
neededFiles <- c(neededFiles, filesToGet)
}
}
if (exists("filesToGet", inherits = FALSE)) {
if (length(filesToGet) == 0) {
rerunChecksums <- FALSE
}
}
neededFiles <- unique(makeAbsolute(neededFiles, destinationPath))
if (!is.null(neededFiles) && rerunChecksums) {
checkSums <- .checkSumsUpdate(
destinationPath = destinationPath, newFilesToCheck = neededFiles,
checkSums = checkSums,
checkSumFilePath = checkSumFilePath, verbose = verbose
)
}
}
}
list(neededFiles = neededFiles, checkSums = checkSums)
}
#' @keywords internal
.checkLocalSources <- function(neededFiles, checkSumFilePath, checkSums, otherPaths, needChecksums,
destinationPath, verbose = getOption("reproducible.verbose", 1)) {
foundInInputPaths <- character()
successfulCheckSumFilePath <- character()
successfulDir <- character()
if (!is.null(neededFiles)) {
filesInHand <- checkSums[compareNA(checkSums$result, "OK"), ]$expectedFile
neededFilesRel <- makeRelative(neededFiles, destinationPath)
if (!all(neededFilesRel %in% filesInHand)) {
for (op in otherPaths) {
recursively <- if (!is.null(getOption("reproducible.inputPathsRecursive"))) {
getOption("reproducible.inputPathsRecursive")
} else {
FALSE
}
opFiles <- dir(op, recursive = recursively, full.names = TRUE)
if (any(neededFilesRel %in% basename2(opFiles))) {
isNeeded <- basename2(opFiles) %in% neededFilesRel
dirNameOPFiles <- dirname(opFiles[isNeeded])
# foundRecursively <- dirNameOPFiles != dirname(opFiles[isNeeded])
# foundRecursively <- basename2(opFiles[isNeeded][foundRecursively])
uniqueDirsOPFiles <- rev(unique(dirNameOPFiles))
checkSumFilePathTry <- checkSumFilePath
# check CHECKSUMS.txt files, first the one in destinationPath, then ones in inputPaths
for (dirOPFiles in uniqueDirsOPFiles) {
# for (i in seq(1 + length(uniqueDirsOPFiles))) {
checkSumFilePathTry <- identifyCHECKSUMStxtFile(dirOPFiles)
checkSumsInputPath <- suppressMessages(
Checksums(
path = dirOPFiles, write = FALSE,
files = neededFilesRel,
checksumFile = checkSumFilePathTry,
verbose = verbose
)
)
isOK <- checkSumsInputPath[checkSumsInputPath$expectedFile %in% neededFilesRel, ]$result
if (length(isOK)) {
if (all(compareNA(isOK, "OK"))) {
needChecksums <- 3 # Basically this means that we *may* have to update
# checksums file in either destinationPath or
# options("reproducible.inputPaths")
successfulCheckSumFilePath <- checkSumFilePathTry
successfulDir <- unique(dirOPFiles)
break
}
}
checkSumFilePathTry <- identifyCHECKSUMStxtFile(dirOPFiles)
}
checkSumsIPOnlyNeeded <- checkSumsInputPath[compareNA(checkSumsInputPath$result, "OK"), ]
filesInHandIP <- checkSumsIPOnlyNeeded$expectedFile
filesInHandIPLogical <- neededFilesRel %in% filesInHandIP
if (any(filesInHandIPLogical)) {
outHLC <- hardLinkOrCopy(
from = makeAbsolute(filesInHandIP, dirOPFiles),
to = neededFiles[filesInHandIPLogical], verbose = verbose
)
checkSums <- rbindlist(list(checkSumsIPOnlyNeeded, checkSums))
checkSums <- unique(checkSums, by = "expectedFile")
}
foundInInputPaths <- c(foundInInputPaths, filesInHandIP)
if (isTRUE(all(filesInHandIPLogical))) {
break
}
}
}
}
# do a check here that destinationPath is already the inputPaths
# need to emulate the above behaviour
reproducible.inputPaths <- getOption("reproducible.inputPaths", NULL)
if (!is.null(reproducible.inputPaths)) {
reproducible.inputPaths <- normPath(reproducible.inputPaths)
}
if (identical(destinationPath, reproducible.inputPaths)) {
foundInInputPaths <- filesInHand
successfulDir <- destinationPath
successfulCheckSumFilePath <- checkSumFilePath
}
}
list(
checkSums = checkSums, needChecksums = needChecksums,
# foundRecursively = foundRecursively,
neededFiles = neededFiles,
successfulCheckSumFilePath = successfulCheckSumFilePath,
successfulDir = successfulDir,
foundInInputPaths = makeAbsolute(foundInInputPaths, destinationPath)
)
}
#' Hardlink, symlink, or copy a file
#'
#' Attempt first to make a hardlink. If that fails, try to make
#' a symlink (on non-windows systems and `symlink = TRUE`).
#' If that fails, copy the file.
#'
#' @note Use caution with files-backed objects (e.g., rasters). See examples.
#'
#' @param from,to Character vectors, containing file names or paths.
#' `to` can alternatively be the path to a single existing directory.
#' @param symlink Logical indicating whether to use symlink (instead of hardlink).
#' Default `FALSE`.
#' @inheritParams prepInputs
#' @seealso [file.link()], [file.symlink()], [file.copy()].
#'
#' @author Alex Chubaty and Eliot McIntire
#' @export
#' @return
#' This function is called for its side effects, which will be a `file.link` is that
#' is available or `file.copy` if not (e.g., the two directories are not on the
#' same physical disk).
#'
#' @examples
#'
#' tmpDir <- file.path(tempdir(), "symlink-test")
#' tmpDir <- normalizePath(tmpDir, winslash = "/", mustWork = FALSE)
#' dir.create(tmpDir)
#'
#' f0 <- file.path(tmpDir, "file0.csv")
#' write.csv(iris, f0)
#'
#' d1 <- file.path(tmpDir, "dir1")
#' dir.create(d1)
#' write.csv(iris, file.path(d1, "file1.csv"))
#'
#' d2 <- file.path(tmpDir, "dir2")
#' dir.create(d2)
#' f2 <- file.path(tmpDir, "file2.csv")
#'
#' ## create link to a file
#' linkOrCopy(f0, f2)
#' file.exists(f2) ## TRUE
#' identical(read.table(f0), read.table(f2)) ## TRUE
#'
#' ## deleting the link shouldn't delete the original file
#' unlink(f0)
#' file.exists(f0) ## FALSE
#' file.exists(f2) ## TRUE
#'
#' if (requireNamespace("terra", quietly = TRUE)) {
#' ## using spatRasters and other file-backed objects
#' f3a <- system.file("ex/test.grd", package = "terra")
#' f3b <- system.file("ex/test.gri", package = "terra")
#' r3a <- terra::rast(f3a)
#' f4a <- file.path(tmpDir, "raster4.grd")
#' f4b <- file.path(tmpDir, "raster4.gri")
#' linkOrCopy(f3a, f4a) ## hardlink
#' linkOrCopy(f3b, f4b) ## hardlink
#' r4a <- terra::rast(f4a)
#'
#' isTRUE(all.equal(r3a, r4a)) # TRUE
#'
#' ## cleanup
#' unlink(tmpDir, recursive = TRUE)
#' }
linkOrCopy <- function(from, to, symlink = TRUE, overwrite = TRUE,
verbose = getOption("reproducible.verbose", 1)) {
existsLogical <- file.exists(from)
existsTo <- file.exists(to)
toCollapsed <- paste(to, collapse = "\n")
fromCollapsed <- paste(from, collapse = "\n")
result <- TRUE
## if the filename is the same, you can't copy from self to self
if (!all(normPath(to) %in% normPath(from))) {
if (any(existsLogical)) {
toDirs1 <- unique(dirname(to))
dirDoesntExist1 <- !dir.exists(toDirs1)
## some directories in `from` won't look like directories in the prev `to`
## e.g., test/folder1/folder2
## folder2 could be a file or a dir, and "dir.exists(to)" won't know which
## because test/folder1 didn't exist
## So, identify the dirs in the `from`, and those ones will also be dirs in `to`
fromDirs <- dir.exists(from)
toDirs2 <- to[fromDirs]
dirDoesntExist2 <- rep(TRUE, length(toDirs2))
if (any(dirDoesntExist1) || any(dirDoesntExist2)) {
needCreate <- unique(c(toDirs1[dirDoesntExist1], toDirs2[dirDoesntExist2]))
if (any(is.na(needCreate))) {
needCreate <- na.omit(needCreate)
}
lapply(needCreate, dir.create, recursive = TRUE)
}
isDir <- dir.exists(to)
dups <- duplicated(from)
if (any(existsTo)) {
unlink(to[existsTo])
}
# Try hard link first -- the only type that R deeply recognizes
result <- captureWarningsToAttr(
file.link(from[!dups & !isDir], to[!dups & !isDir])
)
warns <- attr(result, "warning")
attr(result, "warning") <- NULL
if (isTRUE(all(result))) {
messagePreProcess("Hardlinked ", hardlinkOrSymlinkMessagePrefix, ":", verbose = verbose)
messagePreProcess("\n", toCollapsed, "\n",
whPointsToMess, "\n",
fromCollapsed,
verbose = verbose - 1
)
messagePreProcess(messageNoCopyMade, verbose = verbose)
} else {
if (grepl("cannot link.+different disk drive", warns) && !isTRUE(symlink)) {
messageColoured("An attempt was made to use hard links to make a quick pointer ",
"from one (set of) file(s) to another; \nthis is not possible because ",
"the files would be on different drives. Consider changing the paths\n",
"so that they will be on the same physical drive", colour = "red")
message(warns)
}
}
if (any(grepl("file already exists", warns))) {
messagePreProcess("File named ", toCollapsed, " already exists; will try to use it/them", verbose = verbose)
result <- TRUE
}
# On *nix types -- try symlink
if (isFALSE(all(result)) && isTRUE(symlink)) {
if (!isWindows()) {
result <- suppressWarnings(file.symlink(from[!result], to[!result]))
if (isTRUE(all(result))) {
messagePreProcess("Symlinked", hardlinkOrSymlinkMessagePrefix, verbose = verbose)
messagePreProcess("\n", toCollapsed, "\n",
whPointsToMess, "\n",
fromCollapsed,
verbose = verbose - 1
)
messagePreProcess(messageNoCopyMade, verbose = verbose)
}
}
}
if (isFALSE(all(result))) {
len <- length(from[!result])
if (len < 50) fromCollapsed[!result] else c(head(fromCollapsed[!result]), tail(fromCollapsed[!result]))
if (len < 50) {
fromMess <- fromCollapsed[!result]
toMess <- toCollapsed[!result]
} else {
fromMess <- c(head(fromCollapsed[!result]), tail(fromCollapsed[!result]))
toMess <- c(head(toCollapsed[!result], 24), "... (omitting many)", tail(toCollapsed[!result], 24))
}
result2 <- file.copy(from[!result], to[!result], overwrite = overwrite)
messagePreProcess("Copy of file: ", fromMess, ", was created at: ", toMess, verbose = verbose)
}
} else {
messagePreProcess("File ", fromCollapsed, " does not exist. Not copying.", verbose = verbose)
result <- FALSE
}
}
return(result)
}
#' @keywords internal
.tryExtractFromArchive <- function(archive,
neededFiles,
filesToChecksum,
alsoExtract,
destinationPath,
checkSums,
needChecksums,
checkSumFilePath,
targetFilePath,
quick, verbose = getOption("reproducible.verbose", 1),
.tempPath) {
if (missing(.tempPath)) {
.tempPath <- tempdir2(rndstr(1, 6))
on.exit(
{
unlink(.tempPath, recursive = TRUE)
},
add = TRUE
)
}
alsoExtract <- grep("none$", alsoExtract, value = TRUE, invert = TRUE) # remove "none" from neededFiles; for extracting
neededFiles <- c(neededFiles, if (!isNULLorNA(alsoExtract)) alsoExtract)
neededFiles <- setdiff(neededFiles, "similar") # remove "similar" from neededFiles; for extracting
neededFiles <- unique(makeAbsolute(neededFiles, destinationPath)) # unique is b/c neededFiles was absolute and alsoExtract was rel
alsoExtract <- makeAbsolute(alsoExtract, destinationPath)
filesExtr <- NULL
if (!isNULLorNA(archive)) {
# if (!is.na(archive)) {
if (any(file.exists(archive))) {
filesExtracted <- extractFromArchive(
archive = archive,
destinationPath = destinationPath,
neededFiles = neededFiles,
checkSums = checkSums,
needChecksums = needChecksums,
checkSumFilePath = checkSumFilePath,
quick = quick,
verbose = verbose,
.tempPath = .tempPath
)
neededFiles <- filesExtracted$neededFiles # will have been potentially corrected if user supplied incorrect relative paths
targetFilePath <- checkRelative(targetFilePath, destinationPath, neededFiles, verbose = verbose - 1)
filesToChecksum <- checkRelative(filesToChecksum, destinationPath, neededFiles, verbose = verbose - 1)
checkSums <- .checkSumsUpdate(
destinationPath = destinationPath,
newFilesToCheck = filesExtracted$filesExtracted,
checkSums = filesExtracted$checkSums, verbose = verbose
)
# filesToChecksum may be wrong because of relative path without subfolder
filesToChecksum <- unique(c(
filesToChecksum, targetFilePath, # alsoExtract, # alsoExtract will be in filesExtracted$filesExtracted
filesExtracted$filesExtracted
))
needChecksums <- filesExtracted$needChecksums
data.table::setDT(filesExtracted$checkSums)
dontNeedChecksums <- if (NROW(filesExtracted$checkSums) > 0) {
filesExtracted$checkSums[
filesExtracted$checkSums$expectedFile %in%
filesToChecksum & compareNA(result, "OK"),
expectedFile
]
} else {
dontNeedChecksums <- character()
}
filesToChecksum <- setdiff(filesToChecksum, dontNeedChecksums)
if (needChecksums > 0) {
checkSums <- appendChecksumsTable(
checkSumFilePath = checkSumFilePath,
filesToChecksum = unique(filesToChecksum),
destinationPath = destinationPath,
append = needChecksums >= 2
)
needChecksums <- 0
}
## targetFilePath might still be NULL, need destinationPath too
filesExtr <- unique(c(
filesToChecksum,
if (is.null(filesExtracted$filesExtracted) ||
length(filesExtracted$filesExtracted) == 0) {
character()
} else {
filesExtracted$filesExtracted
}
))
}
# }
}
if (!is.null(filesExtr)) {
filesExtr <- unique(filesExtr)
}
list(
filesToChecksum = filesToChecksum, filesExtr = filesExtr,
needChecksums = needChecksums,
targetFilePath = targetFilePath,
neededFiles = neededFiles, checkSums = checkSums
)
}
#' @keywords internal
.decodeMagicNumber <- function(magicNumberString) {
fileExt <- if (any(grepl(pattern = "Zip", x = magicNumberString))) {
".zip"
} else if (any(grepl(pattern = "RAR", x = magicNumberString))) {
".rar"
} else if (any(grepl(pattern = "tar", x = magicNumberString))) {
".tar"
} else if (any(grepl(pattern = "TIFF", x = magicNumberString))) {
".tif"
} else if (any(grepl(pattern = "Shapefile", x = magicNumberString))) {
".shp"
} else {
NULL
}
return(fileExt)
}
#' @keywords internal
.guessFileExtension <- function(file) {
if (isWindows()) {
tryCatch(
{
possLocs <- c(
"C:/cygwin/bin/file.exe",
"C:\\cygwin64/bin/file.exe"
)
findFile <- file.exists(possLocs)
if (any(findFile)) {
fileLoc <- possLocs[findFile][1]
}
magicNumber <- captureWarningsToAttr(
system(paste(fileLoc, file), intern = TRUE)
)
warn <- attr(magicNumber, "warning")
attr(magicNumber, "warning") <- NULL
if (length(warn) > 0) {
splitted <- unlist(strsplit(x = file, split = ":/"))
fileAdapted <- file.path(paste0("/mnt/", tolower(splitted[1])), splitted[2])
magicNumber <- captureWarningsToAttr(
shell(paste0("'file ", fileAdapted, "'"), "bash",
intern = TRUE,
wait = TRUE, translate = FALSE, mustWork = TRUE
)
)
warn <- attr(magicNumber, "warning")
attr(magicNumber, "warning") <- NULL
}
fileExt <- if (length(warn) == 0) {
.decodeMagicNumber(magicNumberString = magicNumber)
} else {
NULL
}
return(fileExt)
},
error = function(e) {
fileExt <- NULL
return(fileExt)
}
)
} else {
magicNumber <- system(paste0("file ", file), wait = TRUE, intern = TRUE)
fileExt <- .decodeMagicNumber(magicNumberString = magicNumber)
return(fileExt)
}
}
#' @keywords internal
.fixNoFileExtension <- function(downloadFileResult, targetFile, archive,
destinationPath, verbose = getOption("reproducible.verbose", 1)) {
needFinalCopy <- TRUE
if (!is.null(downloadFileResult$downloaded) &&
identical(fileExt(downloadFileResult$downloaded), "")) {
if (!is.null(targetFile) && !identical(fileExt(normPath(basename2(downloadFileResult$neededFiles))), "")) {
if (is.null(archive)) {
messagePreProcess(
"Downloaded file has no extension: targetFile is provided, but archive is not.\n",
" Downloaded file will be considered as the targetFile. If the downloaded file is an archive\n",
" that contains the targetFile, please specify both archive and targetFile.",
verbose = verbose
)
newFileWithExtension <- downloadFileResult$neededFiles
} else {
messagePreProcess(
"Downloaded file has no extension: both targetFile and archive are provided.\n",
" Downloaded file will be considered as the archive.",
verbose = verbose
)
newFileWithExtension <- downloadFileResult$archive
}
} else {
if (!is.null(archive)) {
messagePreProcess(
"Downloaded file has no extension: archive is provided. \n",
" downloaded file will be considered as the archive.",
verbose = verbose
)
downloadFileResult$neededFiles <- archive
newFileWithExtension <- downloadFileResult$neededFiles
} else {
messagePreProcess(
"Downloaded file has no extension: neither archive nor targetFile are provided. \n",
"prepInputs will try accessing the file type.",
verbose = verbose
)
fileExt <- .guessFileExtension(file = normPath(downloadFileResult$downloaded))
if (is.null(fileExt)) {
messagePreProcess("The file was not recognized by prepInputs. ",
"Will assume the file is an archive and add '.zip' extension. ",
"If this is incorrect or return error, please supply archive or targetFile",
verbose = verbose
)
fileExt <- ".zip"
}
newFileWithExtension <- paste0(downloadFileResult$neededFiles, fileExt)
hardLinkOrCopy(downloadFileResult$neededFiles, newFileWithExtension, verbose = 0)
needFinalCopy <- FALSE
downloadFileResult$neededFiles <- makeAbsolute(.listFilesInArchive(newFileWithExtension), destinationPath)
downloadFileResult$archive <- newFileWithExtension
downloadFileResult$targetFilePath <- normPath(downloadFileResult$neededFiles)
}
}
if (isTRUE(needFinalCopy)) {
hardLinkOrCopy(
verbose = 0,
from = normPath(downloadFileResult$downloaded),
to = normPath(newFileWithExtension)
)
}
downloadFileResult$downloaded <- newFileWithExtension
}
downloadFileResult
}
moveAttributes <- function(source, receiving, attrs = NULL) {
if (!is.null(receiving)) {
sourceAttributes <- attributes(source)
if (length(sourceAttributes) > 0) {
if (!is.null(attrs)) {
sourceAttributes <- attrs
}
for (i in length(sourceAttributes)) {
attr(receiving, names(sourceAttributes)[i]) <- sourceAttributes[[i]]
}
}
}
receiving
}
.checkDeprecated <- function(dots, verbose = getOption("reproducible.verbose", 1)) {
if (!is.null(dots$cacheTags)) {
messagePreProcess("cacheTags is being deprecated;",
" use userTags which will pass directly to Cache.",
verbose = verbose
)
dots$userTags <- dots$cacheTags
dots$cacheTags <- NULL
}
if (!is.null(dots$postProcessedFilename)) {
messagePreProcess("postProcessedFilename is being deprecated;",
" use filename2, used in determineFilename.",
verbose = verbose
)
dots$filename2 <- dots$postProcessedFilename
dots$postProcessedFilename <- NULL
}
if (!is.null(dots$writeCropped)) {
messagePreProcess("writeCropped is being deprecated;",
" use filename2, used in determineFilename.",
verbose = verbose
)
dots$filename2 <- dots$writeCropped
dots$writeCropped <- NULL
}
if (!is.null(dots$rasterInterpMethod)) {
messagePreProcess("rasterInterpMethod is being deprecated;",
" use method which will pass directly to projectRaster.",
verbose = verbose
)
dots$method <- dots$rasterInterpMethod
dots$rasterInterpMethod <- NULL
}
if (!is.null(dots$rasterDatatype)) {
messagePreProcess("rasterDatatype is being deprecated;",
" use datatype which will pass directly to writeRaster.",
verbose = verbose
)
dots$datatype <- dots$rasterDatatype
dots$rasterDatatype <- NULL
}
if (!is.null(dots$pkg)) {
messagePreProcess("pkg is being deprecated;",
"name the package and function directly, if needed,\n",
" e.g., 'pkg::fun'.",
verbose = verbose
)
dots$pkg <- NULL
}
dots
}
.checkFunInDots <- function(fun = NULL, dots) {
if (is.null(fun)) {
if (!is.null(dots$pkg)) {
fun <- paste0(dots$pkg, "::", fun)
}
}
fun
}
hardLinkOrCopy <- function(from, to, overwrite = FALSE, verbose = TRUE) {
linkOrCopy(from, to, symlink = FALSE, verbose = verbose)
}
escapeRegexChars <- function(str, repl = c("(", ")")) {
for (r in repl) {
str <- gsub(paste0("\\", r, ""), paste0("\\\\", r), str)
}
str
}
hardlinkOrSymlinkMessagePrefix <- "version of file(s) created"
hardlinkOrSymlinkMessagePrefixForGrep <- escapeRegexChars(hardlinkOrSymlinkMessagePrefix)
messageNoCopyMade <- "... no copy/copies made."
whPointsToMess <- "which point(s) to"
whPointsToMessForGrep <- escapeRegexChars(whPointsToMess)
getTeamDrive <- function(dots) {
if (requireNamespace("googledrive", quietly = TRUE)) {
teamDrive <- if (packageVersion("googledrive") < "2.0.0") {
dots[["team_drive"]]
} else {
dots[["shared_drive"]]
}
} else {
teamDrive <- NULL
}
}
getTargetFilePath <- function(targetFile, archive, fileGuess, verbose,
destinationPath, alsoExtract, checkSumFilePath) {
if (is.null(targetFile)) {
if ((is.null(archive) || is.na(archive)) && !is.null(fileGuess)) {
messagePreProcess("targetFile was not supplied; guessed and will try ", fileGuess,
". If this is incorrect, please supply targetFile",
verbose = verbose
)
targetFile <- makeRelative(fileGuess, destinationPath)
targetFilePath <- makeAbsolute(targetFile, destinationPath)
} else {
# Case when archive is passed, and fileGuess exists
# if ((!is.null(archive) || !is.na(archive)) && !is.null(fileGuess)) {
# messagePreProcess("archieve was supplied, but targetFile not; guessed and will try ", fileGuess,
# ". If this is incorrect, please supply targetFile",
# verbose = verbose
# )
# targetFile <- makeRelative(fileGuess, destinationPath)
# targetFilePath <- makeAbsolute(targetFile, destinationPath)
# } else {
targetFilePath <- NULL
# }
}
} else {
if (length(targetFile) > 1) {
stop("targetFile should be only 1 file")
}
targetFilePath <- normPath(makeAbsolute(targetFile, destinationPath))
}
targetFilePath
}
guessAlsoExtract <- function(targetFile, alsoExtract, checkSumFilePath) {
if (is.null(alsoExtract)) {
if (file.exists(checkSumFilePath)) {
if (file.size(checkSumFilePath) > 0) {
# if alsoExtract is not specified, then try to find all files in CHECKSUMS.txt with
# same base name, without extension
checksumsTmp <- as.data.table(read.table(checkSumFilePath))
alsoExtract <- grep(paste0(filePathSansExt(targetFile), "\\."), checksumsTmp$file,
value = TRUE
)
rm(checksumsTmp) # clean up
}
}
}
if (any(is.na(alsoExtract))) {
alsoExtract <- NA
} else if (!is.null(alsoExtract)) { # must keep relative because user may not know what path is in archive
if (isTRUE(all(is.na(alsoExtract)))) {
alsoExtract <- character()
}
}
alsoExtract
}
updateArchiveWithGuess <- function(archive, guess) {
if (!is.null(guess)) {
if (is.null(archive)) {
archive <- .isArchive(guess)
}
if (isTRUE(!is.na(archive))) {
archive <- moveAttributes(guess, archive)
}
}
archive
}
setupArchive <- function(archive, destinationPath) {
if (!is.null(archive)) {
if (!is.na(archive)) {
tmpArchive <- archive
archive <- makeAbsolute(archive, destinationPath)
# archive <- file.path(destinationPath, basename2(archive))
# filesToCheck <- unique(c(filesToCheck, archive))
archive <- moveAttributes(tmpArchive, archive)
}
}
archive
}
runChecksums <- function(destinationPath, checkSumFilePath, filesToCheck, verbose) {
reproducible.inputPaths <- getOption("reproducible.inputPaths", NULL)
if (!is.null(reproducible.inputPaths)) {
reproducible.inputPaths <- checkPath(reproducible.inputPaths, create = TRUE)
}
if (!is.null(reproducible.inputPaths)) {
reproducible.inputPaths <- path.expand(reproducible.inputPaths)
}
destinationPathUser <- NULL
possDirs <- unique(c(destinationPath, reproducible.inputPaths))
csfps <- vapply(possDirs, function(dp) identifyCHECKSUMStxtFile(dp), character(1))
allDone <- FALSE
for (dp in possDirs) {
for (csfp in csfps) { # there can be a mismatch between checksums and file location
# csfp <- identifyCHECKSUMStxtFile(dp)
checkSumsTmp1 <- try(Checksums(
path = dp, write = FALSE, checksumFile = csfp,
files = makeRelative(filesToCheck, absoluteBase = destinationPath),
verbose = verbose - 1
), silent = TRUE)
checkSums <- NULL
if (!is(checkSumsTmp1, "try-error")) {
checkSums <- checkSumsTmp1
if (!all(is.na(checkSums$result))) { # found something
if (isTRUE(any(dp %in% reproducible.inputPaths))) {
destinationPathUser <- destinationPath
destinationPath <- dp
on.exit(
{
destinationPath <- destinationPathUser
},
add = TRUE
)
}
allDone <- TRUE
break
}
}
}
if (isTRUE(allDone))
break
}
list(
reproducible.inputPaths = reproducible.inputPaths,
destinationPathUser = destinationPathUser,
destinationPath = destinationPath,
checkSums = checkSums
)
}
dealWithArchive <- function(archive, url, targetFile, checkSums, alsoExtract, destinationPath, teamDrive, verbose) {
fileGuess <- NULL
if (is.null(archive)) {
if (!is.null(url)) {
allOK <- .similarFilesInCheckSums(targetFile, checkSums, alsoExtract)
if (!allOK) { # skip identification of archive if we have all files with same basename as targetFile
# BUT if we don't have all files with identical root name (basename sans ext), then assess for
# an archive, either remotely, in the case of google or from the basename of url
fileGuess <- .guessAtFile(
url = url, archive = archive,
targetFile = targetFile, destinationPath = destinationPath,
verbose = verbose, team_drive = teamDrive
)
archive <- .isArchive(fileGuess)
# The fileGuess MAY have a fileSize attribute, which can be attached to "archive"
archive <- moveAttributes(fileGuess, receiving = archive)
sourceAttributes <- attributes(fileGuess)
if (length(sourceAttributes) > 0 && !is.null(archive)) {
for (i in length(sourceAttributes)) {
attr(archive, names(sourceAttributes)[i]) <- sourceAttributes[[i]]
}
}
checkSums <- .checkSumsUpdate(
destinationPath = destinationPath,
newFilesToCheck = archive,
checkSums = checkSums,
verbose = verbose
)
}
}
}
list(
checkSums = checkSums,
archive = archive,
fileGuess = fileGuess
)
}
isNULLorNA <- function(x) {
out <- TRUE
if (!is.null(x)) {
if (!isTRUE(is.na(x))) {
out <- FALSE
}
}
out
}
identifyCHECKSUMStxtFile <- function(path) {
file.path(path, "CHECKSUMS.txt")
}
linkOrCopyUpdateOnly <- function(from, to, verbose) {
fifrom <- file.info(from)
fito <- file.info(to)
whNotSame <- fifrom$ctime != fito$ctime
needCopy <- !whNotSame %in% FALSE
if (any(needCopy)) {
linkOrCopy(from[needCopy], to[needCopy],
verbose = verbose - 1
)
}
}
messageChecksummingAllFiles <- "Checksumming all files in archive"
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.