#' @import methods
#' @import httr
#' @import rappdirs
#' @importFrom utils tar zip untar unzip
#' @importFrom dplyr mutate
.BiocFileCacheBase = setClass(
"BiocFileCacheBase",
slots=c(cache="character")
)
.BiocFileCacheReadOnly = setClass(
"BiocFileCacheReadOnly",
contains="BiocFileCacheBase",
slots=c(rid="character")
)
.BiocFileCache = setClass(
"BiocFileCache",
contains="BiocFileCacheBase"
)
#' BiocFileCache class
#'
#' This class represents the location of files stored on disk. Use the
#' return value to add and retrieve files that persist across
#' sessions.
#'
#' @details The package defines 'BiocFileCache', 'BiocFileCacheBase' and
#' 'BiocFileCacheReadOnly' classes.
#'
#' Slots unique to 'BiocFileCache' and related classes:
#' \itemize{
#' \item{'cache': }{character(1) on-disk location (directory path) of the
#' cache}
#' \item{'rid': }{character() of unique rids in the cache. }
#' }
#'
#' The cache creates an RSQLite database to keep track of local and remote
#' resources. Each item located in the database will have the following
#' information:
#' \itemize{
#' \item{'rid': }{resource id. Autogenerated. This is a unique identifier
#' automatically generated when a resource is added to the cache}
#' \item{'rname': }{resource name. This is given by the user when a
#' resource is added to the cache. It does not have to be unique
#' and can be updated at anytime. We recommend descriptive key
#' words and identifers.}
#' \item{'create_time': }{The date and time a resource is added to the cache.}
#' \item{'access_time': }{The date and time a resource is utilized
#' within the cache. The access time is updated when the resource
#' is updated or accessed}
#' \item{'rpath': }{resource path. This is the path to the local
#' (on-disk) file}
#' \item{'rtype': }{resource type. Either "relative", "local", or
#' "web", indicating if the resource has a remote origin}
#' \item{'fpath': }{If rtype is "web", this is the link to the
#' remote resource. It will be utilized to download or update the
#' remote data}
#' \item{'last_modified_time': }{For a remote resource, the
#' last_modified (if available) information for the local copy of
#' the data. This information is checked against the remote
#' resource to determine if the local copy is stale and needs to
#' be updated}
#' }
#'
#' All functions have a quick implementation where if the BiocFileCache object
#' is not passed as an argument, the function uses default 'BiocFileCache()' for
#' implementation. e.g 'bfcinfo()' can be used instead of
#' 'bfcinfo(BiocFileCache())'. The only function this is not available for is
#' 'bfcmeta()<-'; The BiocFileCache object must be defined as a varaible and
#' passed as an argument. See vignette("BiocFileCache") for more details.
#'
#' @param cache character(1) On-disk location (directory path) of
#' cache. For default location see
#' \code{\link[rappdirs]{user_cache_dir}}.
#' @param ask logical(1) Ask before creating, updating, overwriting,
#' or removing cache or local file locations.
#' @return For 'BiocFileCache': a \code{BiocFileCache} instance.
#' @examples
#' # bfc <- BiocFileCache() # global cache
#' # bfc
#' bfc0 <- BiocFileCache(tempfile()) # temporary catch for examples
#' @name BiocFileCache-class
#' @aliases BiocFileCache
#' @export BiocFileCache
BiocFileCache <-
function(cache=user_cache_dir(appname="BiocFileCache"), ask = TRUE)
{
stopifnot(
is.character(cache), length(cache) == 1L, !is.na(cache),
is.logical(ask), length(ask) == 1L, !is.na(ask)
)
if (!file.exists(cache)) {
ans <- !ask
if (ask && !.biocfilecache_flags$get_create_asked()) {
ans <- .util_ask(cache, "\n does not exist, create directory?")
.biocfilecache_flags$set_create_asked()
}
if (ans) {
dir.create(cache, recursive=TRUE)
} else {
cache <- file.path(tempdir(), "BiocFileCache")
if (!file.exists(cache)) {
message("using temporary cache ", cache)
dir.create(cache, recursive=TRUE)
}
}
}
bfc <- .BiocFileCache(cache=cache)
.sql_create_db(bfc)
bfc
}
#' @export
setGeneric("bfccache",
function(x) standardGeneric("bfccache")
)
#' @describeIn BiocFileCache Get the location of the on-disk cache.
#' @param x A \code{BiocFileCache} instance or, if missing, the result
#' of \code{BiocFileCache()}.
#' @return For 'bfccache': character(1) location of the directory
#' containing the cache.
#' @examples
#' bfccache(bfc0)
#' @aliases bfccache
#' @exportMethod bfccache
setMethod("bfccache", "BiocFileCacheBase", function(x) x@cache)
#' @rdname BiocFileCache-class
#' @aliases bfccache,missing-method
#' @exportMethod bfccache
setMethod("bfccache", "missing", function(x) bfccache(BiocFileCache()))
#' @describeIn BiocFileCache Get the number of objects in the file
#' cache.
#' @return For 'length': integer(1) Number of objects in the file
#' cache.
#' @examples
#' length(bfc0)
#' @importFrom stats setNames
#' @exportMethod length
setMethod("length", "BiocFileCacheBase", function(x) length(bfcrid(x)))
#' @rdname BiocFileCache-class
#' @aliases bfcrid
#' @export
setGeneric("bfcrid", function(x) standardGeneric("bfcrid"))
#' @rdname BiocFileCache-class
#' @aliases bfcrid,missing-method
#' @exportMethod bfcrid
setMethod("bfcrid", "missing", function(x) bfcrid(BiocFileCache()))
#' @describeIn BiocFileCache Get the rids of the object.
#' @aliases bfcrid,BiocFileCacheReadOnly-method
#' @exportMethod bfcrid
setMethod("bfcrid", "BiocFileCacheReadOnly", function(x) x@rid)
#' @rdname BiocFileCache-class
#' @aliases bfcrid,BiocFileCache-method
#' @exportMethod bfcrid
setMethod("bfcrid", "BiocFileCache", function(x) .get_all_rids(x))
#' @describeIn BiocFileCache Subset a BiocFileCache object.
#' @param drop Ignored.
#' @return For '[': A subset of the BiocFileCache object.
#' @exportMethod [
setMethod("[", c("BiocFileCache", "character", "missing"),
function(x, i, j, ..., drop=TRUE)
{
stopifnot(all(i %in% bfcrid(x)))
stopifnot(identical(unname(drop), TRUE))
.BiocFileCacheReadOnly(x, rid=as.character(i))
})
#' @rdname BiocFileCache-class
#' @aliases [,BiocFileCacheReadOnly,character,missing-method
#' @exportMethod [
setMethod("[", c("BiocFileCacheReadOnly", "character", "missing"),
function(x, i, j, ..., drop=TRUE)
{
stopifnot(all(i %in% bfcrid(x)))
stopifnot(identical(unname(drop), TRUE))
initialize(x, rid=as.character(i))
})
#' @rdname BiocFileCache-class
#' @aliases [,BiocFileCache,missing,missing-method
#' @exportMethod [
setMethod("[", c("BiocFileCache", "missing", "missing"),
function(x, i, j, ..., drop=TRUE)
{
stopifnot(identical(unname(drop), TRUE))
.BiocFileCacheReadOnly(x, rid=bfcrid(x))
})
#' @rdname BiocFileCache-class
#' @aliases [,BiocFileCacheReadOnly,missing,missing-method
#' @exportMethod [
setMethod("[", c("BiocFileCacheReadOnly", "missing", "missing"),
function(x, i, j, ..., drop=TRUE)
{
x # no-op
})
#' @describeIn BiocFileCache Get a file path for select resources from
#' the cache.
#' @param i character() 'rid' identifiers.
#' @param j Ignored.
#' @return For '[[': named character(1) rpath for the given resource
#' in the cache.
#' @exportMethod [[
setMethod("[[", c("BiocFileCacheBase", "character", "missing"),
function(x, i, j)
{
stopifnot(length(i) == 1L, i %in% bfcrid(x))
.sql_get_rpath(x, i)
})
#' @describeIn BiocFileCache Set the file path of selected resources
#' from the cache.
#' @param value character(1) Replacement file path.
#' @return For '[[<-': Updated BiocFileCache, invisibly.
#' @exportMethod [[<-
setReplaceMethod("[[", c("BiocFileCache", "character", "missing", "character"),
function(x, i, j, ..., value)
{
stopifnot(length(i) == 1L, length(value) == 1L)
stopifnot(file.exists(value))
.sql_set_time(x, i)
.sql_set_rpath(x, i, value)
rtype <- unname(.sql_get_rtype(x, i))
if (identical(rtype, "relative") || identical(rtype, "web")) {
warning("updating rpath, changing rtype to 'local'")
.sql_set_rtype(x, i, "local")
}
x
})
#' @export
setGeneric("bfcnew",
function(x, rname, rtype=c("relative", "local"), ext=NA_character_)
standardGeneric("bfcnew"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcnew,missing-method
#' @exportMethod bfcnew
setMethod("bfcnew", "missing",
function(x, rname, rtype=c("relative", "local"), ext=NA_character_)
{
bfcnew(x=BiocFileCache(), rname=rname, rtype=rtype, ext=ext)
})
#' @describeIn BiocFileCache Add a resource to the database
#' @param rname character(1) Name of object in file cache. For
#' 'bfcupdate' a character vector of replacement rnames.
#' @param ext character(1) A file extension to add to the local
#' copy of the file (e.g., \sQuote{sqlite}, \sQuote{txt},
#' \sQuote{tar.gz}).
#' @return For 'bfcnew': named character(1), the path to save your
#' object / file. The name of the return value is the unique rid
#' for the resource.
#' @examples
#' path <- bfcnew(bfc0, "NewResource")
#' path
#' @aliases bfcnew
#' @exportMethod bfcnew
setMethod("bfcnew", "BiocFileCache",
function(x, rname, rtype=c("relative", "local"), ext=NA_character_)
{
stopifnot(
is.character(rname), length(rname) > 0L, !any(is.na(rname)),
is.character(ext), length(ext) > 0L
)
rtype <- match.arg(rtype)
.sql_add_resource(x, rname, rtype, NA_character_, ext)
})
#' @export
setGeneric("bfcadd",
function(
x, rname, fpath = rname, rtype=c("auto", "relative", "local", "web"),
action=c("copy", "move", "asis"), proxy="",
download=TRUE, config=list(), ext=NA_character_, ...
) standardGeneric("bfcadd"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcadd,missing-method
#' @exportMethod bfcadd
setMethod("bfcadd", "missing",
function(
x, rname, fpath = rname, rtype=c("auto", "relative", "local", "web"),
action=c("copy", "move", "asis"), proxy="",
download=TRUE, config=list(), ext=NA_character_, ...
)
{
bfcadd(x=BiocFileCache(), rname=rname, fpath=fpath, rtype=rtype,
action=action, proxy=proxy, download=download, config=config,
ext=ext, ...)
})
#' @describeIn BiocFileCache Add an existing resource to the database
#' @param fpath For bfcadd(), character(1) path to current file
#' location or remote web resource. If none is given, the rname is
#' assumed to also be the path location. For bfcupdate()
#' character() vector of replacement web resources.
#' @param rtype character(1) 'local', 'relative', or 'web' indicating
#' if the resource is a local file, a relative path in the cache,
#' or a web resource. For \code{bfcnew}: local or relative are
#' only options. For \code{bfcadd}, the default 'auto' creates
#' relative or web paths, based on the path prefix.
#' @param action character(1) How to handle the file: create a
#' \code{copy} of \code{fpath} in the cache directory; \code{move}
#' the file to the cache directory; or \code{asis} leave the file
#' in current location but save the path in the cache. If 'rtype
#' == "relative"', action can not be "asis".
#' @param proxy character(1) (Optional) proxy server.
#' @param download logical(1) If \code{rtype=web}, should remote
#' resource be downloaded locally immediately.
#' @param config list() passed as config argument in \code{httr::GET}
#' @param ... For 'bfcadd', 'bfcupdate' and 'bfcdownload': Additional
#' arguments passed to internal download functions for use with
#' \code{httr::GET}. For 'bfcrpaths': Additional arguments passed
#' to 'bfcadd', or \code{exact} passed to 'bfcquery'. For
#' 'bfcquery': Additional arguments passed to \code{grepl}. For
#' 'exportbfc': Additional arguments to the selected outputMethod
#' function. See \code{utils::tar} or \code{utils::zip} for more
#' information. For 'importbfc': Additional arguments to the
#' selected archiveMethod function. See \code{utils::untar} or
#' \code{utils::unzip} for more information.
#' @return For 'bfcadd': named character(1), the path to save your
#' object / file. The name of the character is the unique rid for
#' the resource.
#' @examples
#' fl1 <- tempfile(); file.create(fl1)
#' bfcadd(bfc0, "Test1", fl1) # copy
#' fl2 <- tempfile(); file.create(fl2)
#' bfcadd(bfc0, "Test2", fl2, action="move") # move
#' fl3 <- tempfile(); file.create(fl3)
#' add3 <- bfcadd(bfc0, "Test3", fl3, rtype="local", action="asis") # reference
#' rid3 <- names(add3)
#'
#' bfc0
#' file.exists(fl1) # TRUE
#' file.exists(fl2) # FALSE
#' file.exists(fl3) # TRUE
#'
#' # add a remote resource
#' url <- "http://httpbin.org/get"
#' bfcadd(bfc0, "TestWeb", fpath=url)
#' @aliases bfcadd
#' @exportMethod bfcadd
setMethod("bfcadd", "BiocFileCache",
function(
x, rname, fpath = rname,
rtype = c("auto", "relative", "local", "web"),
action = c("copy", "move", "asis"),
proxy = "", download = TRUE, config = list(), ext=NA_character_,
...)
{
stopifnot(
is.character(rname), length(rname) > 0L, !any(is.na(rname)),
is.character(fpath), length(fpath) > 0L, !any(is.na(fpath))
)
stopifnot(all(action %in% c("copy", "move", "asis")),
all(rtype %in% c("auto", "relative", "local", "web")))
if (missing(rtype)) rtype <- match.arg(rtype)
if (missing(action)) action <- match.arg(action)
stopifnot((length(action) == 1) || (length(action) == length(fpath)))
stopifnot((length(rtype) == 1) || (length(rtype) == length(fpath)))
if (length(action) == 1) action = rep(action, length(fpath))
if (length(rtype) == 1) rtype = rep(rtype, length(fpath))
rtype <- .util_standardize_rtype(rtype, fpath, action)
stopifnot(all(rtype == "web" | file.exists(fpath)))
rpath <- .sql_add_resource(x, rname, rtype, fpath, ext)
rid <- names(rpath)
for(i in seq_along(rpath)){
if (rtype[i] %in% c("local", "relative")) {
switch(
action[i],
copy = file.copy(fpath[i], rpath[i]),
move = file.rename(fpath[i], rpath[i]),
asis = {
.sql_set_rpath(x, rid[i], fpath[i])
rpath[i] <- bfcrpath(x, rids = rid[i])
}
)
} else if (download) { # rtype == "web"
.util_download(x, rid[i], proxy, config, "bfcadd()", ...)
}
}
rpath
})
#' @export
setGeneric("bfcinfo",
function(x, rids) standardGeneric("bfcinfo"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcinfo,missing-method
#' @exportMethod bfcinfo
setMethod("bfcinfo", "missing",
function(x, rids)
{
bfcinfo(x=BiocFileCache(), rids=rids)
})
#' @describeIn BiocFileCache list resources in database
#' @param rids character() Vector of rids.
#' @return For 'bfcinfo': A \code{bfc_tbl} of current resources in the
#' database.
#' @examples
#' bfcinfo(bfc0)
#' @aliases bfcinfo
#' @exportMethod bfcinfo
setMethod("bfcinfo", "BiocFileCacheBase",
function(x, rids)
{
if (missing(rids))
rids <- bfcrid(x)
stopifnot(all(rids %in% bfcrid(x)))
tbl <- .sql_get_resource_table(x, rids)
tbl <- mutate(tbl, rpath = unname(bfcrpath(x, rids=rids)))
class(tbl) <- c("tbl_bfc", class(tbl))
tbl
})
setOldClass("tbl_bfc")
#' @describeIn BiocFileCache Get the rids of the object
#' @exportMethod bfcrid
setMethod("bfcrid", "tbl_bfc", function(x) .get_tbl_rid(x))
#' @export
setGeneric("bfcpath",
function(x, rids) standardGeneric("bfcpath"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcpath,missing-method
#' @exportMethod bfcpath
setMethod("bfcpath", "missing",
function(x, rids)
{
bfcpath(x=BiocFileCache(), rids=rids)
})
#' @describeIn BiocFileCache display rpaths of resource.
#' @return For 'bfcpath': the file path location to load
#' @examples
#' bfcpath(bfc0, rid3)
#' @aliases bfcpath
#' @exportMethod bfcpath
setMethod("bfcpath", "BiocFileCacheBase",
function(x, rids)
{
if (missing(rids))
rids <- bfcrid(x)
stopifnot(length(rids) > 0L, all(rids %in% bfcrid(x)))
path <- .sql_get_rpath(x, rids)
path
})
#' @export
setGeneric("bfcrpath",
function(x, rnames, ..., rids, exact = TRUE) standardGeneric("bfcrpath"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcrpath,missing-method
#' @exportMethod bfcrpath
setMethod("bfcrpath", "missing",
function(x, rnames, ..., rids, exact = TRUE)
{
bfcrpath(x=BiocFileCache(), rnames=rnames, ..., rids=rids, exact=exact)
})
#' @describeIn BiocFileCache display rpath of resource. If 'rnames' is
#' in the cache the path is returned, if it is not it will try to
#' add it to the cache with 'bfcadd'
#' @param rnames character() to match against rnames. Each element of
#' \code{rnames} must match exactly one record. Use \code{exact =
#' FALSE} to use regular expression matching.
#' @return For 'bfcrpath': The local file path location to load.
#' @examples
#' bfcrpath(bfc0, rids = rid3)
#' @aliases bfcrpath
#' @exportMethod bfcrpath
setMethod("bfcrpath", "BiocFileCacheBase",
function(x, rnames, ..., rids, exact = TRUE)
{
if (!missing(rnames) && !missing(rids))
stop("specify either 'rnames' or 'rids' not both.")
update_time_and_path <- function(x, i) {
.sql_get_rpath(x, i)
}
add_or_return_rname <- function(x, rname, ..., exact) {
res <- bfcrid(bfcquery(x, rname, field="rname", exact = exact))
if (length(res) == 0L) {
tryCatch({
message("adding rname '", rname, "'")
names(bfcadd(x, rname, ...))
}, error=function(e) {
warning(
"\ntrying to add rname '", rname, "' produced error:",
"\n ", conditionMessage(e)
)
NA_character_
})
} else if (length(res) == 1L) {
names(update_time_and_path(x, res))
} else {
warning("'rnames' ",
if (exact) "exact" else "regular expression",
" pattern",
"\n '", rname, "'",
"\n is not unique; use 'bfcquery()' to see matches.")
NA_character_
}
}
if (missing(rids))
rids <- bfcrid(x)
if (!missing(rnames)) {
rids0 <- vapply(
rnames, add_or_return_rname, character(1), x=x, ..., exact = exact
)
if (anyNA(rids0)) {
rmdx <- setdiff(bfcrid(x), rids)
if (length(rmdx) > 0L)
bfcremove(x, rmdx)
stop("not all 'rnames' found or unique.")
}
bfcrpath(x, rids = rids0)
} else {
stopifnot(all(rids %in% bfcrid(x)))
update_time_and_path(x, rids)
}
})
#' @export
setGeneric("bfcupdate",
function(x, rids, value, ...) standardGeneric("bfcupdate"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcupdate,missing-method
#' @exportMethod bfcupdate
setMethod("bfcupdate", "missing",
function(x, rids, value, ...)
{
bfcupdate(x=BiocFileCache(), rids=rids, value=value, ...)
})
#' @describeIn BiocFileCache Update a resource in the cache
#' @param rpath character() vector of replacement rpaths.
#' @return For 'bfcupdate': an updated \code{BiocFileCache} object,
#' invisibly.
#' @examples
#' bfcupdate(bfc0, rid3, rpath=fl3, rname="NewRname")
#' bfc0[[rid3]] = fl1
#' bfcupdate(bfc0, "BFC5", fpath="http://google.com")
#' @aliases bfcupdate
#' @exportMethod bfcupdate
setMethod("bfcupdate", "BiocFileCache",
function(x, rids, rname=NULL, rpath=NULL, fpath=NULL,
proxy="", config=list(), ask=TRUE, ...)
{
stopifnot(!missing(rids), all(rids %in% bfcrid(x)))
stopifnot(
is.null(rname) || (length(rids) == length(rname)),
is.null(rpath) || (length(rids) == length(rpath)),
is.null(fpath) || (length(rids) == length(fpath))
)
stopifnot(
is.null(rname) || is.character(rname),
is.null(rpath) || is.character(rpath),
is.null(fpath) || is.character(fpath)
)
info <- NULL
for (i in seq_along(rids)) {
.sql_set_time(x, rids[i])
if (!is.null(rname)) {
.sql_set_rname(x, rids[i], rname[i])
}
if (!is.null(rpath)) {
if (!file.exists(rpath[i]))
stop(
"bfcupdate() failed",
"\n rid: ", rids[i],
"\n rpath: ", sQuote(rpath[i]),
"\n reason: rpath does not exist.",
call.=FALSE
)
.sql_set_rpath(x, rids[i], rpath[i])
rtype <- unname(.sql_get_rtype(x, rids[i]))
if (identical(rtype, "relative") || identical(rtype, "web")) {
warning("updating rpath, changing rtype to 'local'")
.sql_set_rtype(x, rids[i], "local")
}
}
if (!is.null(fpath)) {
if (.sql_get_rtype(x, rids[i]) != "web")
stop("bfcupdate() failed",
"\n rid: ", rids[i],
"\n reason: resource rtype is not 'web'",
call.=FALSE)
if (ask) {
doit <- .util_ask(
"Setting a new remote path results in immediate\n",
" download and overwriting of existing file.\n",
" Continue?"
)
} else {
doit <- TRUE
}
if (doit) {
.util_download_and_rename(
x, rids[i], proxy, config, "bfcupdate()", fpath[i], ...
)
.sql_set_fpath(x, rids[i], fpath[i])
}
}
}
invisible(x)
})
#' @rdname BiocFileCache-class
#' @export
setGeneric("bfcmeta<-",
function(x, name, ..., value)
standardGeneric("bfcmeta<-"),
signature = "x"
)
#' @describeIn BiocFileCache add meta data table in database
#' @param name character(1) name of metadata table.
#' @return For 'bfcmeta': updated BiocFileCache, invisibly
#' @examples
#' meta = data.frame(list(rid = paste("BFC",seq_len(bfccount(bfc0)), sep=""),
#' num=seq(bfccount(bfc0),1,-1),
#' data=c(paste("Letter",
#' letters[seq_len(bfccount(bfc0))]))),
#' stringsAsFactors=FALSE)
#' bfcmeta(bfc0, name="resourcedata") <- meta
#' @aliases bfcmeta<-
#' @exportMethod bfcmeta<-
setReplaceMethod("bfcmeta", "BiocFileCacheBase",
function(x, name, ..., value)
{
stopifnot("rid" %in% colnames(value))
rids <- value$rid
stopifnot(all(rids %in% bfcrid(x)))
stopifnot(is.character(name), length(name) == 1L, !is.na(name))
if (name %in% .RESERVED$TABLES)
stop(
"'", name, "' cannot be added; reserved table names: ",
paste(sQuote(.RESERVED$TABLES), collapse=", ")
)
if (any(colnames(value) %in% .RESERVED$COLUMNS))
stop(
"'value' (metadata) cannot contain colnames ",
paste(sQuote(.RESERVED$COLUMNS), collapse= ", ")
)
.sql_meta_gets(x, name, value, ...)
x
})
#' @export
setGeneric("bfcmetaremove",
function(x, name, ...) standardGeneric("bfcmetaremove"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcmetaremove,missing-method
#' @exportMethod bfcmetaremove
setMethod("bfcmetaremove", "missing",
function(x, name, ...)
{
bfcmetaremove(x=BiocFileCache(), name=name, ...)
})
#' @describeIn BiocFileCache remove meta data table in database
#' @return For 'bfcmetaremove': updated BiocFileCache, invisibly
#' @examples
#' \dontrun{bfcmetaremove(bfc0, "resourcedata")}
#' @aliases bfcmetaremove
#' @exportMethod bfcmetaremove
setMethod("bfcmetaremove", "BiocFileCacheBase",
function(x, name, ...)
{
stopifnot(
!missing(name), is.character(name), length(name) == 1L, !is.na(name)
)
if (name %in% .RESERVED$TABLES)
stop("reserved table '", name, "' cannot be removed")
.sql_meta_remove(x, name, ...)
invisible(x)
})
#' @export
setGeneric("bfcmetalist",
function(x) standardGeneric("bfcmetalist"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcmetalist,missing-method
#' @exportMethod bfcmetalist
setMethod("bfcmetalist", "missing",
function(x)
{
bfcmetalist(x=BiocFileCache())
})
#' @describeIn BiocFileCache retrieve listing of metadata tables
#' @return For 'bfcmetalist': returns a character() of all metadata tables
#' currently in the database. If no metadata tables are available returns
#' character(0)
#' @examples
#' bfcmetalist(bfc0)
#' @aliases bfcmetalist
#' @exportMethod bfcmetalist
setMethod("bfcmetalist", "BiocFileCacheBase",
function(x)
{
.sql_meta_list(x)
})
#' @export
setGeneric("bfcmeta",
function(x, name, ...) standardGeneric("bfcmeta"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcmeta,missing-method
#' @exportMethod bfcmeta
setMethod("bfcmeta", "missing",
function(x, name, ...)
{
bfcmeta(x=BiocFileCache(), name=name, ...)
})
#' @describeIn BiocFileCache retrieve metadata table
#' @return For 'bfcmeta': returns a data.frame representation of database
#' table
#' @examples
#' tbl = bfcmeta(bfc0, "resourcedata")
#' tbl
#' @aliases bfcmeta
#' @exportMethod bfcmeta
setMethod("bfcmeta", "BiocFileCacheBase",
function(x, name, ...)
{
if (missing(name)) {
tbls <- paste(sQuote(bfcmetalist(x)), collapse=", ")
if (!nzchar(tbls))
tbls <- NA_character_
stop("metadata table 'name' missing, possible values: ", tbls)
}
stopifnot(is.character(name), length(name) == 1L, !is.na(name))
.sql_meta(x, name, ...)
})
#' @export
setGeneric("bfcquerycols",
function(x) standardGeneric("bfcquerycols")
)
#' @rdname BiocFileCache-class
#' @aliases bfcquerycols,missing-method
#' @exportMethod bfcquerycols
setMethod("bfcquerycols", "missing",
function(x)
{
bfcquerycols(x=BiocFileCache())
})
#' @describeIn BiocFileCache Get all the possible columns to query
#' @return For 'bfcquerycols': character() all columns in all database tables
#' available for query.
#' @examples
#' bfcquerycols(bfc0)
#' @aliases bfcquerycols
#' @exportMethod bfcquerycols
setMethod("bfcquerycols", "BiocFileCacheBase",
function(x)
{
.get_all_colnames(x)
})
#' @export
setGeneric("bfcquery",
function(x, query, field=c("rname", "rpath", "fpath"), ..., exact = FALSE)
standardGeneric("bfcquery"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcquery,missing-method
#' @exportMethod bfcquery
setMethod("bfcquery", "missing",
function(x, query, field=c("rname", "rpath", "fpath"), ..., exact = FALSE)
{
bfcquery(x=BiocFileCache(), query=query, field=field, ..., exact = exact)
})
#' @describeIn BiocFileCache query resource
#' @param query character() Regular expression pattern(s) to match in
#' resource. It will match the pattern against \code{fields},
#' using \code{&} logic across query element. By default, case
#' sensitive. When \code{exact = TRUE}, \code{query} uses exact
#' matching.
#' @param field character() column names in resource to query, using
#' \code{||} logic across multiple field elements. By default,
#' matches pattern agains rname, rpath, and fpath. If exact
#' matching, may only be a single value.
#' @param exact logical(1) when FALSE, treat \code{query} as a regular
#' expression. When TRUE, use exact matching. For \code{bfcquery},
#' the default is \code{FALSE} (regular expression matching; for
#' \code{bfcrpath}, the default is \code{TRUE} (exact matching).
#' @return For 'bfcquery': A \code{bfc_tbl} of current resources in
#' the database whose \code{field} contained query. If multiple
#' values are given, the resource must contain all of the
#' patterns. A tbl with zero rows is returned when no resources
#' match the query.
#' @examples
#' bfcquery(bfc0, "Test")
#' bfcquery(bfc0, "^Test1$", field="rname")
#' @aliases bfcquery
#' @exportMethod bfcquery
setMethod("bfcquery", "BiocFileCacheBase",
function(x, query, field=c("rname", "rpath", "fpath"), ..., exact = FALSE)
{
stopifnot(is.character(query))
stopifnot(all(field %in% .get_all_colnames(x)))
tbl <- .sql_get_resource_table(x)
keep <- TRUE
FUN <-
if (exact) {
function(pattern, x, ...) x == pattern
} else grepl
for (q in query)
keep <- keep & Reduce(`|`, lapply(tbl[field], FUN, pattern = q, ...))
rids <- intersect(tbl$rid[keep], bfcrid(x))
bfcinfo(x, rids)
})
#' @export
setGeneric("bfccount",
function(x) standardGeneric("bfccount")
)
#' @rdname BiocFileCache-class
#' @aliases bfccount,missing-method
#' @exportMethod bfccount
setMethod("bfccount", "missing",
function(x)
{
bfccount(x=BiocFileCache())
})
#' @describeIn BiocFileCache Get the number of objects in the file
#' cache or query.
#' @return For 'bfccount': integer(1) Number of objects in the cache
#' or query.
#' @examples
#' bfccount(bfc0)
#' bfccount(bfcquery(bfc0, "test"))
#' @aliases bfccount
#' @exportMethod bfccount
setMethod("bfccount", "BiocFileCacheBase",
function(x)
{
bfccount(bfcinfo(x))
})
#' @rdname BiocFileCache-class
#' @aliases bfccount,tbl_bfc-method
#' @exportMethod bfccount
setMethod("bfccount", "tbl_bfc",
function(x)
{
.sql_get_nrows(x)
})
#' @export
setGeneric("bfcneedsupdate",
function(x, rids) standardGeneric("bfcneedsupdate"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcneedsupdate,missing-method
#' @exportMethod bfcneedsupdate
setMethod("bfcneedsupdate", "missing",
function(x, rids)
{
bfcneedsupdate(x=BiocFileCache(), rids=rids)
})
#' @describeIn BiocFileCache check if a resource needs to be updated
#' @return For 'bfcneedsupdate': named logical vector if resource
#' needs to be updated. The name is the resource
#' 'rid'. \code{TRUE}: fpath \code{etag} or \code{modified} time of
#' web resource more recent than in BiocFileCache; \code{FALSE}: fpath
#' \code{etag} or \code{modified} time of web resource not more recent
#' than in BiocFileCache; \code{NA}: web resource etag and modified time
#' could not be determined. If the etag is available the function will use
#' that information definitively and only compare last modified time if
#' etag is not available. If there is an \code{expires} time that will be
#' used to initially determine if the resource should be updated.
#' @examples
#' bfcneedsupdate(bfc0, "BFC5")
#' @aliases bfcneedsupdate
#' @exportMethod bfcneedsupdate
setMethod("bfcneedsupdate", "BiocFileCacheBase",
function(x, rids)
{
if (missing(rids))
rids <- .get_all_web_rids(x)
stopifnot(all(rids %in% bfcrid(x)))
if (!all(rids %in% .get_all_web_rids(x)))
stop("rids not all web resources")
helper <- function(x, rid) {
file_time <- .sql_get_last_modified(x, rid)
fpath <- .sql_get_fpath(x, rid)
file_etag <- .sql_get_etag(x, rid)
file_expires <- .sql_get_expires(x, rid)
cache_info <- .httr_get_cache_info(fpath)
web_time <- cache_info[["modified"]]
web_etag <- cache_info[["etag"]]
if (!is.na(file_expires))
expired <- as.Date(file_expires, optional=TRUE) <= Sys.Date()
else
expired <- FALSE
checkTime <- FALSE
if (expired){
res <- TRUE
checkTime <- FALSE
} else {
if (is.na(web_etag) || is.na(file_etag)) {
checkTime <- TRUE
} else {
res <- !identical(unname(file_etag), web_etag)
}
if (checkTime) {
if (is.na(file_time) || is.na(web_time)) {
res <- NA
} else {
res <- as.POSIXlt(web_time, optional=TRUE) >
as.POSIXlt(file_time, optional=TRUE)
}
}
}
res
} # end helper
result <- vapply(rids, helper, logical(1), x=x)
# if web resources hasn't been locally downloaded yet
result[rids %in% .get_rid_filenotfound(x)] = TRUE
setNames(result, rids)
})
#' @export
setGeneric("bfcdownload",
function(x, rid, proxy="", config=list(), ask=TRUE, FUN, ...)
standardGeneric("bfcdownload"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcdownload,missing-method
#' @param rid character(1) Unique resource id.
#' @exportMethod bfcdownload
setMethod("bfcdownload", "missing",
function(x, rid, proxy="", config=list(), ask=TRUE, FUN, ...)
{
bfcdownload(x=BiocFileCache(), rid=rid, proxy=proxy, config=config, ask=ask,
FUN=FUN, ...)
})
#' @describeIn BiocFileCache Redownload resource to location in cache
#' @param FUN A specialized implemented function designed by the user. This
#' function can be used to perform and save the results of a post download
#' processing step rather than direct output. The function should ONLY take in
#' two file names: the first the raw downloaded file and the second the output
#' file for saved results. The output of the function should be TRUE/FALSE if
#' step was successful. See vignette section on Specialty Advance Use Case for
#' more details.
#' @return For 'bfcdownload': character(1) path to downloaded resource
#' in cache.
#' @examples
#' bfcdownload(bfc0, "BFC5")
#' @aliases bfcdownload
#' @exportMethod bfcdownload
setMethod("bfcdownload", "BiocFileCache",
function(x, rid, proxy="", config=list(), ask=TRUE, FUN, ...)
{
stopifnot(
!missing(rid), length(rid) > 0L,
all(rid %in% bfcrid(x)),
all(.sql_get_rtype(x, rid) == "web")
)
.sql_set_time(x, rid)
if (ask && any(file.exists(.sql_get_rpath(x, rid)))) {
doit <- .util_ask(
"bfcdownload() will overwrite exisiting files, continue?"
)
} else {
doit <- TRUE
}
if (doit)
.util_download_and_rename(x, rid, proxy, config, "bfcdownload()",
FUN=FUN, ...)
bfcrpath(x, rids=rid)
})
#' @export
setGeneric("bfcremove",
function(x, rids) standardGeneric("bfcremove"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcremove,missing-method
#' @exportMethod bfcremove
setMethod("bfcremove", "missing",
function(x, rids)
{
bfcremove(x=BiocFileCache(), rids=rids)
})
#' @describeIn BiocFileCache Remove a resource to the database. If
#' the local file is located in \code{bfccache(x)}, the file will
#' also be deleted. This will not delete information in any metadata
#' table.
#' @return For 'bfcremove': updated BiocFileCache object, invisibly
#' @examples
#' bfcremove(bfc0, rid3)
#' bfcinfo(bfc0)
#' @aliases bfcremove
#' @exportMethod bfcremove
setMethod("bfcremove", "BiocFileCache",
function(x, rids)
{
stopifnot(all(rids %in% bfcrid(x)))
rpaths <- .sql_get_rpath(x, rids)
cached <- startsWith(rpaths, bfccache(x))
.sql_remove_resource(x, rids)
status <- .util_unlink(rpaths[cached])
invisible(x)
})
#' @export
setGeneric("bfcsync",
function(x, verbose = TRUE, ask = TRUE) standardGeneric("bfcsync"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases bfcsync,missing-method
#' @exportMethod bfcsync
setMethod("bfcsync", "missing",
function(x, verbose = TRUE, ask = TRUE)
{
bfcsync(x=BiocFileCache(), verbose=verbose, ask=ask)
})
#' @describeIn BiocFileCache sync cache and resource.
#' @param verbose logical(1) If descriptive message and list of issues
#' should be included as output.
#' @return For 'bfcsync': logical(1) indicating whether the cache is
#' in sync (\code{TRUE}) or not. 'verbose' is TRUE by default, so
#' descriptive messages will also be included.
#' @examples
#' bfcsync(bfc0)
#'
#' if (!interactive()){
#' # in interactive mode, in the sync above
#' # this was probably already removed
#' # noninteractive mode does not remove resources
#' # so can remove manually here
#' bfcremove(bfc0, "BFC1")
#' }
#' bfcsync(bfc0, FALSE)
#' @aliases bfcsync
#' @importFrom utils capture.output
#' @exportMethod bfcsync
setMethod("bfcsync", "BiocFileCache",
function(x, verbose=TRUE, ask = TRUE)
{
stopifnot(is.logical(verbose), length(verbose) == 1L, !is.na(verbose))
# files not found
rids <- .get_rid_filenotfound(x)
# files untracked in cache location
files <- file.path(bfccache(x), setdiff(dir(bfccache(x)),c(.CACHE_FILE, .CACHE_FILE_LOCK)))
paths <- .sql_get_rpath(x, bfcrid(x))
# normalizePath on windows
# can't across platform - no opt on linux but added hidden on mac
if (tolower(.Platform$OS.type) == "windows") {
files = normalizePath(files)
paths = normalizePath(paths)
}
untracked <- setdiff(files, paths)
rids0 <- rids; untracked0 <- untracked
if (verbose && (length(rids) != 0L))
message(
"entries without corresponding files: ",
paste0("'", rids, "'", collapse=" ")
)
if (ask && (length(rids) != 0L)) {
doit <- .util_ask("delete ", length(rids), " entries?")
rids <- rids[doit]
}
if (verbose && (length(untracked) != 0L))
message(
"files without cache entries\n ",
paste(untracked, collpase="\n ")
)
if (ask && (length(untracked) != 0L)) {
doit <- .util_ask("delete ", length(untracked), " files?")
untracked <- untracked[doit]
}
.sql_remove_resource(x, rids)
.util_unlink(untracked)
!length(setdiff(rids0, rids)) && !length(setdiff(untracked0, untracked))
})
#' @export
setGeneric("exportbfc",
function(x, rids,
outputFile="BiocFileCacheExport.tar", outputMethod=c("tar","zip"),
verbose=TRUE, ...)
standardGeneric("exportbfc"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases exportbfc,missing-method
#' @exportMethod exportbfc
setMethod("exportbfc", "missing",
function(x, rids,
outputFile="BiocFileCacheExport.tar", outputMethod=c("tar","zip"),
verbose=TRUE, ...)
{
exportbfc(x=BiocFileCache(), rids=rids,
outputFile=outputFile, outputMethod=outputMethod,
verbose=verbose, ...)
})
#' @describeIn BiocFileCache Create exportable file containing
#' BiocFileCache.
#' @param outputFile character(1) The <filepath>/basename for the
#' output archive. Please include appropriate extension based on
#' outMethod and any additional parameters selected for
#' \code{utils::tar} or \code{utils::zip}
#' @param outputMethod Either 'tar' or 'zip' for how the directory
#' should be archived. Default is 'tar'.
#' @return character(1) The outputFile path.
#' @examples
#' \dontrun{exportbfc(bfc)}
#' @aliases exportbfc
#' @exportMethod exportbfc
setMethod("exportbfc", "BiocFileCacheBase",
function(x, rids,
outputFile="BiocFileCacheExport.tar", outputMethod=c("tar","zip"),
verbose=TRUE, ...)
{
if (missing(rids))
rids <- bfcrid(x)
stopifnot(all(rids %in% bfcrid(x)))
stopifnot(length(outputFile) == 1L, is.character(outputFile))
outputMethod <- match.arg(outputMethod)
stopifnot(is.logical(verbose), length(verbose) == 1L)
bfc <- x[rids]
if (length(bfc) == 0L)
stop("No valid rids selected")
dir <- file.path(tempdir(), "BiocFileCacheExport")
dir.create(dir)
ids <- bfcrid(bfc)
file.copy(.sql_dbfile(x), dir)
newbfc <- BiocFileCache(dir)
idrm <- setdiff(.get_all_rids(newbfc), ids)
if (length(idrm) != 0)
newbfc <- bfcremove(newbfc, rids=idrm)
res <- vapply(ids, .util_export_file, character(1),
bfc=x, dir=dir)
.sql_set_time(x, ids)
# 'relative' = ok, 'web'= not download
# 'local' = file not in cache, 'NA' = file not found
if (any(res == "web", na.rm=TRUE)) {
webid <- names(which(res == "web"))
if (verbose)
message(
"The following are identified as web resources\n",
"but have not been downloaded yet. No associated\n",
"files will be exported:\n",
" ", paste0("'", webid, "'", collapse=" "),
"\n\n"
)
}
if (any(res == "local", na.rm=TRUE)) {
locid <- names(which(res == "local"))
if (verbose)
message(
"The following are identified as local resources.\n",
"A copy of the file will be exported:\n",
" ", paste0("'", locid, "'", collapse=" "),
"\n\n"
)
for (i in locid) {
orig <- .sql_get_rpath(x, i)
newpath <- file.path(dir, basename(orig))
if (file.exists(newpath)) {
filename <- paste(basename(tempfile("", bfccache(newbfc))),
basename(orig), sep="_")
newpath <- file.path(dir, filename)
}
file.copy(orig, newpath)
}
}
if (any(is.na(res))) {
naid <- names(which(is.na(res)))
if (verbose)
message(
"The following had a file that was not found.\n",
"The file is not included and the rid will be removed\n",
"from the BiocFileCache object being exported:\n",
" ", paste0("'", naid, "'", collapse=" "),
"\n\n"
)
newbfc <- bfcremove(newbfc, rids=naid)
}
if (length(bfcmetalist(newbfc)) != 0) {
metaList <- bfcmetalist(newbfc)
res <- vapply(metaList, .sql_filter_metadata, logical(1),
bfc=newbfc, verbose=verbose)
}
# tar/zip up directory
origdir <- getwd()
if (dirname(outputFile) == ".")
outputFile = file.path(origdir, outputFile)
setwd(dirname(dir))
files = basename(dir)
archive <- function(outputFile, how = c("tar", "zip"), files, ...) {
fun <- switch(how, tar = tar, zip = zip)
fun(outputFile, files, ...)
}
# remove lock file from export
.util_unlink(file.path(dir, .CACHE_FILE_LOCK))
archive(outputFile=outputFile, how=outputMethod, files=files, ...)
setwd(origdir)
.util_unlink(dir, recursive=TRUE)
outputFile
})
#' @export
setGeneric("importbfc",
function(filename, archiveMethod=c("untar","unzip"),
exdir=".", ...)
standardGeneric("importbfc"),
signature = "filename"
)
#' @describeIn BiocFileCache Import file created with exportbfc containing
#' BiocFileCache.
#' @param filename character(1) The name of the archive.
#' @param archiveMethod Either 'untar' or 'unzip' for how the directory should
#' be extracted. Default is 'untar'.
#' @param exdir Directory to extract files too. See \code{utils::untar} or
#' \code{utils::unzip} for more details.
#' @return A BiocFileCache object
#' @examples
#' \dontrun{importbfc("ExportBiocFileCache.tar")}
#' @aliases importbfc
#' @exportMethod importbfc
setMethod("importbfc", "character",
function(filename, archiveMethod=c("untar","unzip"),
exdir=".", ...)
{
exportPath <- file.path(exdir, "BiocFileCacheExport")
stopifnot(!dir.exists(exportPath))
stopifnot(length(exdir) == 1L, is.character(exdir))
stopifnot(length(filename) == 1L, is.character(filename))
archiveMethod = match.arg(archiveMethod)
inflate <- function(filename, how = c("untar", "unzip"), exdir, ...) {
fun <- switch(how, untar = untar, unzip = unzip)
fun(filename, exdir=exdir, ...)
}
inflate(filename=filename, how=archiveMethod, exdir=exdir, ...)
bfc = BiocFileCache(exportPath)
bfc
})
#' @export
setGeneric("cleanbfc",
function(x, days = 120, ask = TRUE) standardGeneric("cleanbfc"),
signature = "x"
)
#' @rdname BiocFileCache-class
#' @aliases cleanbfc,missing-method
#' @exportMethod cleanbfc
setMethod("cleanbfc", "missing",
function(x, days = 120, ask = TRUE)
{
cleanbfc(x=BiocFileCache(), days=days, ask=ask)
})
#' @describeIn BiocFileCache Remove old/unused files in
#' BiocFileCache. If file to be removed is not in the bfccache
#' location it will not be deleted. Setting \code{days=-Inf}
#' will remove all cached files.
#' @param days integer(1) Number of days between accessDate and
#' currentDate; if exceeded entry will be deleted.
#' @return For 'cleanbfc': updated BiocFileCache, invisibly.
#' @examples
#' \dontrun{cleanbfc(bfc, ask=FALSE)}
#' @aliases cleanbfc
#' @exportMethod cleanbfc
setMethod("cleanbfc", "BiocFileCache",
function(x, days = 120, ask=TRUE)
{
stopifnot(is.numeric(days), length(days) == 1L, !is.na(days))
stopifnot(is.logical(ask), length(ask) == 1L, !is.na(ask))
rids <- .sql_clean_cache(x, days)
rpaths <- .sql_get_rpath(x, rids)
cached <- startsWith(rpaths, bfccache(x))
if (ask) {
txt0 <- paste0(" file ", sQuote(rpaths))
txt <- sprintf(
"Remove id %d %d", sQuote(rids), ifelse(cached, txt0, "")
)
doit <- vapply(txt, .util_ask, logical(1))
rids <- rids[doit]
cached <- cached & doit
}
.sql_remove_resource(x, rids)
.util_unlink(rpaths[cached])
invisible(x)
})
#' @export
setGeneric("removebfc",
function(x, ask = TRUE) standardGeneric("removebfc"),
signature="x"
)
#' @rdname BiocFileCache-class
#' @aliases removebfc,missing-method
#' @exportMethod removebfc
setMethod("removebfc", "missing",
function(x, ask = TRUE)
{
removebfc(x=BiocFileCache(), ask=ask)
})
#' @describeIn BiocFileCache Completely remove the BiocFileCache
#' @return For 'removebfc': TRUE if successfully removed.
#' @examples
#' \dontrun{removebfc(bfc, ask=FALSE)}
#' @aliases removebfc
#' @exportMethod removebfc
setMethod("removebfc", "BiocFileCache",
function(x, ask=TRUE)
{
stopifnot(is.logical(ask), length(ask) == 1L, !is.na(ask))
txt <- paste("remove cache and", length(x), "resource(s)?")
if (!ask || .util_ask(txt))
doit <- .util_unlink(bfccache(x), recursive=TRUE)
doit
})
#' @describeIn BiocFileCache Display a \code{BiocFileCache} instance.
#' @param object A \code{BiocFileCache} instance.
#' @exportMethod show
setMethod("show", "BiocFileCacheBase",
function(object)
{
cat("class: ", class(object), "\n",
"bfccache: ", bfccache(object), "\n",
"bfccount: ", bfccount(object), "\n",
"For more information see: bfcinfo() or bfcquery()\n",
sep="")
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.