#' Get, set and clean the xdf tbl directory
#'
#' By default, dplyrXdf will save the xdf files it creates in the R temporary directory. This can be a problem if it is in a location with limited disk space. Use \code{set_dplyrxdf_dir} to change the xdf tbl directory, and \code{get_dplyrxdf_dir} to view it.
#'
#' @param path Location in which to save xdf tbls. If missing, defaults to the R temporary directory for the native filesystem, and to \code{/tmp} for HDFS.
#' @param fileSystem The filesystem for which to set or get the tbl directory; can be either "hdfs" or "native", or a filesystem object inheriting from \code{RxFileSystem}.
#'
#' @details
#' If \code{path} is supplied, \code{set_dplyrxdf_dir} creates a new directory (with a unique name) located \emph{under} \code{path}. This ensures that the files managed by dplyrXdf are properly isolated from the rest of the filesystem.
#'
#' @seealso
#' \code{\link{rxGetFileSystem}}, \code{\link{rxSetFileSystem}}
#' @rdname workdir
#' @export
set_dplyrxdf_dir <- function(path, fileSystem=rxGetFileSystem())
{
    # Chooses a fresh, uniquely named working directory underneath `path` and
    # records it in .dxOptions. Always returns NULL invisibly.
    fileSystem <- validateFileSystem(fileSystem)
    onHdfs <- inherits(fileSystem, "RxHdfsFileSystem")

    if(!onHdfs)
    {
        # native filesystem: the directory is created immediately
        parentDir <- if(missing(path)) tempdir() else path
        workDir <- normalizePath(tempfile(pattern="dxTmp", tmpdir=parentDir), mustWork=FALSE)
        dir.create(workDir, recursive=TRUE)
        .dxOptions$localWorkDir <- workDir
        return(invisible(NULL))
    }

    # HDFS: only the name is chosen here; the created flag is reset to FALSE
    # and the directory itself is not created by this function
    parentDir <- if(missing(path)) "/tmp" else path

    # tempfile() may produce backslashes; HDFS paths use forward slashes
    workDir <- gsub("\\", "/", tempfile(pattern="dxTmp", tmpdir=parentDir), fixed=TRUE)

    .dxOptions$hdfsWorkDir <- workDir
    # remember the host as well, to allow for Azure Data Lake storage
    .dxOptions$hdfsHost <- fileSystem$hostName
    .dxOptions$hdfsWorkDirCreated <- FALSE
    invisible(NULL)
}
#' @rdname workdir
#' @export
get_dplyrxdf_dir <- function(fileSystem=rxGetFileSystem())
{
    # Returns the working directory recorded in .dxOptions for the given
    # filesystem: the local path for the native filesystem, or a URI built
    # from the stored host and working directory for HDFS.
    fileSystem <- validateFileSystem(fileSystem)

    if(!in_hdfs(fileSystem))
        return(.dxOptions$localWorkDir)

    # only try to create the HDFS directory when detectHdfsConnection(FALSE)
    # does not return NA
    hdfsConn <- detectHdfsConnection(FALSE)
    if(!is.na(hdfsConn))
        make_dplyrxdf_dir(fileSystem)

    makeHdfsUri(.dxOptions$hdfsHost, normalizeHdfsPath(.dxOptions$hdfsWorkDir))
}
make_dplyrxdf_dir <- function(fileSystem=rxGetFileSystem())
{
    # Ensures the working directory recorded in .dxOptions exists.
    #
    # Returns the result of the creation call when a creation was attempted
    # (hdfs_dir_create for HDFS, dir.create for the native filesystem), and
    # NULL when nothing needed to be done.
    fileSystem <- validateFileSystem(fileSystem)
    if(in_hdfs(fileSystem))
    {
        path <- .dxOptions$hdfsWorkDir
        # creation is attempted only while the created flag is still unset
        if(!.dxOptions$hdfsWorkDirCreated)
        {
            message("Creating HDFS working directory")
            host <- fileSystem$hostName
            res <- hdfs_dir_create(path, host=host)
            if(res)
                .dxOptions$hdfsWorkDirCreated <- TRUE
            else warning("unable to create HDFS working directory", call.=FALSE)
            # the host is recorded whether or not creation succeeded
            .dxOptions$hdfsHost <- host
            return(res)
        }
    }
    else
    {
        path <- .dxOptions$localWorkDir
        # recursive=TRUE matches set_dplyrxdf_dir, so the directory can be
        # recreated even if its parent has been removed as well
        if(!dir.exists(path))
            return(dir.create(path, recursive=TRUE))
    }
    NULL
}
#' @details
#' \code{clean_dplyrxdf_dir} is a utility function to delete the files generated by dplyrXdf. Note that all files in the specified location will be removed!
#' @rdname workdir
#' @export
clean_dplyrxdf_dir <- function(fileSystem=rxGetFileSystem())
{
    # Deletes the contents of the working directory reported by
    # get_dplyrxdf_dir for the given filesystem. Always returns NULL invisibly.
    fileSystem <- validateFileSystem(fileSystem)
    workDir <- get_dplyrxdf_dir(fileSystem)

    if(inherits(fileSystem, "RxNativeFileSystem"))
    {
        # remove everything listed in the directory, not the directory itself
        unlink(dir(workDir, full.names=TRUE), recursive=TRUE)
    }
    else if(inherits(fileSystem, "RxHdfsFileSystem"))
    {
        hdfsHost <- .dxOptions$hdfsHost
        # nothing to do if the directory is absent or empty
        if(hdfs_dir_exists(workDir, hdfsHost))
        {
            contents <- hdfs_dir(workDir, full_path=TRUE, host=hdfsHost)
            if(length(contents) > 0)
                hdfs_dir_remove(contents, skipTrash=TRUE, host=hdfsHost)
        }
    }
    invisible(NULL)
}
validateFileSystem <- function(fs)
{
    # Normalises a filesystem specification to a filesystem object.
    # An object inheriting from "RxFileSystem" is returned unchanged.
    if(inherits(fs, "RxFileSystem"))
        return(fs)

    # otherwise fs is compared case-insensitively against "hdfs";
    # any other value selects the native filesystem
    if(tolower(fs) == "hdfs") RxHdfsFileSystem() else RxNativeFileSystem()
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.