Nothing
#' Listing the data for the current project
#'
#' Produces a data.frame with one row per data file in the project, together
#' with meta data on if and how each file will be loaded by
#' \code{load.project}.
#'
#' @param ... Named arguments to override configuration from
#'   \code{config/global.dcf} and \code{lib/global.R}.
#'
#' @return A data.frame listing the available data, with relevant meta data
#'
#' @details The returned data.frame contains the following variables, with one
#'   observation per file in \code{data/}:
#'
#' \tabular{ll}{
#'  \code{filename} \tab Character variable containing the filename relative
#'    to the \code{data/} directory. \cr
#'  \code{varname} \tab Character variable containing the name of the variable
#'    into which the file will be imported. * \cr
#'  \code{is_ignored} \tab Logical variable that indicates whether the file
#'    is ignored through the \code{data_ignore} option in the configuration.
#'    \cr
#'  \code{is_directory} \tab Logical variable that indicates whether the file
#'    is a directory. \cr
#'  \code{is_cached} \tab Logical variable that indicates whether the file is
#'    already available in the \code{cache/} directory. \cr
#'  \code{cache_only} \tab Logical variable that indicates whether the
#'    variable is only available in the \code{cache/} directory. This occurs
#'    when calling the cache function with a code fragment in a munge script.
#'    \cr
#'  \code{reader} \tab Name of the reader function that will be used to load
#'    the data. Empty (\code{''}) if no suitable reader was found.
#' }
#'
#' * Note that some readers return more than one variable, usually with the
#' listed variable name as prefix. This is the case for, for example, the
#' \code{xls.reader} and \code{xlsx.reader}.
#'
#' @export
#'
#' @seealso \code{\link{load.project}}, \code{\link{show.project}},
#'   \code{\link{project.config}}
#'
#' @examples
#' library('ProjectTemplate')
#'
#' \dontrun{list.data()}
list.data <- function(...) {
  # Turn the named arguments into configuration overrides, load the
  # configuration with those overrides applied, and list the data for it.
  overrides <- .parse.override.config(list(...))
  .list.data(.load.config(overrides))
}
#' Build the list of data available for loading into memory
#'
#' Produces a data.frame of all data files in the project, with meta data on
#' if and how each file will be loaded by \code{load.project}. Rows for files
#' found in \code{data/} come first, followed by rows for variables that are
#' only present in the cache.
#'
#' @param config List containing the configuration to use. The entries
#'   \code{recursive_loading} and \code{data_ignore} are read here; the list is
#'   also passed on when deriving variable names from the file names.
#'
#' @inherit list.data description details return
#'
#' @keywords internal
#'
#' @rdname internal.list.data
.list.data <- function(config) {
  recursive <- config$recursive_loading

  # Always scan data/ recursively: the variable names of nested files are
  # needed further down to keep them out of the cache-only listing.
  every.path <- list.files(path = 'data', recursive = TRUE,
                           include.dirs = TRUE)
  # Scan again honouring recursive_loading; only used as a row filter.
  loadable.paths <- list.files(path = 'data', recursive = recursive,
                               include.dirs = !recursive)

  # Variable name and reader for every path found
  parsed <- .parse.extensions(every.path, config)

  # One pattern serves both the file rows and the cache-only rows
  ignore.regex <- .prepare.data.ignore.regex(config$data_ignore)

  # Rows for everything found in data/
  from.data <- data.frame(
    filename = every.path,
    varname = parsed$varnames,
    is_ignored = grepl(ignore.regex, every.path),
    is_directory = file.info(file.path('data', every.path))$isdir,
    is_cached = .is.cached(parsed$varnames),
    cache_only = rep(FALSE, length(parsed$varnames)),
    stringsAsFactors = FALSE
  )
  # The reader column is attached after construction rather than passed to
  # data.frame()
  from.data$reader <- parsed$readers

  # Drop the rows that fall outside the configured recursive_loading setting
  from.data <- from.data[from.data$filename %in% loadable.paths, ]
  # Move rows handled by file.reader to the top
  from.data <- from.data[order(from.data$reader == "file.reader",
                               decreasing = TRUE), ]
  # Disabled de-duplication on varname, kept for reference:
  ## from.data <- from.data[!duplicated(from.data$varname, incomparables = ""),]

  # Variables in cache/ that do not correspond to any file in data/
  cache.names <- setdiff(.cached.variables(), parsed$varnames)
  n.cache <- length(cache.names)

  from.cache <- data.frame(
    filename = rep('', n.cache),
    varname = cache.names,
    is_ignored = grepl(ignore.regex, cache.names),
    is_directory = rep(FALSE, n.cache),
    # .cached.variables returns all variables without checking validity;
    # .is.cached performs that check
    is_cached = .is.cached(cache.names),
    cache_only = rep(TRUE, n.cache),
    reader = rep('', n.cache),
    row.names = NULL,
    stringsAsFactors = FALSE
  )

  rbind(from.data, from.cache)
}
#' Match readers to the extensions of the data files
#'
#' Tests every pattern registered in \code{extensions.dispatch.table} against
#' the given paths (case-insensitively). For each path that matches, the
#' reader stored under that pattern is recorded and the variable name is
#' derived by removing the matched part from the path and cleaning the
#' remainder with \code{clean.variable.name}. When several patterns match the
#' same path, the one visited last in the order returned by \code{ls()} wins.
#'
#' @param data.files a vector of paths to data files
#' @param config the configuration list, passed on to
#'   \code{clean.variable.name}
#'
#' @return A list of \code{readers} and \code{varnames}, both with one entry
#'   per element of \code{data.files}. Entries for paths that match no pattern
#'   stay empty (\code{''}).
#'
#' @keywords internal
#'
#' @rdname internal.parse.extensions
.parse.extensions <- function(data.files, config) {
  n.files <- length(data.files)
  matched.readers <- character(n.files)
  matched.names <- character(n.files)

  for (pattern in ls(extensions.dispatch.table)) {
    hits <- grepl(pattern, data.files, ignore.case = TRUE, perl = TRUE)

    # list() keeps the table entry together as a single element while it is
    # recycled over all matching positions
    matched.readers[hits] <- list(extensions.dispatch.table[[pattern]])

    # Strip the matched extension, then turn the remainder into a variable name
    stems <- sub(pattern, '', data.files[hits],
                 ignore.case = TRUE, perl = TRUE)
    matched.names[hits] <- clean.variable.name(stems, config)
  }

  list(readers = matched.readers, varnames = matched.names)
}
#' Prepare a regular expression for matching files to be ignored
#'
#' Constructs a single regular expression for matching file names in data that
#' should not be imported. Each comma separated entry is interpreted as
#' follows:
#' \itemize{
#'   \item An entry wrapped in slashes (\code{/.../}) is used as a regular
#'     expression, with the surrounding slashes removed.
#'   \item Any other entry is a literal name that has to match the complete
#'     string. Regex special characters in it are escaped, except \code{*},
#'     which acts as a wildcard.
#'   \item A literal entry ending in \code{/} also matches everything that
#'     follows that prefix.
#' }
#' Whitespace around the separating commas, and at the very start and end of
#' \code{ignore_files}, is discarded.
#'
#' @param ignore_files A character string that lists, separated by commas, all
#'   patterns to be matched for ignoring. Only the first element is used.
#'
#' @return A chained regular expression (alternatives joined with \code{|})
#'   that matches all patterns in the \code{ignore_files} variable. For an
#'   empty \code{ignore_files} the result is \code{^$}.
#'
#' @keywords internal
#'
#' @rdname internal.prepare.data.ignore.regex
.prepare.data.ignore.regex <- function(ignore_files) {
  # The split pattern below only swallows whitespace next to a comma, so
  # whitespace at the outer ends has to be removed separately. Without this a
  # trailing blank would become part of the last pattern and also hide a
  # trailing slash from the directory handling further down.
  ignore_files <- gsub('^\\s+|\\s+$', '', ignore_files)
  ignore_files <- strsplit(ignore_files, '\\s*,\\s*')[[1]]

  # Entries of the form /.../ are regular expressions, the rest are literals
  regexes <- ignore_files[grepl('^/.*/$', ignore_files)]
  literals <- setdiff(ignore_files, regexes)

  # Create regex for special characters in regex to be escaped
  # (welcome to backslash hell)
  # Note that * is a regex special character but often used in literals as
  # wildcard, so it is deliberately left out of this list
  regex.special <- c('.', '\\', '|', '(', ')', '[', '{', '^', '$', '+', '?')
  regex.special <- paste0('([',
                          paste0('\\', regex.special, collapse = '|'),
                          '])')

  # Escape special characters in literal strings
  literals <- gsub(regex.special, '\\\\\\1', literals)
  # Turn the wildcard * in literal strings into its regex equivalent
  literals <- gsub('\\*', '\\.\\*', literals)
  # Convert trailing slash to wildcard
  literals <- gsub('/$', '/\\.\\*', literals)
  # Anchor literals so they only match the complete string. With no literals
  # at all paste0 still yields '^$'; that alternative only matches the empty
  # string and keeps the combined pattern from being empty (an empty pattern
  # would match every file name).
  literals <- paste0('^', literals, '$')

  # Remove starting and trailing slashes from regexes
  regexes <- gsub('(^/)|(/$)', '', regexes)

  # Combine and return prepared regexes
  paste0(c(literals, regexes), collapse = '|')
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.