#' Read the CSV-data for a Data Resource
#'
#' @param path path to the data set.
#'
#' @param resource a Data Resource.
#' @param use_fread use the \code{\link[data.table]{fread}} function instead of
#' \code{\link[utils]{read.csv}} and return a \code{data.table}.
#' @param to_factor convert columns to factor if the schema has a categories
#' field for the column. Passed on to \code{\link{dpapplyschema}}.
#' @param as_connection This argument is ignored. The function will always
#' return a \code{data.frame}.
#' @param ... additional arguments are passed on to \code{\link{read.csv}} or
#' \code{\link[data.table]{fread}}. Note that some arguments are already set
#' by \code{csv_reader}, so not all arguments are available to use as
#' additional arguments.
#'
#' @seealso
#' Generally used by calling \code{\link{dpgetdata}}.
#'
#' @return
#' Returns a \code{data.frame} with the data.
#'
#' @export
csv_reader <- function(path, resource, use_fread = FALSE, to_factor = FALSE,
as_connection = FALSE, ...) {
schema <- dpschema(resource)
if (is.null(schema)) {
dta <- csv_read_base(path, use_fread = use_fread, ...)
} else {
dec <- determine_decimalchar(schema$fields)
colclasses <- sapply(schema$fields, csv_colclass, decimalChar = dec)
dialect <- dpproperty(resource, "dialect")
if (is.null(dialect)) dialect <- list()
# TODO: missing values/na.strings
dta <- csv_read_base(path, decimalChar = dec, colClasses = colclasses,
use_fread = use_fread, csv_dialect = dialect, ...)
dta <- dpapplyschema(dta, resource, to_factor = to_factor, decimalChar = dec)
#dta <- convert_using_schema(dta, schema, to_factor = to_factor, decimalChar = dec)
}
structure(dta, resource = resource)
}
# Wrapper around read.table and data.table::fread that accepts options are
# present in csv dialect specification
# (https://specs.frictionlessdata.io/csv-dialect/#specification). Not all
# options from the dialect specification are supported; the function wil; then
# either generate a warning or error.
#
# From Spec: delimiter, lineTerminator, quoteChar, doubleQuote, excapeChar,
# skipInitialSpace, header, commentChar, caseSensitiveHeader, nullSequence
csv_read_base <- function(filename,
delimiter = ",", decimalChar = ".",
quoteChar = "\"", doubleQuote = TRUE,
commentChar = "", lineTerminator = "\r\n",
header = TRUE, caseSensitiveHeader = FALSE, nullSequence = character(0),
skipInitialSpace = FALSE, colClasses = character(),
na.strings = character(0), use_fread = FALSE, csv_dialect, ...) {
# Handle input of the arguments through a named list
if (!missing(csv_dialect) && !is.null(csv_dialect)) {
stopifnot(is.list(csv_dialect))
keep <- c("delimiter", "lineTerminator", "quoteChar", "doubleQuote",
"skipInitialSpace", "header", "commentChar",
"caseSensitiveHeader", "nullSequence")
csv_dialect <- csv_dialect[names(csv_dialect) %in% keep]
if (!missing(delimiter)) csv_dialect$delimiter <- delimiter
if (!missing(lineTerminator)) csv_dialect$lineTerminator <- lineTerminator
if (!missing(quoteChar)) csv_dialect$quoteChar <- quoteChar
if (!missing(doubleQuote)) csv_dialect$doubleQuote <- doubleQuote
if (!missing(skipInitialSpace)) csv_dialect$skipInitialSpace <- skipInitialSpace
if (!missing(header)) csv_dialect$header <- header
if (!missing(commentChar)) csv_dialect$commentChar <- commentChar
if (!missing(caseSensitiveHeader)) csv_dialect$caseSensitiveHeader <- caseSensitiveHeader
if (!missing(nullSequence)) csv_dialect$nullSequence <- nullSequence
args <- c(csv_dialect, list(filename = filename, colClasses = colClasses, na.strings = na.strings,
use_fread = use_fread, decimalChar = decimalChar), list(...))
return(do.call(csv_read_base, args))
}
# Check and process aguments
stopifnot(is.character(filename))
stopifnot(is.character(delimiter), length(delimiter) == 1, nchar(delimiter) == 1)
stopifnot(is.character(decimalChar), length(decimalChar) == 1, nchar(decimalChar) == 1)
stopifnot(is.character(quoteChar), length(quoteChar) == 1)
stopifnot(length(doubleQuote) == 1)
if (!doubleQuote) stop("Values other than TRUE for doubleQuote are not supported.")
stopifnot(is.logical(header), length(header) == 1)
stopifnot(is.character(commentChar), length(commentChar) == 1, nchar(commentChar) <= 1)
if (use_fread && commentChar != "")
stop('Values other than "" for commentChar are not supported.')
stopifnot(is.character(lineTerminator), length(lineTerminator) == 1)
if (!(lineTerminator %in% c("\n", "\r", "\r\n")))
stop("Values other than '\\n', '\\r' or '\\r\\n' for lineTerminator are not supported.")
stopifnot(is.logical(caseSensitiveHeader), length(caseSensitiveHeader) == 1)
if (!missing(caseSensitiveHeader) && header)
warning("The value for caseSentitiveHeader is ignored as header=FALSE.")
stopifnot(is.character(nullSequence), length(nullSequence) <= 1)
if (length(nullSequence) > 0)
stop("Specifying nullSequence is not supported.")
stopifnot(is.logical(skipInitialSpace), length(skipInitialSpace) == 1)
# Read data
if (use_fread) {
if (!requireNamespace("data.table")) stop("In order to use ",
"'use_fread=TRUE' the data.table package needs to be installed.")
lapply(filename, function(fn) {
d <- data.table::fread(filename, sep = delimiter, quote = quoteChar,
dec = decimalChar, header = header,
strip.white = skipInitialSpace, stringsAsFactors = FALSE,
colClasses = colClasses, na.strings = na.strings, ...)
#if (!caseSensitiveHeader) names(d) <- tolower(names(d))
d
}) |> data.table::rbindlist()
} else {
dta <- lapply(filename, function(fn) {
d <- utils::read.table(filename, sep = delimiter, quote = quoteChar,
dec = decimalChar, header = header, comment.char = commentChar,
strip.white = skipInitialSpace, stringsAsFactors = FALSE,
colClasses = colClasses, na.strings = na.strings, ...)
#if (!caseSensitiveHeader) names(d) <- tolower(names(d))
d
})
if (length(dta) > 1) do.call(rbind, dta) else dta[[1]]
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.