# Functions for saving and loading OS-compliant file types ----
#' Write a data frame to a delimited file, and save typing information in a separate json file
#'
#' @param x A data frame or tibble to write to disk.
#' @param path Path of the delimited file to write. Must be a single file path (not a connection), because the name of the json file is derived from it. (`path` is deprecated in favour of `file` from readr v1.4, but OpenSAFELY currently has an older readr version, so `path` is used for now.)
#' @param suffix The suffix used in the name of the json file, to be appended to the delimited file name. Defaults to `""` (no suffix), so that the file name is the same as the delimited file name (excluding filetype extensions).
#' @param delim Delimiter used to separate values.
#' @param na String used for missing values. Defaults to `"NA"`. Missing values will never be quoted; strings with the same value as `na` will always be quoted.
#' @param quote_escape The type of escaping to use for quoted values, one of "`double`", "`backslash`" or "`none`". You can also use `FALSE`, which is equivalent to "`none`". The default is "`double`", which is expected format for Excel.
#' @param eol The end of line character to use. Most commonly either "`\n`" for Unix style newlines, or "`\r\n`" for Windows style newlines.
#' @return Returns the input invisibly.
#'
#' @details Based on the [readr::write_delim] function.
#' Additionally, this function saves a json file containing typing info for the data frame,
#' which can be used to re-type the data when re-imported into R.
#' Some further [readr::write_delim] options are deliberately unavailable as they won't make sense for files intended for re-importing.
#'
#' Supported column classes are character, factor, numeric, integer, logical and Date.
#' Datetime and time classes are not supported, nor is any column whose class
#' vector has more than one element (for example ordered factors). An error
#' naming the offending columns is raised before anything is written to disk.
#' @export
writetype_delim <- function(
  x,
  path, ## note OS readr version uses old `path` argument not `file`, so use `path` for compatibility
  suffix = "",
  delim = " ",
  na = "NA",
  quote_escape = "double",
  eol = "\n"
){
  if (!is.data.frame(x)) {
    stop("`x` must be a data frame or tibble.", call. = FALSE)
  }
  # The json file name is built from `path`, so it has to be a path string.
  if (!is.character(path) || length(path) != 1L || is.na(path)) {
    stop(
      "`path` must be a single file path; the json file name is derived from it.",
      call. = FALSE
    )
  }

  # Column classes that can be round-tripped, with their readr shortcut code
  # (`concise`) and readr collector name (`col_type`).
  # Deliberately absent: "n" (number), "T" (datetime), "t" (time).
  concise_lookup <- data.frame(
    concise  = c("c", "f", "d", "i", "l", "D"),
    col_type = c("character", "factor", "double", "integer", "logical", "date"),
    class    = c("character", "factor", "numeric", "integer", "logical", "Date"),
    stringsAsFactors = FALSE
  )

  # Fail early, with the column names, instead of hitting an obscure length
  # error below or writing a json file with missing type codes.
  col_classes <- lapply(x, class)
  is_supported <- vapply(
    col_classes,
    function(cls) length(cls) == 1L && cls %in% concise_lookup$class,
    logical(1)
  )
  if (!all(is_supported)) {
    offenders <- sprintf(
      "`%s` <%s>",
      names(x)[!is_supported],
      vapply(col_classes[!is_supported], paste, character(1), collapse = "/")
    )
    stop(
      "Unsupported column class(es), cannot save typing information: ",
      paste(offenders, collapse = ", "),
      ". Supported classes are: ",
      paste(concise_lookup$class, collapse = ", "),
      ".",
      call. = FALSE
    )
  }

  # One row of typing info per column of `x`.
  x_type <-
    tibble::tibble(
      col_name = names(x),
      class = purrr::map_chr(x, class),
      type = purrr::map_chr(x, typeof),
      attributes = purrr::map(x, attributes),
      levels = purrr::map(x, ~ levels(.) ),
      # `class` here is the column created just above
      concise = concise_lookup$concise[match(class, concise_lookup$class)],
      col_type = concise_lookup$col_type[match(class, concise_lookup$class)]
    )

  # The json file sits next to the data file: same name minus extension, plus suffix.
  jsonpath <- paste0(fs::path_ext_remove(path), suffix, ".json")
  jsonlite::write_json(x_type, path=jsonpath, pretty=TRUE)

  # Last expression, so the value of write_delim() is what this function returns.
  readr::write_delim(
    x=x,
    path=path,
    delim=delim,
    na=na,
    append=FALSE,
    col_names=TRUE,
    quote_escape=quote_escape,
    eol=eol
  )
}
#' Write a data frame to a csv file, with typing information saved in a companion json file
#'
#' Comma-delimited convenience wrapper around [writetype_delim()].
#'
#' @param x A data frame or tibble to write to disk.
#' @param path File to write to. (The argument is named `path` rather than `file` because OpenSAFELY currently has an older readr version in which `path` is the argument name.)
#' @param suffix The suffix used in the name of the json file, to be appended to the csv file name. Defaults to `""` (no suffix), so that the file name is the same as the csv file name (excluding filetype extensions).
#' @param na String used for missing values. Defaults to `"NA"`. Missing values will never be quoted; strings with the same value as `na` will always be quoted.
#' @param quote_escape The type of escaping to use for quoted values, one of "`double`", "`backslash`" or "`none`". You can also use `FALSE`, which is equivalent to "`none`". The default is "`double`", which is expected format for Excel.
#' @param eol The end of line character to use. Most commonly either "`\n`" for Unix style newlines, or "`\r\n`" for Windows style newlines.
#' @return Returns the input invisibly.
#'
#' @details Based on the [readr::write_delim] function, via [writetype_delim()] with `delim = ","`.
#' Alongside the csv file, a json file containing typing info for the data frame is saved,
#' which can be used to re-type the data when re-imported into R.
#' Datetime and time classes are not supported.
#' @export
writetype_csv <- function(x,
                          path,
                          suffix = "",
                          na = "NA",
                          quote_escape = "double",
                          eol = "\n") {
  # `path` (not `file`) is kept for compatibility with the older readr
  # version used by OpenSAFELY.
  # A csv file is a delimited file whose delimiter is fixed to a comma;
  # every other option is handed through untouched.
  writetype_delim(
    x, path,
    suffix = suffix,
    delim = ",",
    na = na,
    quote_escape = quote_escape,
    eol = eol
  )
}
#' Read a delimited file (including CSV and TSV) into a tibble, and type columns using a separate json file
#'
#' @param file Delimited file location. Must be a single file path, because the name of the json file is derived from it.
#' @param suffix The suffix used in the name of the json file, which is appended to the delimited file name. Defaults to `""` (no suffix), so that the file name is the same as the delimited file name (excluding filetype extensions).
#' @param locale The locale controls defaults that vary from place to place.
#' The default locale is US-centric (like R), but you can use
#' [readr::locale()] to create your own locale that controls things like
#' the default time zone, encoding, decimal mark, big mark, and day/month
#' names.
#' @param na Character vector of strings to interpret as missing values. Set this
#' option to `character()` to indicate no missing values.
#' @param quoted_na `r lifecycle::badge("deprecated")` Should missing values
#' inside quotes be treated as missing values (the default) or strings. This
#' parameter is soft deprecated as of readr 2.0.0.
#' @param delim Single character used to separate fields within a record.
#' @param quote Single character used to quote strings.
#' @param trim_ws Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
#' each field before parsing it?
#' @param comment A string used to identify comments. Any text after the
#' comment characters will be silently ignored.
#' @param escape_double Does the file escape quotes by doubling them?
#' i.e. If this option is `TRUE`, the value `""""` represents
#' a single quote, `\"`.
#' @param escape_backslash Does the file use backslashes to escape special
#' characters? This is more general than `escape_double` as backslashes
#' can be used to escape the delimiter character, the quote character, or
#' to add special characters like `\\n`.
#' @details Based on the [readr::read_delim] function. Requires delimited files to be saved using [writetype_delim()], which will also create the json file containing the typing info.
#' An error is raised if the json file cannot be found or does not hold a type code for every column.
#' Columns recorded as factors are re-levelled using the levels stored in the json file.
#' Datetime and time classes are not supported.
#' @return A [tibble::tibble()].
#' @export
readtype_delim <- function(
  file,
  suffix = "",
  delim,
  quote = "\"",
  escape_backslash = FALSE,
  escape_double = TRUE,
  locale = readr::default_locale(),
  na = c("", "NA"),
  quoted_na = TRUE,
  comment = "",
  trim_ws = FALSE
){
  # The json file name is built from `file`, so it has to be a path string.
  if (!is.character(file) || length(file) != 1L || is.na(file)) {
    stop(
      "`file` must be a single file path; the json file name is derived from it.",
      call. = FALSE
    )
  }

  # Same naming rule as the writer: data file name minus extension, plus suffix.
  jsonpath <- paste0(fs::path_ext_remove(file), suffix, ".json")
  if (!file.exists(jsonpath)) {
    stop(
      "Typing file not found: '", jsonpath, "'. ",
      "Was the data file saved with writetype_delim() / writetype_csv() ",
      "using the same `suffix`?",
      call. = FALSE
    )
  }

  # The json holds one record per column; turn it into one row per column.
  x_type <- jsonlite::read_json(jsonpath)
  x_type <- tibble::enframe(x_type, name = NULL)
  x_type <- tidyr::unnest_wider(x_type, value)
  x_type <- dplyr::rowwise(x_type)
  x_type <- dplyr::mutate(
    x_type,
    levels = list(unlist(levels)) # to change from list of lists to list of character vectors
  )
  # Row-wise grouping was only needed for the mutate above.
  x_type <- dplyr::ungroup(x_type)

  # Every column needs a readr shortcut code to build the col_types string.
  concise <- x_type[["concise"]]
  if (is.null(concise) || anyNA(concise)) {
    stop(
      "Typing file '", jsonpath, "' does not contain a type code for every column.",
      call. = FALSE
    )
  }

  x <- readr::read_delim(
    file = file,
    delim = delim,
    col_types = paste(concise, collapse = ""),
    quote = quote,
    escape_backslash = escape_backslash,
    escape_double = escape_double,
    locale = locale,
    na = na,
    quoted_na = quoted_na,
    comment = comment,
    trim_ws = trim_ws
  )

  # Re-create each factor with the levels (and level order) stored in the json.
  factor_cols <- dplyr::filter(x_type, class == "factor")$col_name
  for (fc in factor_cols) {
    fc_levels <- dplyr::filter(x_type, col_name == fc)$levels[[1]]
    x[[fc]] <- factor(x[[fc]], levels = fc_levels)
  }

  x
}
#' Read a csv file into a tibble, and type columns using a separate json file.
#'
#' @param file Delimited file location.
#' @param suffix The suffix used in the name of the json file, which is appended to the delimited file name. Defaults to `""` (no suffix), so that the file name is the same as the delimited file name (excluding filetype extensions).
#' @param delim Ignored: the delimiter is always `","`. The argument is kept only so that existing calls keep working; a warning is given if a value other than `","` is supplied.
#' @details Based on the [readr::read_csv] function. Requires csv files to be saved using [writetype_csv()], which will also create the json file containing the typing info.
#' Datetime and time classes are not supported.
#' @inheritParams readtype_delim
#' @return A [tibble::tibble()].
#' @export
readtype_csv <- function(
  file,
  suffix = "",
  delim,
  quote = "\"",
  escape_backslash = FALSE,
  escape_double = TRUE,
  locale = readr::default_locale(),
  na = c("", "NA"),
  quoted_na = TRUE,
  comment = "",
  trim_ws = FALSE
){
  # `delim` is part of the signature but the file is always parsed as
  # comma-delimited; tell the caller rather than silently dropping their value.
  if (!missing(delim) && !identical(delim, ",")) {
    warning(
      "`delim` is ignored by readtype_csv(), which always uses \",\". ",
      "Use readtype_delim() for other delimiters.",
      call. = FALSE
    )
  }

  readtype_delim(
    file = file,
    suffix = suffix,
    delim = ",",
    quote = quote,
    escape_backslash = escape_backslash,
    escape_double = escape_double,
    locale = locale,
    na = na,
    quoted_na = quoted_na,
    comment = comment,
    trim_ws = trim_ws
  )
}
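# NOTE: the two lines below are web-page boilerplate that was captured along
# with this file; they are not R code and are kept only as comments.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.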