Nothing
#' Read SAS files
#'
#' `read_sas()` supports both sas7bdat files and the accompanying sas7bcat files
#' that SAS uses to record value labels.
#'
#' @param data_file,catalog_file Path to data and catalog files. The files are
#' processed with [readr::datasource()].
#' @param encoding,catalog_encoding The character encoding used for the
#' `data_file` and `catalog_encoding` respectively. A value of `NULL` uses the
#' encoding specified in the file; use this argument to override it if it is
#' incorrect.
#' @inheritParams tibble::as_tibble
#' @param col_select One or more selection expressions, like in
#' [dplyr::select()]. Use `c()` or `list()` to use more than one expression.
#' See `?dplyr::select` for details on available selection options. Only the
#' specified columns will be read from `data_file`.
#' @param skip Number of lines to skip before reading data.
#' @param n_max Maximum number of lines to read.
#' @param cols_only `r lifecycle::badge("deprecated")` `cols_only` is no longer
#' supported; use `col_select` instead.
#' @return A tibble, data frame variant with nice defaults.
#'
#' Variable labels are stored in the "label" attribute of each variable. It is
#' not printed on the console, but the RStudio viewer will show it.
#'
#' `write_sas()` returns the input `data` invisibly.
#' @export
#' @examples
#' path <- system.file("examples", "iris.sas7bdat", package = "haven")
#' read_sas(path)
read_sas <- function(data_file, catalog_file = NULL,
encoding = NULL, catalog_encoding = encoding,
col_select = NULL, skip = 0L, n_max = Inf, cols_only = deprecated(),
.name_repair = "unique") {
if (lifecycle::is_present(cols_only)) {
lifecycle::deprecate_warn("2.2.0", "read_sas(cols_only)", "read_sas(col_select)")
stopifnot(is.character(cols_only)) # used to only work with a char vector
# guarantee a quosure to keep NULL and tidyselect logic clean downstream
col_select <- quo(c(!!!cols_only))
} else {
col_select <- enquo(col_select)
}
if (is.null(encoding)) {
encoding <- ""
}
cols_skip <- skip_cols(read_sas, !!col_select, data_file, encoding = encoding)
n_max <- validate_n_max(n_max)
spec_data <- readr::datasource(data_file)
if (is.null(catalog_file)) {
spec_cat <- list()
} else {
spec_cat <- readr::datasource(catalog_file)
}
switch(class(spec_data)[1],
source_file = df_parse_sas_file(spec_data, spec_cat, encoding = encoding, catalog_encoding = catalog_encoding, cols_skip = cols_skip, n_max = n_max, rows_skip = skip, name_repair = .name_repair),
source_raw = df_parse_sas_raw(spec_data, spec_cat, encoding = encoding, catalog_encoding = catalog_encoding, cols_skip = cols_skip, n_max = n_max, rows_skip = skip, name_repair = .name_repair),
cli_abort("This kind of input is not handled.")
)
}
#' Write SAS files
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' `write_sas()` creates sas7bdat files. Unfortunately the SAS file format is
#' complex and undocumented, so `write_sas()` is unreliable and in most cases
#' SAS will not read files that it produces.
#'
#' [write_xpt()] writes files in the open SAS transport format, which has
#' limitations but will be reliably read by SAS.
#'
#' @param data Data frame to write.
#' @param path Path to file where the data will be written.
#' @keywords internal
#' @export
write_sas <- function(data, path) {
lifecycle::deprecate_warn("2.5.2", "write_sas()", "write_xpt()")
validate_sas(data)
data_out <- adjust_tz(data)
write_sas_(data_out, normalizePath(path, mustWork = FALSE))
invisible(data)
}
#' Read and write SAS transport files
#'
#' The SAS transport format is a open format, as is required for submission
#' of the data to the FDA.
#'
#' @inheritParams read_spss
#' @return A tibble, data frame variant with nice defaults.
#'
#' Variable labels are stored in the "label" attribute of each variable.
#' It is not printed on the console, but the RStudio viewer will show it.
#'
#' If a dataset label is defined, it will be stored in the "label" attribute
#' of the tibble.
#'
#' `write_xpt()` returns the input `data` invisibly.
#' @export
#' @examples
#' tmp <- tempfile(fileext = ".xpt")
#' write_xpt(mtcars, tmp)
#' read_xpt(tmp)
read_xpt <- function(file, col_select = NULL, skip = 0, n_max = Inf, .name_repair = "unique") {
cols_skip <- skip_cols(read_xpt, {{ col_select }}, file)
n_max <- validate_n_max(n_max)
spec <- readr::datasource(file)
switch(class(spec)[1],
source_file = df_parse_xpt_file(spec, cols_skip, n_max, skip, name_repair = .name_repair),
source_raw = df_parse_xpt_raw(spec, cols_skip, n_max, skip, name_repair = .name_repair),
cli_abort("This kind of input is not handled.")
)
}
#' @export
#' @rdname read_xpt
#' @param version Version of transport file specification to use: either 5 or 8.
#' @param name Member name to record in file. Defaults to file name sans
#' extension. Must be <= 8 characters for version 5, and <= 32 characters
#' for version 8.
#' @param label Dataset label to use, or `NULL`. Defaults to the value stored in
#' the "label" attribute of `data`.
#'
#' Note that although SAS itself supports dataset labels up to 256 characters
#' long, dataset labels in SAS transport files must be <= 40 characters.
#' @param adjust_tz Stata, SPSS and SAS do not have a concept of time zone,
#' and all [date-time] variables are treated as UTC. `adjust_tz` controls
#' how the timezone of date-time values is treated when writing.
#'
#' * If `TRUE` (the default) the timezone of date-time values is ignored, and
#' they will display the same in R and Stata/SPSS/SAS, e.g.
#' `"2010-01-01 09:00:00 NZDT"` will be written as `"2010-01-01 09:00:00"`.
#' Note that this changes the underlying numeric data, so use caution if
#' preserving between-time-point differences is critical.
#' * If `FALSE`, date-time values are written as the corresponding UTC value,
#' e.g. `"2010-01-01 09:00:00 NZDT"` will be written as
#' `"2009-12-31 20:00:00"`.
write_xpt <- function(data, path, version = 8, name = NULL, label = attr(data, "label"), adjust_tz = TRUE) {
if (!version %in% c(5, 8)) {
cli_abort("SAS transport file version {.val {version}} is not currently supported.")
}
if (is.null(name)) {
name <- tools::file_path_sans_ext(basename(path))
}
name <- validate_xpt_name(name, version)
label <- validate_xpt_label(label)
data_out <- validate_sas(data)
if (isTRUE(adjust_tz)) {
data_out <- adjust_tz(data_out)
}
write_xpt_(
data_out,
normalizePath(path, mustWork = FALSE),
version = version,
name = name,
label = label
)
invisible(data)
}
# Validation --------------------------------------------------------------
validate_sas <- function(data) {
stopifnot(is.data.frame(data))
invisible(data)
}
validate_xpt_name <- function(name, version, call = caller_env()) {
if (version == 5) {
if (nchar(name) > 8) {
cli_abort("{.arg name} must be 8 characters or fewer.", call = call)
}
} else {
if (nchar(name) > 32) {
cli_abort("{.arg name} must be 32 characters or fewer.", call = call)
}
}
name
}
validate_xpt_label <- function(label, call = caller_env()) {
if (!is.null(label)) {
stopifnot(is.character(label), length(label) == 1)
if (nchar(label) > 40) {
cli_abort("{.arg label} must be 40 characters or fewer.", call = call)
}
}
label
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.