# Nothing
#' Read Data File in Table format, SPSS, Excel, or Stata DTA File
#'
#' This function reads (1) a data file in CSV (\code{.csv}), DAT (\code{.dat}),
#' or TXT (\code{.txt}) format using the \code{fread} function from the \pkg{data.table}
#' package, (2) an SPSS file (\code{.sav}) using the \code{read.sav} function, (3)
#' an Excel file (\code{.xlsx}) using the \code{read.xlsx} function, or (4) a Stata
#' DTA file (\code{.dta}) using the \code{read.dta} function in the \pkg{misty}
#' package.
#'
#' @param file a character string indicating the name of the data file
#' with the file extension \code{.csv}, \code{.dat},
#' \code{.txt}, \code{.sav}, \code{.xlsx}, or \code{.dta}.
#' Note that the function will select an appropriate
#' \code{read}-function depending on the file extension.
#' @param sheet a character string indicating the name of an Excel sheet
#' or a numeric value indicating the position of the Excel
#' sheet to read. By default the first sheet will be read
#' when reading an Excel file (\code{.xlsx}).
#' @param header logical: if \code{TRUE} (default), the first row is used
#' as column names when reading an Excel file (\code{.xlsx}),
#' if \code{FALSE} default names are used. A character vector
#' giving a name for each column can also be used.
#' @param select a character vector of column names or numeric vector to
#' keep, drop the rest. See the help page of the \code{fread}
#' function in the \pkg{data.table} package.
#' @param drop a character vector of column names or numeric vector
#' to drop, keep the rest.
#' @param sep a character string indicating the separator between
#' columns for the \code{fread} function when reading data
#' in CSV (\code{.csv}), DAT (\code{.dat}), or TXT (\code{.txt})
#' format.
#' @param dec a character string indicating the decimal separator
#' for the \code{fread} function when reading data in CSV
#' (\code{.csv}), DAT (\code{.dat}), or TXT (\code{.txt})
#' format.
#' @param use.value.labels logical: if \code{TRUE}, variables with value labels
#' are converted into factors.
#' @param use.missings logical: if \code{TRUE} (default), user-defined missing
#' values are converted into NAs.
#' @param na.strings a character vector of strings which are to be interpreted
#' as NA values.
#' @param stringsAsFactors logical: if \code{TRUE}, character vectors are converted
#' to factors.
#' @param formats logical: if \code{TRUE}, variable formats are shown in
#' an attribute for all variables.
#' @param label logical: if \code{TRUE}, variable labels are shown in
#' an attribute for all variables.
#' @param labels logical: if \code{TRUE}, value labels are shown in an
#' attribute for all variables.
#' @param missing logical: if \code{TRUE}, value labels for user-defined
#' missings are shown in an attribute for all variables.
#' @param widths logical: if \code{TRUE}, widths are shown in an attribute
#' for all variables.
#' @param as.data.frame logical: if \code{TRUE} (default), function returns a
#' regular data frame; if \code{FALSE} function returns
#' a tibble or data.table.
#' @param encoding a character string indicating the encoding, i.e.,
#' \code{"unknown"}, \code{"UTF-8"}, or \code{"Latin-1"}
#' (default).
#' @param check logical: if \code{TRUE} (default), argument specification
#' is checked.
#'
#' @author
#' Takuya Yanagida
#'
#' @seealso
#' \code{\link{read.sav}}, \code{\link{read.xlsx}}, \code{\link{read.dta}},
#' \code{\link{read.mplus}}
#'
#' @references
#' Barrett, T., Dowle, M., Srinivasan, A., Gorecki, J., Chirico, M., Hocking, T.,
#' & Schwendinger, B. (2024). data.table: Extension of 'data.frame'. R package
#' version 1.16.0. \url{https://CRAN.R-project.org/package=data.table}
#'
#' Wickham H, Miller E, Smith D (2023). \emph{haven: Import and Export 'SPSS',
#' 'Stata' and 'SAS' Files}. R package version 2.5.3.
#' \url{https://CRAN.R-project.org/package=haven}
#'
#' @return
#' Returns a data frame, tibble, or data table.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Read CSV data file
#' dat <- read.data("CSV_Data.csv")
#'
#' # Read DAT data file
#' dat <- read.data("DAT_Data.dat")
#'
#' # Read TXT data file
#' dat <- read.data("TXT_Data.txt")
#'
#' # Read SPSS data file
#' dat <- read.data("SPSS_Data.sav")
#'
#' # Read Excel data file
#' dat <- read.data("Excel_Data.xlsx")
#'
#' # Read Stata data file
#' dat <- read.data("Stata_Data.dta")
#' }
read.data <- function(file, sheet = NULL, header = TRUE, select = NULL, drop = NULL,
                      sep = "auto", dec = "auto", use.value.labels = FALSE,
                      use.missings = TRUE, na.strings = c("NA", ""), stringsAsFactors = FALSE,
                      formats = FALSE, label = FALSE, labels = FALSE,
                      missing = FALSE, widths = FALSE, as.data.frame = TRUE,
                      encoding = c("unknown", "UTF-8", "Latin-1"), check = TRUE) {

  #_____________________________________________________________________________
  #
  # Initial Check --------------------------------------------------------------

  # Check input 'file'
  # Note: this call resolves to base R's missing(); the logical argument
  # 'missing' of this function is not a function and is skipped by the lookup.
  if (isTRUE(missing(file))) { stop("Please specify a character string indicating the name of the data file for the argument 'file'", call. = FALSE) }

  # File extension: the file name needs at least one dot
  if (isTRUE(!grepl("\\.", file))) { stop("Please specify a data file with extension \"csv\", \"dat\", \"txt\", \"sav\", \"xlsx\", or \"dta\" for the argument 'file'.", call. = FALSE) }

  # Text after the last dot; the raw spelling is kept for the error message,
  # the lower-cased version is used for dispatch so that "Data.CSV" is accepted
  file.exten.raw <- rev(unlist(strsplit(file, "\\.")))[1L]
  file.exten <- tolower(file.exten.raw)

  if (isTRUE(!file.exten %in% c("csv", "dat", "txt", "dta", "sav", "xlsx"))) { stop("Data file with extension ", dQuote(file.exten.raw), " is not supported by this function.", call. = FALSE) }

  # Check if 'file' exists
  if (isTRUE(!file.exists(file))) { stop(paste0("Unable to open the data file: ", sQuote(file), " does not exist."), call. = FALSE) }

  #_____________________________________________________________________________
  #
  # Input Check ----------------------------------------------------------------

  # Check inputs
  .check.input(logical = c("header", "use.value.labels", "use.missings", "stringsAsFactors", "formats", "label", "labels", "missing", "widths", "as.data.frame"),
               character = list(file = 1L, sep = 1L, dec = 1L),
               s.character = list(encoding = c("unknown", "UTF-8", "Latin-1")), envir = environment(), input.check = check)

  # Additional checks: only require the package that is used for this format
  if (isTRUE(check)) {

    if (isTRUE(file.exten %in% c("csv", "dat", "txt"))) {

      # R package data.table
      if (isTRUE(!requireNamespace("data.table", quietly = TRUE))) { stop("Package \"data.table\" is needed for this function to work, please install it.", call. = FALSE ) }

    } else if (isTRUE(file.exten %in% c("sav", "dta"))) {

      # R package haven
      if (isTRUE(!requireNamespace("haven", quietly = TRUE))) { stop("Package \"haven\" is needed for this function to work, please install it.", call. = FALSE ) }

    }

    # NOTE(review): no package check is done here for "xlsx"; read.xlsx() is
    # called with 'check' and is presumed to verify its own dependency - confirm.

  }

  #_____________________________________________________________________________
  #
  # Arguments ------------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## encoding Argument ####

  # Untouched default vector means "Latin-1"
  if (isTRUE(all(c("unknown", "UTF-8", "Latin-1") %in% encoding))) { encoding <- "Latin-1" }

  #_____________________________________________________________________________
  #
  # Main Function --------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## CSV, DAT, or TXT file ####

  if (isTRUE(file.exten %in% c("csv", "dat", "txt"))) {

    # Guess separator and decimal mark only if the user left both on "auto"
    if (isTRUE(sep == "auto" && dec == "auto")) {

      # Read the head of the file once; warnings (e.g., incomplete final line)
      # are irrelevant for the delimiter guess
      head.lines <- suppressWarnings(readLines(file, n = 10L))

      # Skip the guess for an empty file: all() over zero lines is vacuously TRUE
      if (isTRUE(length(head.lines) > 0L)) {

        # TRUE if every one of the first lines contains the delimiter
        in.all.lines <- function(delim) { isTRUE(all(suppressWarnings(grepl(delim, head.lines, useBytes = TRUE)))) }

        # Separator ";" implies decimal comma
        if (in.all.lines(";")) {

          sep <- ";"
          dec <- ","

        # Separator "," implies decimal point
        } else if (in.all.lines(",")) {

          sep <- ","
          dec <- "."

        }

        # Otherwise: keep "auto" and let fread() detect separator and decimal mark

      }

    }

    object <- data.table::fread(file = file, sep = sep, dec = dec, select = select, drop = drop, na.strings = na.strings, stringsAsFactors = stringsAsFactors, data.table = !as.data.frame, encoding = encoding)

  } else {

    switch(file.exten,
           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           ## SPSS file ####
           "sav" = {

             object <- misty::read.sav(file = file, use.value.labels = use.value.labels, use.missings = use.missings, formats = formats, label = label, labels = labels, missing = missing, widths = widths, as.data.frame = as.data.frame, check = check)

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           ## Excel file ####
           }, "xlsx" = {

             # 'check' is forwarded like for the SPSS and Stata readers
             object <- misty::read.xlsx(file = file, sheet = sheet, header = header, na = na.strings, as.data.frame = as.data.frame, check = check)

           #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           ## Stata file ####
           }, "dta" = {

             object <- misty::read.dta(file = file, use.value.labels = use.value.labels, formats = formats, label = label, labels = labels, missing = missing, widths = widths, as.data.frame = as.data.frame, check = check)

           })

  }

  #_____________________________________________________________________________
  #
  # Output ---------------------------------------------------------------------

  return(object)

}
#_______________________________________________________________________________
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.