read | R Documentation |
Read and return an R object from data on disk, from URL, or from packages.
read(
file,
type = NULL,
header = "#",
header.max = 50L,
skip = 0L,
locale = default_locale(),
lang = getOption("data.io_lang", "en"),
lang_encoding = "UTF-8",
as_dataframe = FALSE,
as_labelled = FALSE,
comments = NULL,
package = NULL,
sidecar_file = TRUE,
fun_list = NULL,
hfun = NULL,
fun = NULL,
data,
cache_file = NULL,
method = "auto",
quiet = FALSE,
force = FALSE,
...
)
type_from_extension(file, full = FALSE)
hread_text(file, header.max, skip = 0L, locale = default_locale(), ...)
hread_xls(file, header.max, skip = 0L, locale = default_locale(), ...)
hread_xlsx(file, header.max, skip = 0L, locale = default_locale(), ...)
## S3 method for class 'subsettable_type'
x$name
## S3 method for class 'read_function_subset'
.DollarNames(x, pattern = "")
file |
The path to the file to read, or the name of the dataset to get
from an R package (in that case, you must provide the |
type |
The type (format) of data to read. |
header |
The character to use for the header and other comments. |
header.max |
The maximum number of lines to consider for the header. |
skip |
The number of lines to skip at the beginning of the file. |
locale |
A readr locale object with all the data required to correctly interpret country-related items. The default value matches R defaults (US English + UTF-8 encoding), and it is advised to use it as much as possible. |
lang |
The language to use (mainly for comment, label and units), but
also for factor levels or other character strings if a translation exists
and if the language is spelled with uppercase characters (e.g., |
lang_encoding |
Encoding used by R scripts for translation. They should
all be encoded as |
as_dataframe |
Deprecated: now use |
as_labelled |
Should variables be converted into 'labelled' objects? This
allows to keep labels and units when the vector is manipulated, but it can
lead to incompatibilities with some R code (hence, it is |
comments |
Comments to add in the created object. |
package |
The package where to look for the dataset. If |
sidecar_file |
If |
fun_list |
The table with correspondence of the types, read, and write functions. |
hfun |
The function to read the header (lines starting with a special
mark, usually '#' at the beginning of the file). This function must have
the same arguments as |
fun |
The function to delegate reading of the data. If |
data |
A synonym to |
cache_file |
The path to a local file to use as a cache when file is
downloaded (http://, https://, ftp://, or file:// protocols). If cache_file
already exists, data are read from this cache, except if |
method |
The downloading method used ( |
quiet |
In case we have to download files, do it silently ( |
force |
If |
... |
Further arguments passed to the function |
full |
Do we return the full extension, like |
x |
A |
name |
The value to use for the |
pattern |
A regular expression to list matching names. |
read()
provides a single entry point to read various kinds of
data, but it delegates the actual work to various other functions dispatched
across several R packages. See getOption("read_write")
.
An R object with the data (its class depends on the data being read).
Philippe Grosjean phgrosjean@sciviews.org
data_types()
, write()
, read_csv()
# Use read() as a more flexible substitute for data(): you can change the
# dataset name, and the syntax is similar for R datasets and datasets from files
read() # List all available datasets in your installed version of R
# List datasets in one particular package
read(package = "data.io")
# Read one dataset from this package, possibly changing its name
(urchin <- read("urchin_bio", package = "data.io"))
# Same, but using labels in French
(urchin <- read("urchin_bio", package = "data.io", lang = "fr"))
# ... and also the levels of factors in French (note: uppercase FR)
(urchin <- read("urchin_bio", package = "data.io", lang = "FR"))
# Read one dataset from another package, but with labels and comments
data(iris) # The R way: you got the initial datasets
# Same result, using read()
ir2 <- read("iris", package = "datasets", lang = NULL)
# ir2 records that it comes from datasets::iris
attr(comment(ir2), "src")
# otherwise, it is identical to iris, except it may be a data.table or a
# tibble, depending on user preferences
comment(ir2) <- NULL
# Force coercion into a data.frame
ir2 <- svBase::as_dtf(ir2)
identical(iris, ir2)
# More interesting: you can get an enhanced version of iris with read():
# (note that variable names are in snake-case now!)
(ir3 <- read("iris", package = "datasets"))
class(ir3)
comment(ir3)
ir3$sepal_length
# ... and you can get it in French too!
(ir_fr <- read("iris", package = "datasets", lang = "fr"))
class(ir_fr)
comment(ir_fr)
ir_fr$sepal_length
# Sometimes, datasets are more deeply reworked. For instance, trees has
# variables in imperial units (in, ft, and cubic ft), but it is automatically
# reworked by read() into metric variables (m or m^3):
data(trees)
head(trees)
(trees2 <- read("trees", package = "datasets"))
comment(trees2)
trees2$volume
# Read from a Github Gist (need to specify the type here!)
# (ble <- read$csv("http://tinyurl.com/Biostat-Ble"))
# Various versions of the famous iris dataset, stored in different file formats
(iris <- read(data_example("iris.csv")))
(iris <- read(data_example("iris.csv.zip")))
(iris <- read(data_example("iris.csv.gz")))
(iris <- read(data_example("iris.csv.bz2")))
(iris <- read(data_example("iris.tsv")))
(iris <- read(data_example("iris.xls")))
(iris <- read(data_example("iris.xlsx")))
(iris <- read(data_example("iris.rds"))) # Does not transform into tibble!
# The following formats are disabled (commented out) in these examples
#(iris <- read(data_example("iris.syd"))) ##
#(iris <- read(data_example("iris.csvy"))) ##
#(iris <- read(data_example("iris.csvy.zip"))) ##
# A file with a header both in English (default) and in French
(iris <- read(data_example("iris_short_header.csv")))
(iris_fr <- read(data_example("iris_short_header.csv"), lang = "fr"))
# Headers are also recognized in xls/xlsx files
(iris_fr <- read(data_example("iris_short_header.xls"), lang = "fr"))
# Read a file with a sidecar file (same name + '.R')
(iris <- read(data_example("iris_sidecar.csv"))) # lang = "en" by default
(iris <- read(data_example("iris_sidecar.csv"), lang = "EN")) # Full lang
(iris <- read(data_example("iris_sidecar.csv"), lang = "en_us")) # US (in)
(iris <- read(data_example("iris_sidecar.csv"), lang = "fr")) # French
(iris <- read(data_example("iris_sidecar.csv"), lang = "FR_BE")) # Belgian
(iris <- read(data_example("iris_sidecar.csv"), lang = NULL)) # No labels
# Require the feather package
#(iris <- read(data_example("iris.feather"))) # Not available for all Win
# Challenging datasets from the readr package
library(readr)
(mtcars <- read(readr_example("mtcars.csv")))
(mtcars <- read(readr_example("mtcars.csv.zip")))
(mtcars <- read(readr_example("mtcars.csv.bz2")))
(challenge <- read(readr_example("challenge.csv"), guess_max = 1001))
(massey <- read(readr_example("massey-rating.txt")))
# By default, the type cannot be guessed from the extension
# This is a space-separated values file (ssv)
(massey <- read(readr_example("massey-rating.txt"), type = "ssv"))
# or ...
(massey <- read$ssv(readr_example("massey-rating.txt")))
(epa <- read$ssv(readr_example("epa78.txt"), col_names = FALSE))
(example_log <- read(readr_example("example.log")))
# There are different ways to specify columns for fixed-width files (fwf)
# See ?read_fwf in package readr
(fwf_sample <- read$fwf(readr_example("fwf-sample.txt"),
col_positions = fwf_cols(name = 20, state = 10, ssn = 12)))
# Various examples of Excel datasets from readxl
library(readxl)
(xl <- read(readxl_example("datasets.xls")))
(xl <- read(readxl_example("datasets.xlsx"), sheet = "mtcars"))
(xl <- read(readxl_example("datasets.xlsx"), sheet = 3))
# Accommodate a column with disparate types via col_types = "list"
(clip <- read(readxl_example("clippy.xls"), col_types = c("text", "list")))
(clip <- read(readxl_example("clippy.xlsx"), col_types = c("text", "list")))
tibble::deframe(clip)
# Read from a specific range in a sheet
(xl <- read(readxl_example("datasets.xlsx"), range = "mtcars!B1:D5"))
(deaths <- read(readxl_example("deaths.xls"), range = cell_rows(5:15)))
(deaths <- read(readxl_example("deaths.xlsx"), range = cell_rows(5:15)))
(type_me <- read(readxl_example("type-me.xls"), sheet = "logical_coercion",
col_types = c("logical", "text")))
(type_me <- read(readxl_example("type-me.xlsx"), sheet = "numeric_coercion",
col_types = c("numeric", "text")))
(type_me <- read(readxl_example("type-me.xls"), sheet = "date_coercion",
col_types = c("date", "text")))
(type_me <- read(readxl_example("type-me.xlsx"), sheet = "text_coercion",
col_types = c("text", "text")))
(xl <- read(readxl_example("geometry.xls"), col_names = FALSE))
(xl <- read(readxl_example("geometry.xlsx"), range = cell_rows(4:8)))
# Various examples from haven
library(haven)
# Build the full path to an example file stored in the 'examples' directory
# of the haven package. Because mustWork = TRUE, an error is raised when the
# requested file (or the package) cannot be found.
haven_example <- function(path) {
  system.file("examples", path, package = "haven", mustWork = TRUE)
}
(iris2 <- read(haven_example("iris.dta"))) # Stata v. 8-14
(iris2 <- read(haven_example("iris.sav"))) # SPSS, TODO: labelled -> factor?
(pbc <- read(data_example("pbc.por"))) # SPSS, POR format
(iris2 <- read$sas(haven_example("iris.sas7bdat"))) # SAS file
(afalfa <- read(data_example("afalfa.xpt"))) # SAS transport file
# Note that where completion is available, you have a completion list of file
# formats after typing read$<tab>
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.