import: Import Data Sets

View source: R/import_export.R

importR Documentation

Import Data Sets

Description

These functions can be used to import data from local or remote paths, or from the internet. They work closely with the certeprojects package to support Microsoft Planner project numbers. To support row names and older R versions, import_*() functions return plain data.frames, not e.g. tibbles.

Usage

import(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  encoding = "UTF-8",
  ...
)

import_rds(filename, project_number = project_get_current_id(ask = FALSE), ...)

import_xlsx(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  sheet = 1,
  range = NULL,
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = dec_mark(),
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  ...
)

import_excel(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  sheet = 1,
  range = NULL,
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = dec_mark(),
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  ...
)

import_csv(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  encoding = "UTF-8",
  ...
)

import_csv2(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ",",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  encoding = "UTF-8",
  ...
)

import_tsv(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  encoding = "UTF-8",
  ...
)

import_txt(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  sep = "\t",
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ",",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  encoding = "UTF-8",
  ...
)

import_sav(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "en",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  ...
)

import_spss(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "en",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  ...
)

import_feather(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  col_select = everything(),
  ...
)

import_clipboard(
  sep = "\t",
  header = TRUE,
  startrow = 1,
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = dec_mark(),
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  ...
)

import_mail_attachment(
  search = "hasattachment:yes",
  search_subject = NULL,
  search_from = NULL,
  search_when = NULL,
  search_attachment = NULL,
  folder = certemail::get_inbox_name(account = account),
  n = 5,
  sort = "received desc",
  account = certemail::connect_outlook(),
  auto_transform = TRUE,
  sep = ",",
  ...
)

import_url(
  url,
  auto_transform = TRUE,
  sep = ",",
  datenames = "en",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  encoding = "UTF-8",
  ...
)

import_teams(
  full_teams_path = NULL,
  account = connect_teams(),
  auto_transform = TRUE,
  sep = ",",
  datenames = "en",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  encoding = "UTF-8"
)

Arguments

filename

the full path of the file to be imported, will be parsed to a character, can also be a remote location (from http/https/ftp/ssh, GitHub/GitLab)

project_number

a Microsoft Planner project number

auto_transform

transform the imported data with auto_transform()

encoding

Default encoding. This only affects how the file is read.

...

arguments passed on to methods

sheet

Excel sheet to import, defaults to first sheet

range

a cell range to read from, allows typical Excel ranges such as "B3:D87" and "Budget!B2:G14"

datenames

language of the date names, such as weekdays and months

dateformat

expected date format, will be coerced with format_datetime()

timeformat

expected time format, will be coerced with format_datetime()

decimal.mark

separator for decimal numbers

big.mark

separator for thousands

timezone

expected time zone

na

values to interpret as NA

skip

number of first rows to skip

sep

character to separate values in a row

col_select

columns to select, supports the tidyselect language

header

use first row as header

startrow

first row to start importing

search

an ODATA filter, ignores sort and defaults to search only mails with attachments

search_subject

a character, equal to search = "subject:(search_subject)", case-insensitive

search_from

a character, equal to search = "from:(search_from)", case-insensitive

search_when

a Date vector of size 1 or 2, equal to search = "received:date1..date2", see Examples

search_attachment

a character to use a regular expression for attachment file names

folder

email folder name to search in, defaults to Inbox of the current user by calling get_inbox_name()

n

maximum number of emails to search

sort

initial sorting

account

a Teams account from Azure or an AzureAuth Microsoft 365 token, e.g. retrieved with certeprojects::connect_teams()

url

remote location of any data set, can also be a (non-raw) GitHub/GitLab link

full_teams_path

a full path in Teams, including the Team name and the channel name. Leave blank to use interactive mode, which allows file/folder picking from a list in the console.

Details

Importing any unlisted filetype using import() requires the rio package to be installed.

Importing an Excel file using import_xlsx() or import_excel() requires the readxl package to be installed.

Importing an SPSS file using import_sav() or import_spss() requires the haven package to be installed.

Importing a Feather file using import_feather() requires the arrow package to be installed. Apache Feather provides efficient binary columnar serialization for data sets, enabling easy sharing of data across data analysis languages (such as between Python and R). Use the col_select argument (which supports the tidyselect language) for specific data selection to improve importing speed.

Importing the clipboard using import_clipboard() requires the clipr package to be installed.

Importing mail attachments using import_mail_attachment() requires the certemail package to be installed. It calls download_mail_attachment() internally and saves the attachment to a temporary folder. For all folder names, run: sapply(certemail::connect_outlook()$list_folders(), function(x) x$properties$displayName).

The import_url() function tries to download the file first, after which it will be imported using the appropriate import_*() function.

The import_teams() function uses certeprojects::teams_download_file() to provide an interactive way to select a file in any Team, to download the file, and to import the file using the appropriate import_*() function.

See Also

export()

Examples

export_csv(iris)
import_csv("iris") |> head()

# the above is equal to:
# export(iris, "iris.csv")
# import("iris.csv") |> head()


# row names are also supported
export_csv(mtcars)
import_csv("mtcars") |> head()


# Apache's Feather format is column-based
# and allows for specific and fast file reading
library(dplyr, warn.conflicts = FALSE)
starwars |> export_feather()
import("starwars.feather",
       col_select = starts_with("h")) |> 
  head()
  

# (cleanup)
file.remove("iris.csv")
file.remove("mtcars.csv")
file.remove("starwars.feather")

## Not run: 

# ---- Microsoft Teams support -------------------------------------------

# IMPORTING

# import from Teams by picking a folder interactively from any Team
x <- import_teams()

# to NOT pick a Teams folder (e.g. in non-interactive mode), set `full_teams_path`
x <- import_teams(full_teams_path = "MyTeam/MyChannel/MyFolder/MyFile.xlsx")


# EXPORTING

# export to Teams by picking a folder interactively from any Team
mtcars |> export_teams()

# the default is RDS, but you can set `filename` to specify the file type yourself
mtcars |> export_teams("mtcars.xlsx")

# to NOT pick a Teams folder (e.g. in non-interactive mode), set `full_teams_path`
mtcars |> export_teams("mtcars.xlsx", full_teams_path = "MyTeam/MyChannel/MyFolder")
mtcars |> export_teams(full_teams_path = "MyTeam/MyChannel/MyFolder")


## End(Not run)

certe-medical-epidemiology/certetoolbox documentation built on April 17, 2025, 3:24 a.m.