Nothing
#' Read a CSV file exported from Qualtrics
#'
#' Reads comma separated CSV files generated by Qualtrics software. The second
#' line containing the variable labels is imported. Repetitive introductions to
#' matrix questions are automatically removed. Variable labels are stored as
#' attributes.
#'
#' @param file_name String. A CSV data file.
#' @param import_id Logical. If `TRUE`, use Qualtrics import IDs instead of
#' question IDs as column names. Defaults to `FALSE`.
#' @param strip_html Logical. If `TRUE`, then remove HTML tags from variable
#' descriptions. Defaults to `TRUE`.
#' @param time_zone String. A local timezone to determine response date values.
#' Defaults to `NULL` which corresponds to UTC time. See ["Dates and
#' Times"](https://api.qualtrics.com/) from Qualtrics for more information on
#' format.
#' @param legacy Logical. If `TRUE`, then import "legacy" format CSV files (as
#' of 2017). Defaults to `FALSE`.
#' @param add_column_map Logical. If `TRUE`, then a column map data frame will
#' be added as an attribute to the main response data frame. This column map
#' captures Qualtrics-provided metadata associated with the response download,
#' such as an item description and internal ID's. Defaults to `TRUE`.
#' @param add_var_labels Logical. If `TRUE`, then the item description from each
#' variable (equivalent to the one in the column map) will be added as a
#' "label" attribute using [sjlabelled::set_label()]. Useful for reference as
#' well as cross-compatibility with other stats packages (e.g., Stata, see
#' documentation in `sjlabelled`). Defaults to `TRUE`.
#' @param col_types Optional. This argument provides a way to manually overwrite
#' column types that may be incorrectly guessed. Takes a [readr::cols()]
#' specification. See example below and [readr::cols()] for formatting
#' details. Defaults to `NULL`.
#'
#' @importFrom sjlabelled set_label
#' @importFrom jsonlite fromJSON
#' @importFrom purrr map
#' @importFrom purrr imap
#' @importFrom purrr map_dfr
#' @importFrom tidyr unite
#' @importFrom tidyr everything
#' @importFrom tidyr pivot_longer
#' @importFrom tidyr pivot_wider
#' @importFrom stringr str_split_fixed
#' @importFrom readr read_csv
#' @importFrom readr locale
#' @importFrom readr type_convert
#' @importFrom dplyr select
#' @importFrom dplyr slice
#' @importFrom rlang set_names
#'
#' @return A data frame. Variable labels are stored as attributes. They are not
#' printed on the console but are visible in the RStudio viewer.
#' @export
#' @examples
#' \dontrun{
#' # Generic use of read_survey()
#' df <- read_survey("<YOUR-PATH-TO-CSV-FILE>")
#' }
#' # Example using current data format
#' file <- system.file("extdata", "sample.csv", package = "qualtRics")
#' df <- read_survey(file)
#'
#' # Example using legacy data format
#' file <- system.file("extdata", "sample_legacy.csv", package = "qualtRics")
#' df <- read_survey(file, legacy = TRUE)
#'
#' # Example changing column type
#' file <- system.file("extdata", "sample.csv", package = "qualtRics")
#' # Force EndDate to be a string
#' df <- read_survey(file, col_types = readr::cols(EndDate = readr::col_character()))
#'
read_survey <-
  function(file_name,
           strip_html = TRUE,
           import_id = FALSE,
           time_zone = NULL,
           legacy = FALSE,
           add_column_map = TRUE,
           add_var_labels = TRUE,
           col_types = NULL) {

    # START UP: CHECK ARGUMENTS PASSED BY USER ----

    # Validate the logical flags before any of them is used in a condition:
    checkarg_isboolean(strip_html)
    checkarg_isboolean(import_id)
    checkarg_isboolean(legacy)
    checkarg_isboolean(add_column_map)
    checkarg_isboolean(add_var_labels)

    # Import IDs are not available for legacy files: warn and fall back to the
    # user-defined variable names. `&&` is used because both are scalar flags.
    if (import_id && legacy) {
      rlang::warn(c("Using import IDs as column names is not supported for legacy response files.",
                    "Defaulting to user-defined variable names",
                    "Set import_id = FALSE in future."))
      import_id <- FALSE
    }

    # Validate time_zone; the helper returns the value to use from here on.
    # NOTE(review): the original comment says an unspecified time zone becomes
    # the system time zone, while the roxygen docs say UTC -- confirm against
    # checkarg_time_zone().
    time_zone <- checkarg_time_zone(time_zone)

    # Check that the file at file_name exists:
    checkarg_file_name(file_name)

    # Check that col_types is the right type:
    checkarg_col_types(col_types)

    # READ RAW DATA ----

    # The first line of the file supplies the column names. The metadata
    # row(s) that follow (description, then JSON for non-legacy files) are
    # still part of `rawdata` here and are split off by process_raw_survey().
    # Everything is read as character; column types are inferred later, once
    # the metadata rows have been removed. Only empty strings become NA.
    rawdata <-
      suppressMessages(
        readr::read_csv(
          file = file_name,
          col_types = readr::cols(.default = readr::col_character()),
          na = c("")
        )
      )

    # If Qualtrics adds an empty column at the end (header line ends with a
    # comma), remove it:
    if (grepl(",$", readLines(file_name, n = 1))) {
      rawdata <- rawdata[, seq_len(ncol(rawdata) - 1)]
    }

    # PROCESS AND RETURN ----

    # Returned as the last expression (not assigned) so that the result is
    # visible when the function is called at the console.
    process_raw_survey(
      rawdata = rawdata,
      import_id = import_id,
      time_zone = time_zone,
      col_types = col_types,
      add_column_map = add_column_map,
      add_var_labels = add_var_labels,
      strip_html = strip_html,
      legacy = legacy
    )
  }
#' Turn raw (all-character) survey data into typed responses plus a column map
#'
#' Splits the metadata row(s) at the top of `rawdata` from the responses,
#' converts the response columns to inferred types, builds a column map from
#' the metadata, and optionally attaches labels and the column map to the
#' result.
#'
#' @param rawdata Data frame whose first row (legacy) or first two rows
#'   (description, then JSON) hold metadata and whose remaining rows hold
#'   responses.
#' @param import_id Logical. If `TRUE`, columns are renamed to the column map's
#'   `ImportId` and `choiceId` joined by `"_"`.
#' @param time_zone Time zone passed to [readr::locale()] for type conversion.
#' @param col_types Column specification passed to [readr::type_convert()].
#' @param add_column_map Logical. If `TRUE`, the column map is attached as the
#'   `"column_map"` attribute.
#' @param add_var_labels Logical. If `TRUE`, descriptions are attached to the
#'   columns via [sjlabelled::set_label()].
#' @param strip_html Logical. If `TRUE`, descriptions are passed through
#'   `remove_html()`.
#' @param legacy Logical. If `TRUE`, only one metadata row (the description)
#'   is expected. Defaults to `FALSE`.
#'
#' @return The response data frame, with optional attributes as described
#'   above.
#' @noRd
process_raw_survey <-
  function(rawdata,
           import_id,
           time_zone,
           col_types,
           add_column_map,
           add_var_labels,
           strip_html,
           legacy = FALSE) {

    # Identify metadata rows - first row for legacy, first 2 rows for modern:
    if (legacy) {
      header_rows <- 1
    } else {
      header_rows <- 1:2
    }

    # CREATE RESPONSE DATA FRAME ----

    # Remove metadata rows:
    responsedata <- dplyr::slice(rawdata, -header_rows)

    # Infer data types from data. `na = character()` means no further strings
    # are turned into NA during conversion.
    responsedata <-
      readr::type_convert(
        responsedata,
        locale = readr::locale(tz = time_zone),
        col_types = col_types,
        na = character()
      )

    # GENERATE COLUMN MAP ----

    # Take the first two rows (or just the first if legacy):
    colmapdata <- dplyr::slice(rawdata, header_rows)

    if (!legacy) {
      # Label the two metadata rows so they can become columns after pivoting:
      colmapdata <-
        dplyr::mutate(
          colmapdata,
          metadata_type = c("description", "JSON")
        )

      # Pivot twice to get one row per survey column, with "description" and
      # "JSON" as columns:
      col_map <-
        tidyr::pivot_longer(
          colmapdata,
          -metadata_type,
          names_to = "qname"
        )
      col_map <-
        tidyr::pivot_wider(
          col_map,
          names_from = "metadata_type",
          values_from = "value"
        )

      # Parse each JSON entry and add its fields as columns; .keep = "unused"
      # drops the raw JSON column that was consumed here:
      col_map <-
        dplyr::mutate(
          col_map,
          purrr::map_dfr(JSON, jsonlite::fromJSON),
          .keep = "unused"
        )

      # If choiceId does not exist, create it for consistency:
      if (!rlang::has_name(col_map, "choiceId")) {
        col_map$choiceId <- NA
      }
    } else {
      # If legacy, just create a simple column map with name and description.
      # Namespace-qualified like every other call in this function.
      col_map <-
        tidyr::pivot_longer(
          colmapdata,
          tidyr::everything(),
          names_to = "qname",
          values_to = "description"
        )
    }

    # If desired, clean variable labels in column map:
    if (strip_html) {
      col_map$description <- remove_html(col_map$description)
    }

    # New columns in column map for main and sub questions from description:
    col_map <-
      dplyr::mutate(
        col_map,
        tibble::as_tibble(
          # Split on the first " - " separator only (n = 2), so any later
          # separators stay inside the "sub" part
          stringr::str_split_fixed(description, "\\s-\\s", n = 2),
          # Add names
          .name_repair = ~c("main", "sub")
        ),
        # Store after variable description:
        .after = description
      )

    # ASSIGNING QID'S IF import_id = TRUE ----

    if (import_id) {
      # Build "ImportId_ChoiceId" names; na.rm = TRUE leaves just the ImportId
      # when there is no choiceId:
      qid_names <-
        tidyr::unite(
          col_map,
          col = qidnames,
          c(ImportId, choiceId),
          sep = "_",
          na.rm = TRUE
        )[["qidnames"]]

      # Change the response data and column map to use these QID names:
      names(responsedata) <- qid_names
      col_map$qname <- qid_names
    }

    # FINAL CLEANUP ----

    # Add descriptions to data as attribute "label":
    if (add_var_labels) {
      responsedata <-
        sjlabelled::set_label(responsedata, col_map$description)
    }

    # Add column map:
    if (add_column_map) {
      attr(responsedata, "column_map") <- col_map
    }

    # RETURN ----
    responsedata
  }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.