R/qualtrics_read_survey.R

Defines functions qualtrics_read_survey

Documented in qualtrics_read_survey

#   Download qualtrics data into R
#    Copyright (C) 2018 Jasper Ginn

#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.

#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.

#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

#' Read a CSV File Exported From Qualtrics.
#'
#' \code{readSurvey}
#'Reads comma separated csvdata files generated by Qualtrics
#'software. The second line containing the variable labels is imported.
#'Repetitive introductions to matrix questions are automatically removed.
#'Variable labels are stored as attributes.
#'
#' @param file_name String. A csv data file.
#' @param stripHTML Logical. If TRUE, then remove html tags. Defaults to TRUE
#' @param legacyFormat Logical. If TRUE, then import "legacy" format csv files (as of 2017). Defaults to FALSE
#'
#' @author Adrian Brugger, Stefan Borer & Jasper Ginn
#' @importFrom utils read.csv
#' @importFrom sjlabelled set_label
#' @importFrom jsonlite fromJSON
#' @importFrom stringr str_match
#' @importFrom readr read_csv
#' @return A data frame. Variable labels are stored as attributes. They are not printed on
#' the console but are visibile in the RStudio viewer.
#' @export
#' @examples
#' \dontrun{
#' # Generic use of readSurvey()
#' my_data_frame <- readSurvey("<YOUR-PATH-TO-CSV-FILE>")
#' # Example using current data format
#' file <- system.file("extdata", "sample.csv", package = "qualtRics")
#' sample_df <- readSurvey(file)
#' # Example using legacy data format
#' file <- system.file("extdata", "sample_legacy.csv", package = "qualtRics")
#' sample_legacy_df <- readSurvey(file)
#' }

qualtrics_read_survey <- function(file_name,
                                  stripHTML = TRUE,
                                  legacyFormat = FALSE) {

  # START UP: CHECK ARGUMENTS PASSED BY USER ----

  # check if file exists
  assert_surveyFile_exists(file_name)
  # skip 2 rows if legacyFormat, else 3 when loading the data
  skipNr <- ifelse(legacyFormat, 2, 3)

  # READ DATA ----

  # import data including variable names (row 1) and variable labels (row 2)
  rawdata <- suppressMessages(readr::read_csv(file = file_name,
                             col_names = FALSE,
                             skip = skipNr))
  # Need contingency when 0 rows
  assertthat::assert_that(nrow(rawdata) > 0,
                          msg="The survey you are trying to import has no responses.") # nolint
  # Load headers
  header <- suppressMessages(readr::read_csv(file = file_name,
                            col_names = TRUE,
                            n_max = 1))

  # MANIPULATE DATA ----

  # make them data.frame's, else the factor conversion
  # in `inferDataTypes` crashes
  #rawdata <- as.data.frame(rawdata)
  #header <- as.data.frame(header)
  # Add names
  names(rawdata) <- names(header)

  # If Qualtrics adds an empty column at the end, remove it
  if(grepl(",$", readLines(file_name, n = 1))) {
    header <- header[, 1:(ncol(header)-1)]
    rawdata <- rawdata[, 1:(ncol(rawdata)-1)]
  }
  # extract second row, remove it from df
  secondrow <- unlist(header)
  row.names(rawdata) <- NULL

  # Clean variable labels
  if(stripHTML) {
    # weird regex to strip HTML tags, leaving only content
    # https://www.r-bloggers.com/htmltotext-extracting-text-from-html-via-xpath/ # nolint
    pattern <- "</?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)/?>" # nolint
    secondrow <- gsub(pattern, "\\4", secondrow)
  }

  # Scale Question with subquestion:
  # If it matches one of ".?!" followed by "-", take subsequent part
  subquestions <- stringr::str_match(secondrow, ".*[:punct:]\\s*-(.*)")[,2]

  # Else if subquestion returns NA, use whole string
  subquestions[is.na(subquestions)] <- unlist(secondrow[is.na(subquestions)])

  # Remaining NAs default to 'empty string'
  subquestions[is.na(subquestions)] <- ""

  # Add labels to data
  rawdata <- sjlabelled::set_label(rawdata, unlist(subquestions))

  # RETURN ----

  return(rawdata)

}
JasperHG90/qualtRics documentation built on May 7, 2019, 10:33 a.m.