R/get_survey_data.R

Defines functions survey_var_recode survey_recode get_survey_data

Documented in get_survey_data

#' Download a labeled survey data set
#'
#' Download a survey data set from Qualtrics corresponding to a variable
#' dictionary generated by \code{\link[qualtdict]{dict_generate}}.
#' Question, items, levels and labels are added as attributes using
#' \code{sjlabelled}.
#'
#' @param dict A variable dictionary returned by
#' \code{\link[qualtdict]{dict_generate}}.
#' @param keys A character vector containing variables to be added, if
#' \code{split_by_block} is \code{TRUE}, to all individual block data sets.
#' Can also be used to add variables (e.g. IP address) found on Qualtrics
#' but not in the dictionary to the downloaded data sets.
#' @param skip Logical. If \code{TRUE}, variables with potenetial
#' level-label mistakes will be removed from the data set.
#' @param split_by_block Logical. If \code{TRUE}, the function returns a
#' list with each element being the data set for a single survey block.
#' @param ... Other arguments passed to
#' \code{\link[qualtRics]{fetch_survey}}. Note that \code{surveyID},
#' \code{import_id}, \code{convert}, \code{label} and \code{include_qids}
#' will be overwritten by the function.
#'
#' @return
#' A dataframe containing survey data with question, items, levels and
#' labels added as attributes to each column with \code{sjlabelled}.
#'
#' @export
#' @examples
#' \dontrun{
#'
#' # Generate a dictionary
#' mydict <- dict_generate("SV_4YyAHbAxpdbzacl",
#'   survey_name = "mysurvey",
#'   var_name = "easy_name",
#'   block_pattern = block_pattern,
#'   block_sep = ".",
#'   split_by_block = FALSE
#' )
#' survey_dat <- get_survey_data(mydict,
#'   unanswer_recode = -77,
#'   unanswer_recode_multi = 0
#' )
#' }
get_survey_data <- function(dict,
                            keys = NULL,
                            split_by_block = FALSE,
                            skip = NULL,
                            ...) {
  checkarg_isqualtdict(dict)
  checkarg_ischaracter(keys, null_okay = TRUE)
  checkarg_isboolean(split_by_block)
  checkarg_ischaracter(skip, null_okay = TRUE)

  args <- list(...)
  args$force_request <- TRUE
  args$surveyID <- attr(dict, "surveyID")
  args$import_id <- TRUE
  args$convert <- FALSE
  args$label <- FALSE

  survey <- do.call(fetch_survey2, args)

  # Not sure why underscore is appended sometimes
  # when include_questions is specified
  colnames(survey) <- str_remove(colnames(survey), "_$")

  if (!is.null(skip)) {
    survey <- survey[!colnames(survey) %in% skip]
  }

  if (split_by_block == TRUE) {
    keys <- unique(unlist(dict[dict[["name"]] %in% keys, "qid"]))
    keys_dat <- dict[dict[["name"]] %in% keys, ]

    block_dict <- map(
      split(dict, dict$block),
      ~ bind_rows(
        keys_dat[-match(keys_dat[["name"]], .x[["name"]])],
        .x
      ) %>%
        select(keys, everything())
    )

    return(map(block_dict, survey_recode,
      dat = survey,
      keys = keys,
      unanswer_recode = args$unanswer_recode,
      unanswer_recode_multi = args$unanswer_recode_multi
    ))
  } else {
    return(survey_recode(dict,
      dat = survey, keys = keys,
      unanswer_recode = args$unanswer_recode,
      unanswer_recode_multi = args$unanswer_recode_multi
    ))
  }
}

#' Add labels to survey
#' @keywords internal
#' @noRd
survey_recode <- function(dict,
                          dat,
                          keys,
                          unanswer_recode,
                          unanswer_recode_multi) {
  in_dat <- dict[["qid"]] %in% colnames(dat)
  dict <- dict[in_dat, ]
  unique_qids <- unique(dict[["qid"]])
  unique_varnames <- unique(dict[["name"]])

  keys <- c("externalDataReference", "startDate", "endDate", keys)
  dat_cols <- c(keys, unique_qids)
  varnames <- setNames(unique_qids, unique_varnames)
  dat <- rename(dat[dat_cols], !!!varnames)

  # level = unique to preserve ordering
  split_dict <- split(dict, factor(dict$qid, levels = unique(dict$qid)))
  dat_vars <- map2_df(
    dat[unique_varnames], split_dict,
    ~ survey_var_recode(.x, .y,
      unanswer_recode = unanswer_recode,
      unanswer_recode_multi = unanswer_recode_multi
    )
  )

  bind_cols(dat[keys], dat_vars)
}

#' Add labels to each variable in survey (`sjlabelled` uses `haven`)
#' @importFrom sjlabelled set_label set_labels
#' @importFrom haven read_xpt
#' @keywords internal
#' @noRd
survey_var_recode <- function(var,
                              var_dict,
                              unanswer_recode,
                              unanswer_recode_multi) {
  # Multiple rows for a question, only first one chosen
  type <- var_dict[["type"]][1]
  content_type <- var_dict[["content_type"]][[1]]
  levels <- var_dict[["level"]]
  labels <- var_dict[["label"]]

  if (!is.na(content_type) && content_type == "Number") {
    # Check for content_type numeric,
    # vector with numbers such as "06" passes validation on Qualtrics but
    # will be character when read by readr

    var <- as.numeric(var)
  }

  if (type == "TE" || any(grepl("_TEXT", levels))) {

  } else if (nrow(var_dict) == 1) {
    # Single row means allowing for multiple answer
    if (!is.null(unanswer_recode_multi)) {
      levels <- c(levels, unanswer_recode_multi)
      labels <- c(labels, paste("Not", labels))
    }
  } else if (nrow(var_dict) > 1) {
    # If multiple rows it's ordinal
    labels <- grep("TEXT", labels, invert = TRUE, value = TRUE)
    levels <- grep("TEXT", levels, invert = TRUE, value = TRUE)
    if (!is.null(unanswer_recode)) {
      levels <- c(levels, unanswer_recode)
      labels <- c(labels, "Seen but not answered")
    }
  }

  # TE variables dont have levels or labels
  if (any(!is.na(levels))) {
    var <- set_labels(var, labels = setNames(levels, labels))
  } else {
    text_label <- unique(paste_narm(var_dict[["question"]], var_dict[["item"]]))
    var <- set_label(var, label = text_label)
  }


  return(var)
}
lyh970817/qualtdict documentation built on April 8, 2023, 9:26 p.m.