R/surveyXact.R

Defines functions read_surveyxact

Documented in read_surveyxact

#' Read/import SurveyXact data
#'
#' @param datafile Excel XML export from SurveyXact (Sheets: Labels, Variables, Dataset, Structure)
#' @return Data frame (tibble), Haven labelled
read_surveyxact <- function(datafile) {

  # Check that file exists and has the sheets
  if (!file_test("-f", datafile))
    stop("No such file.")

  if (!sum(c("Labels", "Variables", "Dataset", "Structure") %in% readxl::excel_sheets(datafile)) == 4)
    stop("File is not SurveyXact formatted. Make sure to make a full export.")

  # Initialize Progress bar
  p <- dplyr::progress_estimated(5)

  # Import data
  # Structure sheet not used (yet)

  surveydata <- list()
  surveydata[["Dataset"]] <- readxl::read_excel(datafile, sheet = "Dataset") # Get numeric dataset
  p$tick()$print()
  surveydata[["Variables"]] <- readxl::read_excel(datafile, sheet = "Variables") # Get variable labels
  p$tick()$print()
  surveydata[["Labels"]] <- readxl::read_excel(datafile, sheet = "Labels", col_names = c("var", "varnum", "varlabel")) # Get value labels
  p$tick()$print()

  # Nest value labels

  surveydata[["NestedLabels"]] <- tidyr::nest(surveydata[["Labels"]],
                                              varinfo = c(varnum, varlabel)
  )
  p$tick()$print()

  # Set variable and value labels for all vars
  # TODO: Consider use of apply-family => More elegant solution (and a bit faster)

  for (eachvar in colnames(surveydata[["Dataset"]])) {

    # Dates, logical classes, factors not allowed by Haven's labelled class
    varvector <- surveydata[["Dataset"]][[eachvar]]
    if (is.numeric(varvector) || is.character(varvector)) {

      # Lookup value labels if they exist. Save as named vector.
      if (eachvar %in% surveydata[["NestedLabels"]]$var) {

        vallabels <- setNames(surveydata[["NestedLabels"]]$varinfo[surveydata[["NestedLabels"]]$var == eachvar][[1]][[1]],
                              surveydata[["NestedLabels"]]$varinfo[surveydata[["NestedLabels"]]$var == eachvar][[1]][[2]])

      } else {

        vallabels <- NULL

      }

      varlabel <- surveydata[["Variables"]][[2]][surveydata[["Variables"]]$variableName == eachvar]

      surveydata[["Dataset"]][eachvar] <- haven::labelled(varvector,
                                                          labels = vallabels,
                                                          label = varlabel)

    }

  }

  p$tick()$print()

  return(surveydata[["Dataset"]])

}
adviceas/adviceverse documentation built on Jan. 9, 2021, 11:58 a.m.