R/sirs301_import.R

Defines functions sirs301_import_csvs

Documented in sirs301_import_csvs

#' Import SIRS-301 student drilldown exports from a directory
#'
#' Recursively finds every `.xls` and `.csv` export under `path`, reads each
#' file with all columns as character, and stacks them into one data frame.
#'
#' @param path directory of exports from L2RPT.  To get the files, run the
#' SIRS-301 report, click the number in the 'Total Tested' grid to get the
#' student drilldown, and then use the 'View in Excel Options' >
#' 'View in CSV format' option to write the data to a file.  Do not choose
#' 'View in Excel 2007 data' - the files generated by the IBM Cognos system
#' are subtly different than files produced by MS Excel and can't be read by
#' the `readxl` package.
#'
#' @param verbose if TRUE, will generate `message()` updates.
#'
#' @details Files ending in `.xls` are read as tab-delimited UTF-16LE text
#' (they are not real Excel workbooks); files ending in `.csv` are read with
#' `readr::read_csv()`.  If the directory holds both kinds, all of them are
#' read and combined.  An error is raised when no matching file is found.
#'
#' @return tbl_df data frame of student-level records
#' @export
sirs301_import_csvs <- function(
  path,
  verbose = TRUE
) {

  # Locate all export files. The patterns are anchored on the extension so
  # that unrelated names (e.g. "*.xlsx", "csv_notes.txt") are not picked up.
  if (verbose) message("Reading path names.")
  all_xls <- dir(
    path = path, pattern = "\\.xls$", ignore.case = TRUE,
    recursive = TRUE, full.names = TRUE
  )
  all_csv <- dir(
    path = path, pattern = "\\.csv$", ignore.case = TRUE,
    recursive = TRUE, full.names = TRUE
  )

  # Every located file is read below, so report the combined count.
  num_files <- length(all_xls) + length(all_csv)
  if (verbose) {
    message(sprintf("There are %s files in the path you provided.", num_files))
  }

  # Fail early with a readable error instead of an undefined-object error.
  if (num_files == 0) {
    stop(
      "No .xls or .csv export files were found in '", path, "'.",
      call. = FALSE
    )
  }

  # Read each file into a list element
  if (verbose) message("Reading SIRS 301 export files.")

  # IBM Cognos ".xls" exports are really tab-separated UTF-16LE text files
  xls_list <- lapply(
    all_xls, utils::read.delim, header = TRUE, fileEncoding = "UTF-16LE",
    stringsAsFactors = FALSE, colClasses = "character"
  )
  csv_list <- lapply(
    all_csv, readr::read_csv, col_types = readr::cols(.default = "c")
  )

  # Keep both sets; lapply() over an empty vector contributes an empty list
  exports_list <- c(xls_list, csv_list)

  # Combine into one df
  exports_results <- dplyr::bind_rows(exports_list)

  # Turn into tbl_df
  exports_results <- tibble::as_tibble(exports_results)

  # studentid should be character
  exports_results$STUDENT_ID <- as.character(exports_results$STUDENT_ID)

  # Return
  exports_results
}
almartin82/NYSEDtools documentation built on June 3, 2023, 10:52 a.m.