# rsoomo: Soomo Tools

#' Combine multiple analytics reports.
#'
#' Reads a series of weekly analytics reports and combines them into a single
#' data.frame with uniform column names, which can then be exported as a .csv
#' file.
#'
#' The reports must be CSV files whose names share a common prefix and end in
#' the week number, i.e. `<prefix><n>.csv` for `n` in `1, ..., weeks`. For
#' instance, to combine 8 weeks of analytics reports from the admin space you
#' might name the files `analytics_report_1.csv` through
#' `analytics_report_8.csv` and pass `prefix = "analytics_report_"`.
#'
#' Report `n` is expected to contain a `Student ID` column plus the
#' `Chapter n: ...` columns for question completion %, total minutes spent,
#' multiple choice correct % (1st and final attempt), the four
#' `... Questions Possible` counts (multiple choice, poll, response board,
#' short answer) and `Questions Answered`. Rows with a missing `Student ID`
#' are dropped.
#'
#' Template use for a chapter is flagged as `1` when `Questions Answered` is
#' greater than the sum of the four `Questions Possible` counts, and `0`
#' otherwise.
#'
#' The weekly tables are joined on the student id with a left join, starting
#' from week 1. Students who do not appear in the week 1 report are therefore
#' not included in the result, and students missing from a later report get
#' `NA` for that chapter's columns.
#'
#' @param prefix A character string. The literal text (not a regular
#'  expression) that every report file name starts with; it may include a
#'  directory path. I suggest that it be entirely lower case and not include
#'  any spaces. For instance, to combine analytics reports from Texas
#'  Politics, each file might start with "txpol_spring_".
#' @param weeks An integer (at least 1). The number of analytics reports to
#'  combine. For instance, to look at a full 8-week term of analytics reports,
#'  give this the value 8.
#' @return A data.frame with one row per student and the columns `soomo_id`,
#'  followed for each week `n` by `completion_chapter_n`,
#'  `mins_spent_chapter_n`, `first_attempt_chapter_n`,
#'  `final_attempt_chapter_n` and `template_use_chapter_n`.
#' @export
#' @import tidyr
#' @import dplyr
combine_analytics <- function(prefix, weeks) {

  # Validate `weeks` up front: `1:weeks` would count down (1, 0) for
  # `weeks = 0` and attempt to read a week-0 report.
  n_weeks <- suppressWarnings(as.integer(weeks))
  if (length(n_weeks) != 1L || is.na(n_weeks) || n_weeks < 1L) {
    stop("`weeks` must be a single whole number >= 1.", call. = FALSE)
  }

  # Per-chapter source columns, without the "Chapter <n>: " prefix.
  metric_labels <- c(
    "Question Completion %",
    "Total Minutes Spent",
    "MULTIPLE CHOICE Correct % - 1st Attempt",
    "MULTIPLE CHOICE Correct % - Final Attempt"
  )
  possible_labels <- c(
    "MULTIPLE CHOICE Questions Possible",
    "POLL Questions Possible",
    "RESPONSE BOARD Questions Possible",
    "SHORT ANSWER Questions Possible"
  )

  # Output names for the per-chapter columns; the week number is appended.
  uniform_col_names <- c(
    "completion_chapter_",
    "mins_spent_chapter_",
    "first_attempt_chapter_",
    "final_attempt_chapter_",
    "template_use_chapter_"
  )

  # Read one weekly report and reduce it to the uniform set of columns.
  summarise_week <- function(week) {
    path <- paste0(prefix, week, ".csv")
    report <- utils::read.csv(
      path,
      header = TRUE, sep = ",", quote = "\"",
      stringsAsFactors = FALSE,
      # Keep the original headers ("Chapter 1: ...") intact for name lookup.
      check.names = FALSE
    )

    chapter <- paste0("Chapter ", week, ": ")
    metric_cols <- paste0(chapter, metric_labels)
    possible_cols <- paste0(chapter, possible_labels)
    answered_col <- paste0(chapter, "Questions Answered")

    # Fail with the file and column names rather than a positional-index
    # error further down.
    required <- c("Student ID", metric_cols, possible_cols, answered_col)
    absent <- setdiff(required, names(report))
    if (length(absent) > 0L) {
      stop(
        "Report '", path, "' is missing expected column(s): ",
        paste(absent, collapse = ", "),
        call. = FALSE
      )
    }

    report <- report[!is.na(report[["Student ID"]]), required, drop = FALSE]

    # More answers than the four counted question types allow is taken to
    # mean that template questions were answered as well.
    non_template_possible <- unname(
      rowSums(report[, possible_cols, drop = FALSE])
    )
    template_use <- ifelse(
      report[[answered_col]] > non_template_possible, 1, 0
    )

    out <- report[, c("Student ID", metric_cols), drop = FALSE]
    out[["template_use"]] <- template_use
    names(out) <- c("soomo_id", paste0(uniform_col_names, week))
    out
  }

  analytics_data_by_week <- lapply(seq_len(n_weeks), summarise_week)

  # Left join each later week onto the running result, keyed on the student
  # id; `all.x = TRUE` keeps exactly the students present in week 1.
  Reduce(
    function(combined, next_week) {
      merge(combined, next_week, by = "soomo_id", all.x = TRUE)
    },
    analytics_data_by_week
  )
}