R/data_ready.r

Defines functions dataReady_setup data_ready read_data

Documented in data_ready dataReady_setup read_data

#' @include import.r
#' @include helper.r
NULL

#' read_data
#'
#' @param filePath a character for a file path.
#' @return a named list containing standard setting information.
#' @examples
#' \dontrun{
#' filePath <- "data/freq_data.xlsx"
#' imported_data <- read_data(filePath)
#' }
#'
read_data <- function(filePath) {
  imprt_data <-
    filePath %>%
    excel_sheets() %>%
    map(read_excel, path = filePath) %>%
    set_names(., nm = c("setup", "panelist","rating","item_data","examinee_data"))

  return(imprt_data)
}

#' data_ready
#'
#' @param imprtData a named list containing standard setting information
#' @return a list
#' @examples
#' \dontrun{
#' require(embededss)
#' filePath <- "data/freq_data.xlsx"
#' imported_data <- read_data(filePath)
#' data_list <- data_ready(imported_data)
#' }

data_ready <- function(imprt_data) { # imprtData = imprt_data

  # IMPORTANT ------------------------------------
  # DATA MUST be in order: setup / panelist / rating / item meta data / examinee data
  # ----------------------------------------------

  setup_data    <- dataReady_setup(imprt_data)
  panelist_data <- dataReady_panelist(imprt_data)
  rating_data   <- dataReady_rating(imprt_data)
  item_data     <- dataReady_itemdata(imprt_data)
  examinee_data <- dataReady_examineedata(imprt_data)

  res <- list(
    setup_data    = setup_data,
    panelist_data = panelist_data,
    rating_data   = rating_data,
    item_data     = item_data,
    examinee_data = examinee_data)

  return(res)
}

#' set up data
#'
#'
dataReady_setup <- function(imprt_data) {
  lv_p <-
    which(upper_remove_blank(names(imprt_data[[1]])) ==
            upper_remove_blank("Level Options"))
  level_opt_name <- names(imprt_data[[1]])[lv_p]

  lv_vec <-
    map(str_split(imprt_data[[1]][[lv_p]], ","),remove_blank_vector)

  lv_vec1 <-lv_vec %>%
    lapply(., function(x) sapply(1:length(x), function(xx) paste0("Level",xx) )) %>%
    map(., ~paste(.x, collapse = ", ")) %>%
    do.call("rbind", .)

  first_data <- imprt_data[[1]]
  first_data[[level_opt_name]] <-  lv_vec1
  data_name <- names(first_data)
  setup_data  <- first_data %>% arrange(!!as.name(data_name[1]))
  return(setup_data)
}
#'
dataReady_panelist <- function(imprt_data) {

  panelist_data <- imprt_data[[2]]
  return(panelist_data)
}

#'
#'
dataReady_rating <- function(imprt_data) {
  data_name <- names(imprt_data[[3]])
  third_data <- imprt_data[[3]] %>%
    arrange_all()
  #arrange(!!sym(list(data_name)))

  ald_p <-
    which(upper_remove_blank(names(third_data)) ==
            upper_remove_blank("ald"))
  ald_opt_name <- names(third_data)[ald_p]

  panel_p <- which(
    str_detect(
      upper_remove_blank(names(third_data)),
      paste0(upper_remove_blank("use"), "|", upper_remove_blank("panel"))
    )
  )
  lv_p <-
    which(upper_remove_blank(names(imprt_data[[1]])) ==
            upper_remove_blank("Level Options"))
  lv_vec <-
    map(str_split(imprt_data[[1]][[lv_p]], ","),remove_blank_vector)

  new_level_name <-
    third_data %>%
    mutate(ALD = remove_blank_vector(ALD)) %>%
    group_split(GCA) %>%
    map(., ~ .x %>% pull(ALD)) %>%
    map2(., lv_vec, ~ paste0("Level", match(.x, .y))) %>%
    unlist(.)

  names(third_data)[panel_p] <- "Panelist"

  third_data[[ald_p]] <- new_level_name
  data_name <- names(third_data)

  rating_data <-
    third_data %>%
    select(data_name[1]:"ALD") %>%
    arrange(!!as.name(data_name[1]))

  return(rating_data)
}
#
dataReady_itemdata <- function(imprt_data){

  item_data <- imprt_data[[4]]

  data_name <- names(item_data)
  item_data <-
    item_data %>%
    arrange(!!as.name(data_name[1]))

  return(item_data)
}
#
dataReady_examineedata <- function(imprt_data){

  if(dim(imprt_data[[5]])[1] == 0) {
    imprt_data[[5]]

    examinee_data <- bind_cols(score = 1, freq = 10, imprt_data[[1]][1])

  } else {

    fifth_data <- fifth_reorg(imprt_data[[5]])

    gca_p <-
      which(upper_remove_blank(names(fifth_data)) ==
              upper_remove_blank("grade")|
              upper_remove_blank(names(fifth_data)) ==
              upper_remove_blank("gca"))

    names(fifth_data)[gca_p] <- "GCA"
    examinee_data <- fifth_data %>% arrange(GCA)
  }
  return(examinee_data)
}



#' get_data_info
#'
#' @return a list containing all the information for later estimation.
#' @examples
#' \dontrun{
#' require(embededss)
#' filePath <- "data/freq_data.xlsx"
#' imported_data <- read_data(filePath)
#' data_list <- data_ready(imported_data)
#' data_information <- get_data_info(data_list, grade = c("M3"), ald = "ALD", location = "Loc_RP60", wess = F, modal = F, threshold = F)
#'
#' }

get_data_info <- # data_list <- data_list; inputs <- list(grade = c("M3"), ald = "ALD", location = "Loc_RP60", wess = F, modal = F, threshold = F)
  function(data_list, ...){ # data_list <- data_list

  information <- list()

  inputs <- list(...)
  names(inputs) <- tolower(names(inputs))

  setup_data    <- data_list$setup_data
  panelist_data <- data_list$panelist_data
  rating_data   <- data_list$rating_data
  item_data     <- data_list$item_data
  examinee_data <- data_list$examinee_data

  data_name <- names(data_list$item_data)
  item_data <- item_data  %>% arrange(!!as.name(data_name[1]), !!as.name(inputs$loc))


  filtered_data <- rating_data %>% filter(GCA %in% inputs[["grade"]])
  id_list = get_ID(filtered_data)
  SD_data <- get_SD(setup_data, id_list)
  lv_p <- which(remove_blank(names(setup_data)) == "LEVELOPTIONS")
  level_nm0 <- setup_data[setup_data$GCA %in% inputs[["grade"]], lv_p]
  # names(level_nm0) <- inputs[["grade"]]

  information$imported_data <-
    list(
      setup_data     = setup_data,
      panel_data     = panelist_data,
      rating_data    = rating_data,
      item_data      = item_data,
      examinee_data  = examinee_data
    )

  # information$base_data <-
  information$base_data <-
    list(
      target_nm     = inputs[["ald"]],
      loc_nm        = inputs[["location"]],
      WESS          = inputs[["wess"]],
      modal         = inputs[["modal"]],
      threshold     = inputs[["threshold"]],
      filtered_data = filtered_data
    )

  information$data_ready <-
    list(
      id_list = id_list,
      level_nm = get_lvnm(level_nm0, inputs[["grade"]]),
      location_ready = get_location(item_data, inputs[["location"]], inputs[["grade"]]),
      SD_data = SD_data
    )

  information$split_data <-
    filtered_data %>%
    group_split(!!as.name(names(filtered_data)[str_detect(names(filtered_data), "Panel|User")]))


  information$split_data <- lapply(information$split_data, function(x) {
    # x <- information$split_data[[1]]
    GCA_name <- unique(x$GCA)
    location_info <- information$data_ready$location_ready$bind_loc[[GCA_name]]

    x %>% left_join(., location_info[, c("Item_ID","OOD")], by = "Item_ID") %>% arrange(OOD) %>% select(-OOD)
  })

  return(information)
  }

##################
# Helper functions
#------------------
#' remove_blank
remove_blank <- function(inpData) {

  vec <- inpData %>% stri_replace_all_charclass(., "\\p{WHITE_SPACE}", "")
  vec <- toupper(vec)
  return(vec)
}
#' reorganize_examinee
reorganize_examinee <- function(inpData){
    if(sum(str_detect(toupper(names(inpData)), toupper("freq"))) == 0){

      return(inpData)

    } else {
      inpdata_reorg <- vector("list", ncol(inpData))

      for(i in 1:ncol(inpData)) {
        # i <- 4
        if(sum(is.na(inpData[i])) == nrow(inpData)){
          next
        }
        inpdata_reorg[[i]] <- inpData[i]
      }
      inpdata_reorg <- inpdata_reorg %>% bind_cols()

      inpdata_reorg <-
        foreach(i = 1:(ncol(inpdata_reorg)/3), .combine = 'rbind') %do% {
          ii = 1 + (i - 1)*3
          iii = ii + 2
          inpdata_reorg[,ii:iii] %>% drop_na() %>%
            set_names(., nm = c("score", "freq","GCA"))
        }
      return(inpdata_reorg)
    }
  }
#' get_ID
get_ID <- function(filteredData) {

  GCAID <-
    filteredData %>%
    distinct(GCA) %>%
    pull()

  TableID<-
    filteredData %>%
    distinct(GCA, Table)

  Table_n <- TableID %>% group_by(GCA) %>% count() %>% pull(n)

  p1 <- as.name(names(filteredData)[str_detect(names(filteredData), "Panel|User")])

  UserID <-
    filteredData %>%
    distinct(GCA, Table, !!p1)

  return(list(GCA = GCAID, Table = TableID, Table_n = Table_n,
              PanelID = UserID))
}
#' get_SD
get_SD <- function(setupData, id_list){
  SD_data =
    tryCatch({
      a1 <- setupData %>% filter(GCA %in% id_list$GCA) %>% pull(SD)
      a1[which(is.na(a1))] <- 1
      a1
    },
    error = function(e) 1
    )

  SD_data <- data.frame(GCAid = id_list$GCA, SD = SD_data)
}
#' get_lvnm
get_lvnm <- # inpData = level_nm0; GCAID = inputs[["grades"]]
  function(inpData, GCAID){
    .get_lvnm <-
      function(inpData) {
        # inpData <- first_data[,5]
        lvnm <-
          inpData %>%
          str_split(., ",") %>%
          unlist() %>%
          stri_replace_all_charclass(.,
                                     "\\p{WHITE_SPACE}", "")
        return(lvnm)
      }

    apply(inpData, 1, .get_lvnm) %>%
      data.frame() %>%
      set_names(., nm = GCAID)
  }
#' get_location
get_location <- function(fourthData, locNm, testinp){
  # fourthData <- fourth_data; locNm <- input$loc
  dataUsed <- fourthData %>% filter(GCA %in% testinp)
  GCAId <- fourthData %>% filter(GCA %in% testinp) %>% pull(GCA) %>% unique()

  location <-
    dataUsed %>%
    group_split(GCA) %>%
    map(., ~ .x %>% select(GCA, Item_ID, all_of(locNm))) %>%
    set_names(., nm = GCAId)

  bind_loc <-
    location %>%
    map(., ~ .x %>% arrange(!!as.name(locNm)) %>%
          mutate(OOD = 1:nrow(.)) %>%
          select(-c(1))
    )
  return(list(location = location, bind_loc = bind_loc))

}
sooyongl/ESS documentation built on Dec. 23, 2021, 4:22 a.m.