Nothing
#' Returns the formatted demographic data set
#'
#' Loads and formats the student demographic data set from the OULAD for data analysis.
#'
#' @param module name of the module to be included, either `"All"`, `"AAA"`, `"BBB"`, `"CCC"`, `"DDD"`, `"EEE"`, `"FFF"` or `"GGG"`.
#' @param presentation name of the semester of the module to be included, either `"2013B"`,
#' `"2014B"`, `"2013J"`, `"2014J"`, `"All"`, `"Summer"` or `"Winter"`.
#' `"B"` indicates a February start time whereas `"J"` indicates an October start time. `"All"` indicates
#' that all presentations of the module will be included in the returned data. `"Summer"` returns both `"2013B"` and `"2014B"`.
#' `"Winter"` returns both `"2013J"` and `"2014J"`.
#' @param repeat_students indicator of whether students who had previous attempts at the module should be removed, either `"remove"` or `"keep"`.
#'
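#' @returns Returns a list with four elements: `studentInfo`, a `tibble` (object of class `tbl_df`) based on the
#' OULAD studentInfo.csv file and filtered by the specified inputs, and `module`, `presentation` and
#' `repeat_students`, the matched values of the inputs. If the data request fails with an HTTP error,
#' a message is shown and `NULL` is returned instead.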
#'
#' The `tibble` consists of 12 columns (Kuzilek et al., 2017):
#'
#' \itemize{
#' \item{code_module - module identification code.}
#' \item{code_presentation - module presentation identification code.}
#' \item{id_student - the unique student identification number.}
#' \item{gender - student’s gender, either Male or Female.}
#' \item{region - the geographic region where the student lived while taking the module-presentation.}
#' \item{highest_education - the highest student education level on entry to the module presentation.}
#' \item{imd_band - the index of multiple deprivation band of the place where the student lived during the module-presentation.}
#' \item{age_band - a band of student’s age.}
#' \item{num_of_prev_attempts - the number of times the student has attempted this module previously.}
#' \item{studied_credits - the total number of credits for the modules the student is currently studying.}
#' \item{disability - indicates whether the student has declared a disability.}
#' \item{final_result - student’s final result in the module-presentation.}
#' }
#'
#' @references
#' Kuzilek, J., Hlosta, M., & Zdrahal, Z. (2017). Open university learning analytics dataset. Scientific Data,
#' volume 4, (pp. 1–8). https://doi.org/10.1038/sdata.2017.171.
#'
#' @seealso [combined_dataset()]
#' @export
#' @importFrom dplyr "filter" "tibble"
#' @importFrom magrittr "%>%"
#' @importFrom httr "status_code"
#' @examples
#' dataset_demographics(module = "BBB", presentation = "2013J", repeat_students = "remove")
dataset_demographics <- function(module = c("All", "AAA", "BBB", "CCC", "DDD", "EEE", "FFF", "GGG"),
                                 presentation = c("2013B", "2014B", "2013J", "2014J",
                                                  "All", "Summer", "Winter"),
                                 repeat_students = c("remove", "keep")) {
  # Overview: validate the inputs, load the studentInfo data, coerce every
  # column to its analysis type, then filter by module, repeat status and
  # presentation. Returns a list holding the filtered tibble and the matched
  # inputs, or NULL (with a message) when the data cannot be retrieved.

  # Validate the enumerated arguments first so that a typo fails immediately
  # rather than after the download.
  module <- match.arg(module)
  presentation <- match.arg(presentation)
  repeat_students <- match.arg(repeat_students)

  # Bind the column names used inside filter() locally (avoids R CMD check
  # notes about undefined global variables).
  code_module <- code_presentation <- num_of_prev_attempts <- NULL

  # Demographic data. load_github_modified() is handed this function's
  # environment, into which it is expected to load the `studentInfo` object.
  env <- environment()
  response1 <- tryCatch(
    load_github_modified("https://github.com/ehoward1/oulad_data/blob/5bbf34af9922471385371e0d26133d54015f21a2/studentInfo.RData", env),
    error = function(e) e
  )

  # Fail gracefully (message + NULL) when the download itself errors,
  # consistent with the handling of HTTP error statuses below.
  if (inherits(response1, "error")) {
    message("The data could not be retrieved: ", conditionMessage(response1))
    return(NULL)
  }

  # A non-character result is treated as an HTTP response object whose status
  # code tells us why the download failed.
  if (!is.character(response1)) {
    status <- httr::status_code(response1)
    if (status == 429) {
      message("Too many requests. Please wait and try again later.")
      return(NULL)
    } else if (status >= 400) {
      message(sprintf("HTTP error %s. The resource may have changed.", status))
      return(NULL)
    }
  }

  # Make sure the data really arrived in this environment; otherwise the code
  # below would either error obscurely or pick up an unrelated object with the
  # same name from an enclosing scope.
  if (!exists("studentInfo", envir = env, inherits = FALSE)) {
    message("The student demographic data could not be loaded.")
    return(NULL)
  }
  studentInfo <- get("studentInfo", envir = env, inherits = FALSE)

  # Convert to correct data types
  studentInfo$code_module <- factor(studentInfo$code_module)
  studentInfo$code_presentation <- factor(studentInfo$code_presentation)
  studentInfo$id_student <- as.character(studentInfo$id_student)
  studentInfo$gender <- factor(studentInfo$gender,
                               levels = c("F", "M"),
                               labels = c("Female", "Male"))
  studentInfo$region <- factor(studentInfo$region)
  studentInfo$highest_education <- factor(
    studentInfo$highest_education,
    levels = c("No Formal quals", "Lower Than A Level",
               "A Level or Equivalent", "HE Qualification",
               "Post Graduate Qualification"),
    ordered = TRUE
  )
  studentInfo$imd_band <- factor(studentInfo$imd_band, ordered = TRUE)
  # Harmonise the one band that lacks a trailing "%" and treat the empty
  # string as missing.
  levels(studentInfo$imd_band)[levels(studentInfo$imd_band) == "10-20"] <- "10-20%"
  levels(studentInfo$imd_band)[levels(studentInfo$imd_band) == ""] <- NA
  studentInfo$age_band <- factor(studentInfo$age_band,
                                 levels = c("0-35", "35-55", "55<="),
                                 ordered = TRUE)
  studentInfo$num_of_prev_attempts <- as.numeric(as.character(studentInfo$num_of_prev_attempts))
  studentInfo$disability <- factor(studentInfo$disability)
  studentInfo$studied_credits <- factor(studentInfo$studied_credits, ordered = TRUE)
  studentInfo$final_result <- factor(studentInfo$final_result)

  # Filter by module
  if (module != "All") {
    studentInfo <- filter(studentInfo, code_module == module)
  }

  # Remove repeating students
  if (repeat_students == "remove") {
    studentInfo <- filter(studentInfo, num_of_prev_attempts == 0)
  }

  # Filter presentation: "Summer"/"Winter" expand to two presentations,
  # "All" keeps everything, anything else is a single specific presentation.
  wanted_pres <- switch(presentation,
    Summer = c("2013B", "2014B"),
    Winter = c("2013J", "2014J"),
    All = NULL,
    presentation
  )
  if (!is.null(wanted_pres)) {
    # A specific presentation must exist in the data remaining after the
    # module / repeat-student filters.
    if (length(wanted_pres) == 1L &&
        !(wanted_pres %in% unique(studentInfo$code_presentation))) {
      stop("This presentation does not exist for ", module, call. = FALSE)
    }
    studentInfo <- filter(studentInfo, code_presentation %in% wanted_pres)
  }

  list(studentInfo = tibble(studentInfo),
       module = module,
       presentation = presentation,
       repeat_students = repeat_students)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.