Nothing
#' Aggregate the students number data by class at the municipality and province level
#'
#' @description This function creates two dataframes with the number of students, classes and students by class, aggregated at the province and municipality level
#'
#'
#'
#'
#' @param data Either an object of class \code{list}, obtained as output of the \code{\link{Get_nstud}} function,
#' or an object of class class \code{tbl_df}, \code{tbl} and \code{data.frame}, obtained as output of the \code{\link{Util_nstud_wide}} function,
#' if \code{NULL}, the function will download it automatically but it will not be saved in the global environment. \code{NULL} by default.
#' @param Year Numeric or character value. The reference school year, if either of the \code{input_} arguments must be retrieved.
#' Available in the formats: \code{2022}, \code{"2022/2023"}, \code{"202223"}, \code{"20222023"}. \code{2023} by default
#' @param check Logical. If \code{TRUE}, the function runs the test of the students number availability across all school included in the school registries (see \code{\link{Util_Check_nstud_availability}}). \code{TRUE} by default
#' @param check_ggplot Logical. If \code{check == TRUE}, whether to display or not a static map of the availability of the students number by province; see also \code{\link{Util_Check_nstud_availability}}. \code{TRUE} by default.
#' @param check_registry Character. If \code{check == TRUE}, the school registries included in the \code{input_School2mun} object (see \code{\link{Get_School2mun}}) whose availability has to be checked. Either \code{"Registry_from_buildings"} (buildings section), \code{"Registry_from_registry"} (registry section), \code{"Any"} or \code{"Both"}. \code{"Any"} by default.
#' @param InnerAreas Logical. If \code{check == TRUE}, Whether it must be checked if municipalities belong to Inner areas or not. \code{TRUE} by default.
#' @param ord_InnerAreas Logical. If \code{check == TRUE} and \code{InnerAreas == TRUE}, whether the Inner areas classification should be treated as an ordinal variable rather than as a categorical one (see \code{\link{Get_InnerAreas}} for the classification).
#' \code{FALSE} by default.
#' @param missing_to_1 Logical. Only needed if \code{data} is not provided in wide format. Whether the number of classes should be imputed to 1 when it is missing; see \code{\link{Util_nstud_wide}}. \code{FALSE} by default.
#' @param verbose Logical. If \code{TRUE}, the user keeps track of the main underlying operations. \code{TRUE} by default.
#' @param input_Registry Object of class \code{tbl_df}, \code{tbl} and \code{data.frame}, obtained as output of the function \code{\link{Get_Registry}}
#' If \code{check == TRUE}, the school registry (the properly said one, from the registry section).
#' If \code{NULL}, it will be downloaded automatically, but not saved in the global environment.
#' \code{NULL} by default
#' @param input_InnerAreas Object of class \code{tbl_df}, \code{tbl} and \code{data.frame}.
#' The classification of peripheral municipalities, obtained as output of the \code{\link{Get_InnerAreas}} function.
#' Needed only if \code{check == TRUE} and \code{InnerAreas == TRUE}.
#' If \code{NULL}, it will be downloaded automatically, but not saved in the global environment.
#' \code{NULL} by default
#' @param input_Prov_shp Object of class \code{sf}, \code{tbl_df}, \code{tbl}, \code{data.frame}. The relevant shapefile of Italian municipalities, if both the \code{check} and \code{check_ggplot} options are chosen.
#' If \code{NULL} it is downloaded automatically but not saved in the global environment. \code{NULL} by default.
#' @param input_School2mun Object of class \code{list} with elements of class \code{tbl_df}, \code{tbl} and \code{data.frame}, obtained as output of the function \code{\link{Get_School2mun}}.
#' The mapping from school codes to municipality (and province) codes. Needed only if `check == TRUE`.
#' If \code{NULL}, it will be downloaded automatically, but not saved in the global environment. \code{NULL} by default.
#' @param input_AdmUnNames Object of class \code{tbl_df}, \code{tbl} and \code{data.frame}, obtained as output of the function \code{\link{Get_AdmUnNames}}
#' The ISTAT file including all the codes and the names of the administrative units for the year in scope.
#' Only needed if \code{check == TRUE} and the argument \code{input_School2mun} is \code{NULL}.
#' If \code{NULL}, it will be downloaded automatically, but not saved in the global environment. \code{NULL} by default.
#' @param autoAbort Logical. In case any data must be retrieved, whether to automatically abort the operation and return NULL in case of missing internet connection or server response errors. \code{FALSE} by default.
#' @param ... Additional arguments to the function \code{\link{Util_nstud_wide}} if \code{data} is not provided.
#'
#'
#'
#'
#' @return An object of class \code{list} including:
#'
#' \itemize{
#' \item \code{$Municipality_data}:
#' object of class \code{tbl_df}, \code{tbl} and \code{data.frame}, the output dataframe detailed at the municipality level
#'
#' \item \code{$Province_data}: object of class 'tbl_df', 'tbl' and 'data.frame', the output dataframe detailad at the province level
#' }
#'
#' @details Numerical variables are summarised by the mean; Boolean variables are summarised by the mean as well, thus they become frequency indicators.
#' Qualitative values, if included, are summarised by the mode. Summary measures do not include NAs.
#'
#'
#' @examples
#
#' Year <- 2023
#'
#' nstud23_aggr <- Group_nstud(data = example_input_nstud23, Year = Year,
#' input_Registry = example_input_Registry23,
#' InnerAreas = FALSE,
#' input_School2mun = example_School2mun23)
#'
#' summary(nstud23_aggr$Municipality_data[,c(46,47,48)])
#'
#' summary(nstud23_aggr$Province_data[,c(44,45,46)])
#'
#'
#' @export
Group_nstud <- function(data = NULL, Year = 2023,
check = TRUE, verbose = TRUE, check_registry = "Any",
InnerAreas = TRUE,
ord_InnerAreas = FALSE, check_ggplot = FALSE,
missing_to_1 = FALSE, input_Registry = NULL,
input_InnerAreas = NULL, input_Prov_shp = NULL,
input_School2mun = NULL, input_AdmUnNames = NULL,
autoAbort = FALSE, ...) {
options(dplyr.summarise.inform = FALSE)
. <- NULL
start.zero <- Sys.time()
while(is.null(data)){
data <- Get_nstud(Year = Year, verbose = verbose, autoAbort = autoAbort)
if(is.null(data)){
if(!autoAbort){
holdOn <- ""
message("Error during students counts retrieving. Would you abort the whole operation or retry?",
" - To abort the operation, press `A` \n",
" - To retry data retrieving, press any other key \n")
holdOn <- readline(prompt = " ")
if(toupper(holdOn) == "A"){
cat("You chose to abort the operation \n")
return(NULL)
} else {
cat("You chose to retry \n")
}
} else return(NULL)
}
}
if(is.data.frame(data)){
if(any(grepl("per.*class", names(data), ignore.case = TRUE)) ||
any(grepl("full_time", names(data), ignore.case = TRUE))){
nstud.wide <- data
} else {
nstud.wide <- Util_nstud_wide(data = data, missing_to_1 = missing_to_1, ... )
}
} else nstud.wide <- Util_nstud_wide(data = data, missing_to_1 = missing_to_1, ... )
if(verbose) cat("Linking schools to reference municipalities \n")
while(is.null(input_School2mun)){
input_School2mun <- Get_School2mun(
Year = Year,verbose = verbose,
input_Registry = input_Registry, input_AdmUnNames = input_AdmUnNames,
autoAbort = autoAbort)
if(is.null(input_School2mun)){
if(!autoAbort){
holdOn <- ""
message("Error occurred in mapping school to municipalities. Would you abort the whole operation or retry?",
" - To abort the operation, press `A` \n",
" - To retry data retrieving, press any other key \n")
holdOn <- readline(prompt = " ")
if(toupper(holdOn) == "A"){
cat("You chose to abort the operation \n")
return(NULL)
} else {
cat("You chose to retry \n")
}
} else return(NULL)
}
}
if(!is.data.frame(input_School2mun)) School2mun.R <- input_School2mun[[check_registry]]
if(verbose) cat("Aggregating schools data \n")
suppressWarnings(
nstud.wide_Mun <- dplyr::left_join(
nstud.wide, School2mun.R ,by = "School_code") %>%
dplyr::filter(!is.na(.data$Municipality_code)) %>% dplyr::relocate(
c(.data$Municipality_code, .data$Municipality_description,
.data$Province_code, .data$Province_initials), .after ="School_code") %>%
dplyr::select(-grep("Students_per_class", names(.))) %>% Group_Count(
groupcol = c("Municipality_code", "Municipality_description",
"Province_code", "Province_initials", "Order"),
startgroup = 7, FUN = sum, countname = "nschools")
)
suppressWarnings(
nstud.wide_Prov <- dplyr::left_join(
nstud.wide, School2mun.R, by = "School_code") %>%
dplyr::filter(!is.na(.data$Municipality_code)) %>% dplyr::relocate(
c(.data$Municipality_code, .data$Municipality_description,
.data$Province_code, .data$Province_initials), .after ="School_code") %>%
dplyr::select(-grep("Students_per_class", names(.))) %>% Group_Count(
groupcol = c("Province_code", "Province_initials", "Order"),
startgroup = 7, FUN = sum, countname = "nschools")
)
if(any(grepl("Students_per_class", names(nstud.wide)))){
for (i in (1:14)){
j <- 2*i + 5
nstud.wide_Mun <- nstud.wide_Mun %>%
dplyr::mutate(xx = as.numeric(unlist(dplyr::select(.,j)/dplyr::select(.,j+1) ) ) )
names(nstud.wide_Mun)[ncol(nstud.wide_Mun)] <- paste("Students_per_class_", ifelse(i<14, i,"Tot"), sep = "")
}
nstud.wide_Mun[, c(6:ncol(nstud.wide_Mun))][is.na(nstud.wide_Mun[ ,c(6:ncol(nstud.wide_Mun))])] <- 0
for (i in c(1:13)){
j <- i + 34
k <- 3*i + 5
nstud.wide_Mun <- nstud.wide_Mun %>% dplyr::relocate(j, .after = k)
}
for (i in (1:14)){
j <- 2*i + 3
nstud.wide_Prov <- nstud.wide_Prov %>%
dplyr::mutate(xx = as.numeric(unlist(dplyr::select(.,j)/dplyr::select(.,j+1) ) ) )
names(nstud.wide_Prov)[ncol(nstud.wide_Prov)] <- paste("Students_per_class_", ifelse(i<14, i,"Tot"), sep = "")
}
nstud.wide_Prov[, c(4:ncol(nstud.wide_Prov))][is.na(nstud.wide_Prov[ ,c(4:ncol(nstud.wide_Prov))])] <- 0
for (i in c(1:13)){
j <- i + 32
k <- 3*i + 3
nstud.wide_Prov <- nstud.wide_Prov %>%
dplyr::relocate(j, .after = k)
}
}
nstud.check <- NULL
if(check){
if(verbose) cat("Checking whether registered schools are included \n")
nstud.check <-
Util_Check_nstud_availability(nstud.wide, Year = Year,cutout = c("IC", "IS", "NR"),
ggplot = check_ggplot,
verbose = verbose, InnerAreas = InnerAreas,
ord_InnerAreas = ord_InnerAreas,
input_Registry = input_Registry, input_AdmUnNames = NULL,
input_InnerAreas = input_InnerAreas,
input_Prov_shp = input_Prov_shp, input_School2mun = input_School2mun,
autoAbort = autoAbort)
if(is.null(nstud.check)){
message("Error occurred during the students count availability check")
} else {
check_mun <- nstud.check$Municipality_data[[check_registry]]
if(InnerAreas){
if(ord_InnerAreas){
check_mun <- check_mun %>% dplyr::select(.data$Order, .data$Municipality_code, .data$Availability, .data$Inner_area,
.data$A_mun, .data$B_mun, .data$C_mun, .data$D_mun, .data$E_mun, .data$F_mun)
} else {
check_mun <- check_mun %>% dplyr::select(.data$Order, .data$Municipality_code, .data$Availability, .data$Inner_area)
}
} else {
check_mun <- check_mun %>% dplyr::select(.data$Order, .data$Municipality_code, .data$Availability)
}
check_mun <- check_mun %>%
tidyr::unite("ID", c(.data$Municipality_code, .data$Order), sep = "___") %>%
dplyr::mutate(ID = gsub(" ", "_", .data$ID))
nstud.wide_Mun <- nstud.wide_Mun %>%
tidyr::unite("ID", c(.data$Municipality_code, .data$Order), sep = "___") %>%
dplyr::mutate(ID = gsub(" ", "_", .data$ID)) %>%
dplyr::left_join(check_mun, by = "ID") %>%
tidyr::separate(.data$ID, into = c("Municipality_code", "Order"), sep = "___") %>%
dplyr::mutate(Order = gsub("_", " ", .data$Order))
check_prov <- nstud.check$Province_data[[check_registry]]
if(InnerAreas){
if(ord_InnerAreas){
check_prov <- check_prov %>%
dplyr::select(.data$Order, .data$Province_code, .data$Availability, .data$Inner_area,
.data$A_mun, .data$B_mun, .data$C_mun, .data$D_mun, .data$E_mun, .data$F_mun)
} else {
check_prov <- check_prov %>%
dplyr::select(.data$Order, .data$Province_code, .data$Availability, .data$Inner_area)
}
} else {
check_prov <- check_prov %>%
dplyr::select(.data$Order, .data$Province_code, .data$Availability)
}
check_prov <- check_prov %>%
tidyr::unite("ID", c(.data$Province_code, .data$Order), sep = "___") %>%
dplyr::mutate(ID = gsub(" ", "_", .data$ID))
nstud.wide_Prov <- nstud.wide_Prov %>%
tidyr::unite("ID", c(.data$Province_code, .data$Order), sep = "___") %>%
dplyr::mutate(ID = gsub(" ", "_", .data$ID)) %>%
dplyr::left_join(check_prov, by = "ID") %>%
tidyr::separate(.data$ID, into = c("Province_code", "Order"), sep = "___") %>%
dplyr::mutate(Order = gsub("_", " ", .data$Order)) %>%
dplyr::mutate(Province_code = as.numeric(.data$Province_code))
}
}
endtime <- Sys.time()
if(verbose){
cat(paste("Total time needed to aggregate students number data",
ifelse(check, "and check for data availability in school registries", ""),
ifelse(InnerAreas, "and for schools belonging to inner areas", ""),
":"), difftime(endtime, start.zero, units="secs"), "seconds \n" )
}
return(list(Municipality_data = nstud.wide_Mun, Province_data = nstud.wide_Prov))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.