#' Extract identifying variables
#'
#' This function extracts the identifying variables from a table by applying a
#' schema description to it.
#' @param schema [\code{character(1)}]\cr the (validated) schema description of
#' \code{input}.
#' @param input [\code{character(1)}]\cr table to reorganise.
#' @return a list per cluster with values of the identifying variables
#' @examples
#' input <- tabs2shift$clusters_nested
#' schema <- setCluster(id = "sublevel",
#' group = "territories", member = c(1, 1, 2),
#' left = 1, top = c(3, 8, 15)) %>%
#' setIDVar(name = "territories", columns = 1, rows = c(2, 14)) %>%
#' setIDVar(name = "sublevel", columns = 1, rows = c(3, 8, 15)) %>%
#' setIDVar(name = "year", columns = 7) %>%
#' setIDVar(name = "commodities", columns = 2) %>%
#' setObsVar(name = "harvested", columns = 5) %>%
#' setObsVar(name = "production", columns = 6)
#'
#' validateSchema(schema = schema, input = input) %>%
#' getIDVars(input = input)
#' @importFrom checkmate assertTRUE
#' @importFrom tibble tibble
#' @importFrom purrr map set_names map_dfc
#' @importFrom dplyr row_number filter select
#' @importFrom tidyr extract unite fill
#' @importFrom tidyselect all_of
#' @export
getIDVars <- function(schema = NULL, input = NULL){
assertTRUE(x = schema@validated)
clusters <- schema@clusters
nClusters <- max(lengths(clusters))
variables <- schema@variables
filter <- schema@filter
idVars <- map(.x = seq_along(variables), .f = function(ix){
# unselect those id variables that are also cluster or group id
if(variables[[ix]]$vartype == "id" & !names(variables)[ix] %in% c(clusters$id, clusters$group)){
variables[ix]
}
})
idVars <- unlist(idVars, recursive = FALSE)
if(length(idVars) != 0){
out <- map(.x = 1:nClusters, .f = function(ix){
vars <- NULL
for(i in 1:length(idVars)){
tempVar <- idVars[[i]]
varRow <- clusters$row[ix]:(clusters$row[ix]+clusters$height[ix] - 1)
if(!is.null(tempVar$value)){
temp <- tibble(X = tempVar$value)
} else {
if(!is.null(tempVar$row[ix])){
if(!tempVar$dist){
# in case a row value is set, this means we deal with a variable that is not long ...
temp <- input[tempVar$row[ix], unique(tempVar$col)]
rowFilter <- NULL
if(!is.null(filter$col)){
colFilter <- colnames(temp)[tempVar$col %in% filter$col]
} else {
colFilter <- NULL
}
} else {
# ... or distinct from clusters
temp <- input[unique(tempVar$row), unique(tempVar$col)]
rowFilter <- NULL
colFilter <- NULL
}
} else {
if(!is.null(tempVar$merge)){
temp <- input[varRow, tempVar$col]
rowFilter <- filter$row
colFilter <- NULL
} else {
temp <- input[varRow, tempVar$col[ix]]
rowFilter <- which(varRow %in% filter$row)
colFilter <- NULL
}
}
# apply a row filter ...
if(!is.null(rowFilter)){
temp <- temp %>%
filter(row_number() %in% rowFilter)
}
# ... and column filter
if(!is.null(colFilter)){
temp <- temp %>%
select(all_of(colFilter))
}
# copy missing values downwards
if(anyNA(temp[1])){
message("filling NA-values in variable '", names(idVars[i]),"'.")
temp <- temp %>%
fill(1, .direction = "down")
}
# split ...
if(!is.null(tempVar$split)){
# need to distinguish between one and several columns
if(dim(temp)[2] == 1){
temp <- temp %>%
extract(col = 1, into = names(temp), regex = tempVar$split)
} else {
temp <- map(.x = seq_along(temp), .f = function(iy){
temp %>%
select(all_of(iy)) %>%
tidyr::extract(col = 1, into = names(temp)[iy], regex = tempVar$split)
}) %>% bind_cols(.name_repair = "check_unique")
}
}
# ... or merge the variable
if(!is.null(tempVar$merge)){
newName <- paste0(names(temp), collapse = tempVar$merge)
temp <- temp %>%
unite(col = !!newName, sep = tempVar$merge)
}
}
vars <- c(vars, set_names(x = list(temp), nm = names(idVars)[i]))
}
return(vars)
})
} else {
out <- NULL
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.