R/hierarchy-helpers.R

Defines functions load_spatial_relationship np_rows zero_region_code zero_region_level zero_start_date zero_end_date zero_id zero_parent_id zero_row get_zero_idx spatial_hierarchy region_names get_id_idx get_leaf_idxs

Documented in get_id_idx get_leaf_idxs get_zero_idx load_spatial_relationship np_rows region_names spatial_hierarchy zero_region_code zero_row

#' Load relationship file
#'
#' The relationship file lists each region code, level, 
#' its parent region code, and the parent region level
#' along with the date when the region enters existence
#' and when the region ceases existence.  This is a way
#' of representing a hierarchy of groups that does not 
#' require all parts of the hierarchy to have the same
#' number of levels.
#'
#' When the talbe is loaded we create a unique id that is
#' the combination of the parent_region_code and the parent_region_level
#' so that the region_code field is not required to be unique.
#'
#' @param file path to .csv file describing a spatial relationship
#' @return table describing the spatial relationship
#' @export
load_spatial_relationship = function(file) {
  expected_columns = c("region_code", "region_level", 
    "parent_region_code", "parent_region_level",
    "start_date", "end_date")
  table = load_csv(file, "spatial_relationship_file")
  checkmate::assertDataFrame(table, col.names = "named", 
    ncol = length(expected_columns))
  checkmate::assertNames(names(table), must.include = expected_columns)

  table[['start_date']] = lubridate::ymd(table[['start_date']])
  table[['end_date']] = lubridate::ymd(table[['end_date']])
  table[['id']] = paste(table[['region_code']], table[['region_level']])
  table[['parent_id']] = paste(table[['parent_region_code']], table[['parent_region_level']]) 
  return(table) 
}

#' Make h_table no-parent rows 
#'
#' The no-parent rows are the parents of all regions that
#' themselves have no parent regions.  These are identified
#' as the rows who's parent_region_id does not appear in the 
#' 'id' column.  They are added so that all parts of the hierarchy
#' terminate at a well-defined "parent_region_level" regardless of the
#' depth of the hierarchy.
#'
#' @param table h_table to check for no-parent codes.
#' @return data frame rows for all no-parent rows (with ZERO as parent).
np_rows = function(table) {
  np_idxs = which(!(table[['parent_id']] %in% table[['id']]))
  np_rows = data.frame(
    region_code = table[['parent_region_code']][np_idxs], 
    region_level = table[['parent_region_level']][np_idxs],
    parent_region_code = zero_region_code(), 
    parent_region_level = zero_region_level(),
    start_date = table[['start_date']][np_idxs],
    end_date = table[['end_date']][np_idxs],
    id = table[['parent_id']][np_idxs],
    parent_id = zero_parent_id(),
    stringsAsFactors = FALSE)
  np_rows = unique(np_rows)
  return(np_rows)
}

#' Avoid hardcoding strings everywhere.
#'
#' @export zero_region_code
#' @export zero_region_level
#' @export zero_start_date
#' @export zero_end_date
#' @export zero_id
#' @export zero_parent_id
zero_region_code = function() "ZERO"
zero_region_level = function() "ZERO_LEVEL"
zero_start_date = function() NA
zero_end_date = function() NA
zero_id = function() paste(zero_region_code(), zero_region_level())
zero_parent_id = function() paste(zero_region_code(), zero_region_level())

#' Make h_table zero row.
#'
#' This row is the parent of the parents of rows with no parents.
#' It exists so that all rows have parents, even the rows that are
#' constructed to be the parents of rows with no parents...
#' 
#' @return an 'h_table' zero_Row
zero_row = function() {
  zero_row = data.frame(
    region_code = zero_region_code(), 
    region_level = zero_region_level(),
    parent_region_code = zero_region_code(), 
    parent_region_level = zero_region_level(),
    start_date = zero_start_date(), 
    end_date = zero_end_date(),
    id = zero_id(), 
    parent_id = zero_parent_id(),
    stringsAsFactors = FALSE
  )
  return(zero_row)
}

#' Return the index of the zero-value entry in the parameter index
#'
#' @return index of the zero entry in the 'h_table'
get_zero_idx = function() return(1)


#' Generate a single hierarchy table for an implied hierarchy
#'
#' The first item describes the terminal level for all levels.
#' Each item describes one level of the hierarchy.  That's an 'h_table'.
#'
#' @param files paths to .csv files describing spatial relationships
#' @return single hierarchy table
#' @export
spatial_hierarchy = function(files) {
  tables = lapply(files, load_spatial_relationship)
  table = do.call(rbind, c(tables, stringsAsFactors = FALSE))
  h_table = do.call(rbind, c(list(zero_row(), table, np_rows(table), stringsAsFactors = FALSE)))
  class(h_table) = c(class(h_table), 'h_table')
  return(h_table)
}

#' Get names of levels from a hierarchy table.
#' 
#' @param table table generated by `spatial_hierarchy`
#' @return names names of hierarchy levels
#' @export
region_names = function(table) {
  nm = table[['region_level']]
  nm = unique(nm)
  return(nm)
}

#' For a code return the index if a code exists or a zero otherwise
#' 
#' @param id paste(code, level)
#' @param table h_table object.
get_id_idx = function(id, table) {
  idx = which(id == table[['id']])
  if (length(idx) == 0)
    idx = get_zero_idx()
  return(idx)
}

#' Get the leaf indexes for an 'h_table'
#'
#' A leaf index is one that does _not_ appear as a parent.
#' 
#' @param table 'h_table'
get_leaf_idxs = function(table) {
  ids = table[['id']]
  leaf_idxs = which(!(ids %in% table[['parent_id']]))
  return(leaf_idxs)
}
sakrejda/hierarchy documentation built on Aug. 23, 2019, 7:15 p.m.