R/Population.R

Defines functions plot_relatioship print update update_hhsize household_type remove_emptied_households leave_household join_household

#' @title Population
#'
#' @description
#'
#' A [Container] specifically made for a [Individual] object and a [Household] object.
#' It also contains methods that modify both if its contained objects simutaneously.
#'
#' @usage NULL
#' @format [R6::R6Class] object inheriting from [Container]<-[ContainerGeneric]<-[Generic].
#' @include Individual.R
#' @include Household.R
#'
#' @section Construction:
#'
#' ```
#' Population$new(ind_data, hh_data, pid_col, hid_col)
#' ```
#'
#' * ind_data::[data.table::data.table]\cr
#'   Microdata of Individuals/Persons.
#'
#' * hh_data::[data.table::data.table]\cr
#'   Microdata of Households.
#'
#' * pid_col::`character(1)`\cr
#'   Individual/Person id column in `ind_data`.
#'
#' * hid_col::`character(1)`\cr
#'   Household id column in `hh_data`
#'
#' @section Public Fields:
#'
#' * `ind`:: an [Individual] object\cr
#'  Shorthand to the [Individual] object. This will be deprecated in a future release.
#'  Please use `$get()` to get a reference of object instead of this.
#'
#' * `hh`:: an [Household] object\cr
#'  Shorthand to the [Household] object. This will be deprecated in a future release.
#'  Please use `$get()` to get a reference of object instead of this.
#'
#' @section Public Methods:
#'
#' * `add_population(ind_data, hh_data)`\cr
#'  ([data.table::data.table()], [data.table::data.table()])\cr
#'  Add a new population. This requires that all individuals `ind_data` belong
#'  to valid households. In the case that `hh_data` is not provided, household ids of
#'  `ind_data` will be checked against the household ids of the existing households.
#'  All records in `ind_data` and `hh_data` will be assigned new unique ids to
#'  make sure that their ids are not a duplicate of the ids of existing entities
#'  of their respective entity type.
#'
#' * `join_household(ind_ids, hh_ids)`\cr
#'  (`integer()`, `integer()`)\cr
#'  Individuals join their new households and the households' affected attributes,
#'  from the joining of new members, will also be updated. Note! All individuals
#'  must not be in any household prior to joining a new one. Individual's
#'  existing household can be removed using `leave_household`.
#'
#' * `leave_household(ind_ids)`\cr
#'  (`integer()`)\cr
#'  Remove the household ids of the individuals in ind_ids and update the households'
#'  affected attributes, from their members leaving. Note that, if the
#'  household has no individuals then it will be removed. This will only cause
#'  a problem if all members of two or more households are to swap their households.
#'  There are no good reasons why that case should be allowed anyway.
#'
#' * `remove_emptied_households()`\cr
#'  Remove all emptied households.
#'
#' * `remove_population(pid, hid)`\cr
#'  (`integer()`, `integer()`)\cr
#'  Remove population from `$ind` and `$hh` of this `Pop` object. If only `hid`
#'  is given all household members of households in `hid` arg will be removed.
#'  To remove only individuals leave `hid` to NULL and specify individuals by their ids
#'  in `pid`.
#'
#' * `get_hhsize(hids = NULL)`\cr
#'  (`integer()` | `NULL`) -> (`integer()`)\cr
#'  Get household size of the households in `hids` if `NULL` then household size
#'  of all households will be returned.
#'
#' * `update_hhsize()`\cr
#'  Update household size of all household agents.
#'
#' * `update()`\cr
#'  Masks all the household update functions that need to be adjust after changes
#'  in household members or in their attributes; such as change in partnership status,
#'  change of income, birth.
#'
#' * `check_unique_id_cols(ind_data, hh_data = NULL)`\cr
#'  ([data.table::data.table()], [data.table::data.table()]) -> `logical(1)`\cr
#'  Check that all id cols of the input data are unique from the existing ids in
#'  their respective objects.
#'
#' * `plot_relationship(hid)`\cr
#'  (`integer(1)`)\cr
#'  Plot the relationship network within the household of `hid`.
#'
#' * `household_type(hid)`\cr
#'  (`integer()`) -> (`character()`)\cr
#'  Return the household type classification result of the households in `hid`.
#'  The result has be one of the following: 'couple_hh', 'couple_hh_with_children',
#'  'lone_parent_hh' and 'non_family_hh'. Note that, the classification doesn't
#'  take into account of the reference family of the household as we have yet to
#'  implement explicit distinction between single family household and multi-family
#'  household. Hence, if there are a couple and a lone parent residing in the same
#'  household it would be classified as a `couple_hh`.
#' @export
Population <- R6Class(
  "Population",
  inherit = Container,
  public = list(
    # public ------------------------------------------------------------------
    ind = NULL,
    hh = NULL,
    initialize = function(ind_data, hh_data, pid_col = NULL, hid_col = NULL) {
      checkmate::assert_data_table(ind_data, min.rows = 1)
      checkmate::assert_data_table(hh_data, min.rows = 1)
      checkmate::assert_character(pid_col, any.missing = FALSE, min.len = 1, unique = T)
      checkmate::assert_character(hid_col, any.missing = FALSE, min.len = 1, unique = T)
      checkmate::assert_names(names(ind_data), must.include = c(pid_col, hid_col))
      checkmate::assert_names(names(hh_data), must.include = hid_col)
      checkmate::assert_integerish(ind_data[[pid_col[1]]], lower = 1, unique = T, all.missing = FALSE)
      checkmate::assert_integerish(hh_data[[hid_col[1]]], lower = 1, unique = T, all.missing = FALSE)

      if (!checkmate::test_set_equal(unique(ind_data[[hid_col]]), hh_data[[hid_col]])) {
        stop(
          glue::glue(
            "Some ids in `hid_col` are not linkable between `ind_data` or `hh_data`. \\
             Please check for missing ids."
          )
        )
      }

      if (!"hhsize" %in% names(hh_data)) {
        lg$warn("Creating a `hhsize` column in `hh_data` as it is not provided.")
        hhsize_dt <- ind_data[, .(hhsize = .N), by = c(hid_col)]
        hh_data <- hh_data[hhsize_dt, , on = c(hid_col)]
      } else {
        checkmate::assert_integerish(hh_data[["hhsize"]],
          lower = 1,
          any.missing = FALSE,
          null.ok = FALSE
        )
      }

      if (nrow(ind_data) != hh_data[, sum(hhsize)]) {
        stop(glue::glue("The total number of individuals in `ind_data` does not \\
                        equal to the sum of household size (hhsize) of `hh_data`."))
      }

      self$add(Individual$new(ind_data, id_col = pid_col, hid_col = hid_col[1]), name = "Individual")
      self$add(Household$new(hh_data, id_col = hid_col), name = "Household")

      # make it compatible with old modules
      self$ind <- self$get("Individual")
      self$hh <- self$get("Household")

      return(invisible(self))
    },
    add_population = function(ind_data, hh_data = NULL) {
      Ind <- self$get("Individual")
      Hh <- self$get("Household")
      pid_col <- Ind$id_col
      hid_col <- Hh$id_col

      checkmate::assert_data_table(ind_data, min.rows = 1)
      checkmate::assert_character(pid_col, any.missing = FALSE, min.len = 1, unique = T)
      checkmate::assert_names(names(ind_data), must.include = c(pid_col, hid_col))
      checkmate::assert_integerish(ind_data[[pid_col[1]]], lower = 1, unique = T, all.missing = FALSE)

      if (!is.null(hh_data)) {
        checkmate::assert_data_table(hh_data, min.rows = 1)
        checkmate::assert_character(hid_col, any.missing = FALSE, min.len = 1, unique = T)
        checkmate::assert_names(names(hh_data), must.include = hid_col)
        checkmate::assert_integerish(hh_data[[hid_col[1]]], lower = 1, unique = T, all.missing = FALSE)

        if (!checkmate::test_set_equal(ind_data[[hid_col[1]]], hh_data[[hid_col[1]]], fmatch = TRUE)) {
          stop("Not all household ids exist in both `ind_data` and `hh_data`.")
        }

        # add household size
        if (!"hhsize" %in% names(hh_data)) {
          lg$warn("Creating `hhsize` as it is not provided with `hh_data`.")
          hhsize_dt <- ind_data[, .(hhsize = .N), by = c(hid_col)]
          hh_data <- hh_data[hhsize_dt, , on = c(hid_col)]
        } else {
          checkmate::assert_integerish(hh_data[["hhsize"]],
            lower = 1,
            any.missing = FALSE,
            null.ok = FALSE
          )
        }
        # check hhsize
        if (nrow(ind_data) != hh_data[, sum(hhsize)]) {
          stop(glue::glue("The total number of individuals in `ind_data` does not \\
                        equal to the sum of household size (hhsize) of `hh_data`."))
        }
      }

      # assign new ids
      ind_data <- register(Ind, ind_data)[[1]]
      if (!is.null(hh_data)) {
        pop_data_ls <- register(Hh, ind_data, hh_data)
        ind_data <- pop_data_ls$ind_data
        hh_data <- pop_data_ls$hh_data
      }
      rm(pop_data_ls)

      # add new data
      IndNewData <- DataBackendDataTable$new(ind_data, key = Ind$id_col[[1]])
      Ind$add(.data = IndNewData$data, add_population = TRUE)
      if (!is.null(hh_data)) {
        HhNewData <- DataBackendDataTable$new(hh_data, key = Hh$id_col[[1]])
        Hh$add(.data = HhNewData$data)
      }

      return(invisible(self))
    },
    join_household = function(ind_ids, hh_ids) {
      Ind <- self$get(Individual)
      Hh <- self$get(Household)
      assert_entity_ids(Ind, ind_ids)
      assert_entity_ids(Hh, hh_ids)
      # make sure all individuals in ind_ids don't have hid
      all_hids_are_na <-
        all(is.na(Ind$get_attr(x = Ind$get_hid_col(), ids = ind_ids)))
      if (!all_hids_are_na) {
        stop("Not all individuals in ind_ids have left their households.")
      }
      # update hid for individidual in ind_ids
      Ind$add_household_id(ids = ind_ids, hh_ids = hh_ids)
      add_history(entity = Ind, ids = ind_ids, event = EVENT$JOINED_HOUSEHOLD)
      # update household attributes
      self$update()
      invisible()
    },
    leave_household = function(ind_ids) {
      # check that ids in ind_ids and their household ids exist
      stopifnot(self$get("Individual")$ids_exist(ids = ind_ids))
      stopifnot(self$get("Household")$ids_exist(ids = self$get("Individual")$get_household_ids(ids = ind_ids)))
      # leave household
      self$get("Individual")$remove_household_id(ids = ind_ids)
      add_history(
        entity = self$get("Individual"),
        ids = ind_ids, event = EVENT$LEFT_HOUSEHOLD
      )
      # households update themselves
      self$update()
      invisible()
    },
    remove_emptied_households = function(update_hhsize = TRUE) {
      checkmate::assert_flag(update_hhsize, na.ok = FALSE)
      if (update_hhsize) {
        self$update_hhsize()
      }
      Hh <- self$get("Household")
      hh_with_hhsize_0 <- Hh$get_data()[hhsize == 0, get(Hh$get_id_col())]
      if (length(hh_with_hhsize_0) != 0) {
        self$log(desc = "n_emptied_households_removed", value = length(hh_with_hhsize_0))
        Hh$remove(ids = hh_with_hhsize_0)
      }
      invisible()
    },
    household_type = function(hids, .debug = FALSE) {
      Ind <- self$get("Individual")
      Hh <- self$get("Household")
      if (!missing(hids)) {
        assert_entity_ids(x = Hh, ids = hids, informative = T)
        idx <- which(Ind$get_attr(x = Ind$get_hid_col()) %in% hids)
      } else {
        hids <- unique(Ind$get_attr(Ind$get_hid_col()))
        idx <- seq_len(Ind$n())
      }
      household_type <-
        Ind$get_data()[idx, ] %>%
        # group ids
        .[, .(
          members = list(pid),
          parents = list(as.vector(na.omit(mother_id, father_id))),
          partners = list(as.vector(na.omit(partner_id)))
        ), by = c(Ind$get_hid_col())] %>%
        # identify relationships
        .[, `:=`(
          couple_hh = purrr::map2_lgl(members, partners, ~ {
            any(.y %in% .x)
          }),
          with_children = purrr::map2_lgl(members, parents, ~ {
            any(.y %in% .x)
          })
        )] %>%
        # household type classification
        .[, household_type := fcase(
          couple_hh & !with_children, "couple_hh",
          couple_hh & with_children, "couple_hh_with_children",
          !couple_hh & with_children, "lone_parent_hh",
          default = "non_family_hh"
        )] %>%
        # merge to sort in the original order of `hid`
        merge(
          data.table(id = hids),
          .,
          by.x = "id",
          by.y = Ind$get_hid_col(),
          sort = FALSE,
          allow.cartesian = FALSE
        )

      checkmate::assert_character(household_type[["household_type"]], any.missing = FALSE)

      if (.debug) {
        return(household_type)
      } else {
        return(household_type[["household_type"]])
      }
    },
    remove_population = function(pid, hid) {
      if (missing(pid) & missing(hid)) {
        stop("`pid` or `hid` or both must be specified.")
      }

      if (!missing(hid)) {
        checkmate::check_integerish(hid, lower = 1, any.missing = FALSE)
        member_ids <- self$get("Individual")$get_ids_in_hids(hids = hid)
        self$get("Individual")$remove(ids = member_ids)
        self$get("Household")$remove(ids = hid)
      }
      if (!missing(pid)) {
        checkmate::check_integerish(pid, lower = 1, any.missing = FALSE)
        self$get("Individual")$remove(ids = pid)
        self$remove_emptied_households(update_hhsize = TRUE)
      }
      invisible()
    },
    check_hhsize = function() {
      n_individuals <- self$get("Individual")$n()
      n_members_in_households <- sum(self$get_hhsize())
      n_households <- self$get("Household")$n()
      n_non_emptied_households <- sum(self$get_hhsize() != 0)
      n_emptied_households <- n_non_emptied_households - n_non_emptied_households
      if (n_households != n_non_emptied_households) {
        stop(glue::glue("Emptied households exist. There are {n_emptied_households} \\
                        empied households."))
      }
      lg$info("check_hhsize: returns consitence is true.")
      return(invisible(list(
        n_inds = n_individuals,
        n_members_in_households = n_members_in_households,
        n_households = n_households,
        n_non_emptied_households = n_non_emptied_households
      )))
    },
    check_unique_id_cols = function(ind_data, hh_data) {
      checkmate::assert_data_table(ind_data, null.ok = FALSE)
      # specify id cols to be checked
      pid_cols <- c(self$get("Individual")$get_id_col(), IND$ID_COLS)
      hid_col <- self$get("Household")$get_id_col()
      # extract all ids
      ind_data_pids <-
        ind_data[, unlist(lapply(.SD, unlist)), .SDcol = pid_cols] %>%
        unique() %>%
        .[!is.na(.)]
      # check uniqueness
      if (self$get("Individual")$ids_exist(ind_data_pids)) {
        stop("There are ids that exist in data already.")
      }
      # if no hh_data is given then all household id should be NA
      if (missing(hh_data)) {
        if (!all(is.na(ind_data[[hid_col]]))) {
          stop(glue::glue("Not all household ids are NAs. When hh_data is not \\
                            given it is expected that individuals in ind_data will \\
                            join existing households hence all their household id \\
                           which in this case is `{hid_col}` should all be NAs."))
        }
      }
      # for household id (hid)
      if (!missing(hh_data)) {
        checkmate::assert_data_table(hh_data, null.ok = FALSE)
        # extract all ids
        hh_data_hids <-
          hh_data[, unlist(.SD, use.names = FALSE), .SDcols = hid_col] %>%
          .[!is.na(.)]
        ind_data_hids <-
          ind_data[, unlist(.SD, use.names = FALSE), .SDcols = hid_col] %>%
          unique() %>%
          .[!is.na(.)]
        # check uniqueness
        if (self$get("Household")$ids_exist(hh_data_hids)) {
          stop("Some hids in hh_data exist in the household data of the existing population")
        }
        if (self$get("Household")$ids_exist(ind_data_hids)) {
          stop("Some hids in ind_data exist in the household data of the existing population")
        }
      }
      return(TRUE)
    },
    get_hhsize = function(hids = NULL) {
      hid_col <- self$get("Individual")$get_hid_col()
      if (is.null(hids)) {
        hhsize_dt <-
          self$get("Individual")$get_data()[, .(hhsize = .N), by = c(hid_col)]
        hids <- self$get("Household")$get_ids()
      } else {
        stopifnot(self$get("Household")$ids_exist(hids))
        hhsize_dt <-
          self$get("Individual")$get_data() %>%
          .[get(hid_col) %in% hids, .(hhsize = .N), by = c(hid_col)]
      }

      # make sure all hids in the household object are get returned
      # eventhough no individual agents belong to those household agents.
      hhsize_dt <-
        merge(
          x = data.table::data.table(hid = hids),
          y = hhsize_dt,
          by.x = "hid",
          by.y = hid_col,
          all.x = T, # this make sure all hids get returned
          sort = FALSE
        ) %>%
        # replace NAs with 0s
        .[is.na(hhsize), hhsize := 0]

      return(hhsize_dt[["hhsize"]])
    },
    update_hhsize = function() {
      hid_col <- self$get("Household")$get_id_col()
      self$get("Household")$get_data(copy = FALSE)[, hhsize := self$get_hhsize()]
      invisible()
    },
    update = function() {
      self$update_hhsize()
    },
    print = function() {
      super$print()
      for (e in self$Cont) {
        e$print()
      }
    },
    plot_relatioship = function(hid) {
      if (!requireNamespace("visNetwork", quietly = TRUE)) {
        .choice <-
          utils::menu(
            choices = c("Yes", "No"),
            title = glue::glue("plot_relationship needs the `visNetwork` package. \\
                                Would you like to download the sf package now?")
          )
        if (.choice == 1) {
          install.packages("visNetwork", repos = "https://cloud.r-project.org")
        } else {
          stop("The `visNetwork` package is not installed.")
        }
      }

      Ind <- self$get("Individual")
      Hh <- self$get("Household")

      members <- inspect(Hh, hid, Ind, verbose = FALSE)$related_entity

      nodes <- members[, .(
        id = pid,
        label = paste0(pid, ":", age),
        group = sex,
        title = paste0(
          "<p>pid:<b>", pid, "</b><br>",
          "Age:<b>", age, "</b><br>",
          "MS:<b>", marital_status, "</b></p>"
        )
      )]

      edges <-
        rbindlist(list(
          members[, .(from = pid, to = father_id, label = "father")],
          members[, .(from = pid, to = mother_id, label = "mother")],
          members[, .(from = pid, to = partner_id, label = "partner")]
        ))

      visNetwork::visNetwork(nodes, edges) %>%
        visNetwork::visEdges(
          shadow = TRUE,
          arrows = list(to = list(
            enabled = TRUE, scaleFactor = 1
          )),
          color = list(color = "lightblue", highlight = "red")
        ) %>%
        visNetwork::visGroups(
          groupname = "female",
          color = "salmon",
          shape = "circle",
          shadow = list(enabled = TRUE)
        ) %>%
        visNetwork::visGroups(
          groupname = "male",
          color = "#97C2FC",
          shape = "circle",
          shadow = list(enabled = TRUE)
        ) %>%
        visNetwork::visLegend(
          width = 0.2,
          position = "right",
          main = "Group"
        ) %>%
        visNetwork::visEdges(smooth = FALSE) %>%
        visNetwork::visInteraction(navigationButtons = TRUE)
    }
  )
)
dymium-org/dymiumCore documentation built on July 18, 2021, 5:10 p.m.