R/fill_gaps.R

Defines functions check_inputs_fill_gaps weighted_average_poverty_stats fg_adjust_poverty_stats fg_create_params fg_compute_pip_stats fill_gaps

Documented in fill_gaps

# Add global variables to avoid NSE notes in R CMD check
if (getRversion() >= "2.15.1") {
  utils::globalVariables(
    c(
      "df0", "predicted_request_mean", "request_year",
      "survey_year", "data", "poverty_line",
      "distribution_type", "default_ppp"
    )
  )
}

#' Fill gaps
#'
#' Calculate poverty statistics for a request year for which survey data is not
#' available.
#'
#' The predicted request year mean(s) must be in comparable international
#' dollars and adjusted for differences in purchasing-power, and changes in
#' prices and currencies.
#'
#' The survey data must contain a column named *welfare* and optionally a column
#' named *weight* if welfare values are to be weighted.
#'
#' @param request_year integer: A value with the request year.
#' @param data list: A list with one or two data frames containing survey data.
#'   See details.
#' @param predicted_request_mean numeric: A vector with one or two survey means.
#'   See details.
#' @param survey_year numeric: A vector with one or two survey years.
#' @param poverty_line numeric: Daily poverty line in international dollars.
#' @param default_ppp numeric: Default purchasing power parity.
#' @param ppp numeric: PPP request by user.
#' @param distribution_type character: A vector with the type of distribution,
#'   must be either micro, group, aggregate or imputed.
#'
#' @seealso [deflate_welfare_mean()] [predict_request_year_mean()]
#'
#' @examples
#' # Load example data
#' data("md_ABC_2000_income")
#' data("md_ABC_2010_income")
#' md_ABC_2010_income <-
#'   wbpip:::md_clean_data(md_ABC_2010_income,
#'     welfare = "welfare",
#'     weight = "weight"
#'   )$data
#'
#' # Extrapolation
#' res <- fill_gaps(
#'   request_year = 2005,
#'   survey_year = 2000,
#'   data = list(df0 = md_ABC_2000_income),
#'   predicted_request_mean = 13,
#'   default_ppp = 1,
#'   distribution_type = "micro",
#'   poverty_line = 1.9
#' )
#'
#' # Interpolation (monotonic)
#' res <- fill_gaps(
#'   request_year = 2005,
#'   survey_year = c(2000, 2010),
#'   data = list(df0 = md_ABC_2000_income, df1 = md_ABC_2010_income),
#'   predicted_request_mean = c(13, 13),
#'   default_ppp = c(1, 1),
#'   distribution_type = "micro",
#'   poverty_line = 1.9
#' )
#'
#' # Interpolation (non-monotonic)
#' res <- fill_gaps(
#'   request_year = 2005,
#'   survey_year = c(2000, 2010),
#'   data = list(df0 = md_ABC_2000_income, df1 = md_ABC_2010_income),
#'   predicted_request_mean = c(14, 17),
#'   default_ppp = c(1, 1),
#'   distribution_type = "micro",
#'   poverty_line = 1.9
#' )
#' @export
fill_gaps <- function(request_year,
                      data = list(df0, df1 = NULL),
                      predicted_request_mean,
                      survey_year,
                      default_ppp,
                      ppp = NULL,
                      distribution_type,
                      poverty_line = 1.9) {

  # CHECKS
  environment(check_inputs_fill_gaps) <- environment()
  check_inputs_fill_gaps()

  # Calculate poverty stats
  out <- fg_compute_pip_stats(
    request_year = request_year,
    data = data,
    predicted_request_mean = predicted_request_mean,
    survey_year = survey_year,
    default_ppp = default_ppp,
    ppp = ppp,
    distribution_type = distribution_type,
    poverty_line = poverty_line
  )

  return(out)
}

#' fg_compute_pip_stats
#'
#' A helper function for calculating interpolated or extrapolated poverty
#' statistics.
#'
#' @inheritParams fill_gaps
#' @return list
#' @noRd
fg_compute_pip_stats <- function(request_year,
                                 data,
                                 predicted_request_mean,
                                 survey_year,
                                 default_ppp,
                                 ppp,
                                 distribution_type,
                                 poverty_line) {

  # Set type
  type <- distribution_type
  if (length(type) == 1 & length(predicted_request_mean) == 2) {
    type <- rep(type, 2)
  }

  # Number of supplied surveys
  n_surveys <- length(type)

  # Create list of parameters
  params <- fg_create_params(
    predicted_request_mean = predicted_request_mean,
    data = data,
    poverty_line = poverty_line,
    default_ppp = default_ppp,
    ppp = ppp
  )

  # Calculate poverty stats
  dl <- vector(mode = "list", length = n_surveys)
  for (i in seq_along(type)) {
    dl[[i]] <- do.call(fg_select_compute_pip_stats[[type[i]]], params[[i]])
  }

  # If interpolating between two surveys then calculate
  # a weighted average for the request year
  if (n_surveys == 2) {
    out <- fg_adjust_poverty_stats(dl[[1]], dl[[2]], survey_year, request_year)
    # Else returned the extrapolation for the request year as is
  } else {
    out <- dl[[1]]
  }

  return(out)
}

#' fg_create_params
#'
#' Create parameters to be used in `fg_compute_pip_stats()`.
#'
#' @inheritParams fg_compute_pip_stats
#' @return list
#' @noRd
fg_create_params <- function(predicted_request_mean,
                             data,
                             poverty_line,
                             default_ppp,
                             ppp) {

  # If one survey
  if (length(predicted_request_mean) == 1) {
    params <- list(
      params0 = list(
        welfare = data$df0$welfare,
        population = data$df0$weight,
        povline = poverty_line,
        default_ppp = default_ppp[1],
        ppp = ppp,
        requested_mean = predicted_request_mean[1]
      )
    )
    # If two surveys (micro or grouped)
  } else {
    params <- list(
      params0 = list(
        welfare = data$df0$welfare,
        population = data$df0$weight,
        povline = poverty_line,
        default_ppp = default_ppp[1],
        ppp = ppp,
        requested_mean = predicted_request_mean[1]
      ),
      params1 = list(
        welfare = data$df1$welfare,
        population = data$df1$weight,
        povline = poverty_line,
        default_ppp = default_ppp[2],
        ppp = ppp,
        requested_mean = predicted_request_mean[2]
      )
    )
  }

  return(params)
}

#' fg_select_compute_pip_stats
#'
#' A small wrapper function for picking the correct `compute_pip_stats()`
#' function depending on distribution type.
#'
#' @noRd
fg_select_compute_pip_stats <- list(
  micro = function(...) md_compute_pip_stats(...),
  group = function(...) gd_compute_pip_stats(...),
  aggregate = function(...) gd_compute_pip_stats(...),
  imputed = function(...) md_compute_pip_stats(...)
)

#' fg_adjust_poverty_stats
#'
#' Calculate a weighted average for poverty statistics based on the difference
#' between the two survey_years and the request year. This is used when the
#' interpolation method is non-monotonic.
#'
#' @param stats0 list: A list with poverty statistics.
#' @param stats1 list: A list with poverty statistics.
#' @param request_year integer: A value with the request year.
#' @param survey_year numeric: A vector with one or two survey years.
#' @return numeric
#' @noRd
fg_adjust_poverty_stats <- function(stats0, stats1, survey_year, request_year) {

  out <- weighted_average_poverty_stats(stats0, stats1, survey_year, request_year)
  # Set distributional statistics to missing
  # It does not make sense to interpolate these values
  out[c("polarization", "gini", "mld", "median", "deciles")] <- NA_real_

  return(out)
}

#' weighted_average_poverty_stats
#' @return numeric
#' @noRd
weighted_average_poverty_stats <- function(stats0, stats1, survey_year, request_year) {
  # Calculate a weighted average for the poverty stats by taking the
  # difference between the two survey years and the request year
  mapply(function(measure0, measure1) {
    ((survey_year[2] - request_year) * measure0 +
       (request_year - survey_year[1]) * measure1) /
      (survey_year[2] - survey_year[1])
  },
  stats0, stats1, SIMPLIFY = FALSE)
}

#' check_inputs_fill_gaps
#' @return logical
#' @noRd
check_inputs_fill_gaps <- function() {

  # Check for incorrect distribution types
  for (i in seq_along(distribution_type)) {
    if (!distribution_type[i] %in% c("micro", "group", "aggregate", "imputed")) {
      rlang::abort(
        c("Incorrect value in `distribution_type`:",
          i = "`distribution_type` accepts the following values; 'micro', 'group, 'aggregate' and 'imputed'.",
          x = sprintf("You've supplied '%s'.", distribution_type[i])
        )
      )
    }
  }

  # Check for names in data input
  if (!"df0" %in% names(data)) {
    rlang::abort(c("`data$df0` not found."))
  }
  if (length(data) == 2 & !"df1" %in% names(data)) {
    rlang::abort(c("`data$df1` not found."))
  }

  # CHECK for column names
  if (!"welfare" %in% colnames(data$df0)) {
    rlang::abort(c("`data$df0` needs to contain a column named welfare."))
  }
  if (!is.null(data$df1) & !"welfare" %in% colnames(data$df1)) {
    rlang::abort(c("`data$df1` needs to contain a column named welfare."))
  }

  # CHECK for correct classes
  if (!is.numeric(request_year)) {
    rlang::abort(c("`request_year` must be a numeric or integer vector:",
      x = sprintf(
        "You've supplied an object of class %s.",
        class(request_year)
      )
    ))
  }
  if (!is.numeric(predicted_request_mean)) {
    rlang::abort(c("`predicted_request_mean` must be a numeric or integer vector:",
      x = sprintf(
        "You've supplied an object of class %s.",
        class(predicted_request_mean)
      )
    ))
  }
  if (!is.numeric(survey_year)) {
    rlang::abort(c("`survey_year` must be a numeric or integer vector:",
      x = sprintf(
        "You've supplied an object of class %s.",
        class(survey_year)
      )
    ))
  }
  if (!is.numeric(poverty_line)) {
    rlang::abort(c("`poverty_line` must be a numeric or integer vector:",
      x = sprintf(
        "You've supplied an object of class %s.",
        class(poverty_line)
      )
    ))
  }
  if (!is.numeric(default_ppp)) {
    rlang::abort(c("`default_ppp` must be a numeric or integer vector:",
      x = sprintf(
        "You've supplied an object of class %s.",
        class(default_ppp)
      )
    ))
  }
  if (!is.numeric(data$df0$welfare)) {
    rlang::abort(c("`data$df0$welfare` must be a numeric or integer vector:",
      x = sprintf(
        "You've supplied an object of class %s.",
        class(data$df0$welfare)
      )
    ))
  }
  if (!is.null(data$df1) & !is.numeric(data$df1$welfare)) {
    rlang::abort(c("`data$df1$welfare` must be a numeric or integer vector:",
      x = sprintf(
        "You've supplied an object of class %s.",
        class(data$df1$welfare)
      )
    ))
  }

  # CHECK for compatible lengths
  if (length(survey_year) > 2) {
    rlang::abort(c("`survey_year` has too many values.",
      i = "You can't interpolate between more than two surveys."
    ))
  }
  if (length(predicted_request_mean) > 2) {
    rlang::abort(c("`predicted_request_mean` has too many values.",
      i = "You can't interpolate between more than two surveys."
    ))
  }
  if (length(distribution_type) > 2) {
    rlang::abort(c("`distribution_type` has too many values.",
      i = "You can't interpolate between more than two surveys."
    ))
  }
  if (length(request_year) > 1) {
    rlang::abort(c("`request_year` has too many values.",
      i = "You can only interpolate or extrapolate to one request year at a time."
    ))
  }
  if (length(poverty_line) > 1) {
    rlang::abort(c("`poverty_line` has too many values.",
      i = "You can only supply one poverty line at a time."
    ))
  }
  if (length(survey_year) != length(predicted_request_mean)) {
    rlang::abort(c("`survey_year` and `predicted_request_mean` must have compatible lengths:",
      x = sprintf(
        "`survey_year` has length %s.",
        length(survey_year)
      ),
      x = sprintf(
        "`predicted_request_mean` has length %s.",
        length(predicted_request_mean)
      )
    ))
  }
  if (length(predicted_request_mean) == 2 & is.null(data$df1)) {
    rlang::abort(c("You supplied two survey means, but only one survey data frame.",
      i = "Pass an additonal data frame to argument `df1 in `data`."
    ))
  }

  # CHECK for incorrect NA's
  if (is.na(request_year)) {
    rlang::abort("`request_year` can't be NA.")
  }
  if (anyNA(survey_year)) {
    rlang::abort(c("`survey_year` can't contain missing values:",
      x = sprintf(
        "Found %s missing values in `survey_year.`",
        sum(is.na(survey_year))
      )
    ))
  }
  if (anyNA(predicted_request_mean)) {
    rlang::abort(c("`predicted_request_mean` can't contain missing values:",
      x = sprintf(
        "Found %s missing values in `predicted_request_mean`",
        sum(is.na(predicted_request_mean))
      )
    ))
  }
  if (is.na(poverty_line)) {
    rlang::abort("`poverty_line` can't be NA.")
  }
}
PIP-Technical-Team/wbpip documentation built on Nov. 29, 2024, 6:57 a.m.