R/gd_compute_dist_stats.R

Defines functions gd_compute_dist_fit_lb gd_compute_dist_fit_lq check_curve_validity_dist_lb gd_select_lorenz_dist gd_estimate_dist_stats_lb gd_lq_key_values gd_estimate_dist_stats_lq gd_compute_dist_stats

Documented in check_curve_validity_dist_lb gd_compute_dist_fit_lb gd_compute_dist_fit_lq gd_compute_dist_stats gd_estimate_dist_stats_lb gd_estimate_dist_stats_lq gd_lq_key_values gd_select_lorenz_dist

#' Computes distributional statistics (grouped)
#'
#' Compute distributional statistics for grouped data by selecting the best
#' functional fit for the Lorenz curve (either beta or quadratic).
#'
#' @inheritParams gd_compute_pip_stats
#' @inheritParams gd_compute_dist_stats_lb
#'
#' @return list
#' @keywords internal
#' @examples
#' # Compute distributional statistics
#' res <- wbpip:::gd_compute_dist_stats(
#'  welfare = grouped_data_ex2$welfare,
#'  population = grouped_data_ex2$weight,
#'  mean = 50)
#'
gd_compute_dist_stats <- function(welfare,
                                  population,
                                  mean,
                                  p0 = 0.5) {


  # Apply Lorenz quadratic fit ----------------------------------------------

  # STEP 1: Prep data to fit functional form
  prepped_data <- create_functional_form_lq(
    welfare = welfare, population = population
  )

  # STEP 2: Estimate regression coefficients using LQ parameterization
  reg_results_lq <- regres(prepped_data, is_lq = TRUE)
  A <- reg_results_lq$coef[1]
  B <- reg_results_lq$coef[2]
  C <- reg_results_lq$coef[3]
  kv <- gd_lq_key_values(A, B, C)

  # STEP 3: Compute Sum of Squared Error
  reg_results_lq[["sse"]] <- gd_compute_dist_fit_lq(welfare = welfare,
                                                    population = population,
                                                    A = A,
                                                    B = B,
                                                    C = C,
                                                    key_values = kv)

  # STEP 3: Calculate distributional stats
  # Compute key numbers from Lorenz quadratic form


  results_lq <- gd_estimate_dist_stats_lq(mean = mean,
                                          p0 = p0,
                                          A = A,
                                          B = B,
                                          C = C,
                                          key_values = kv)

  results_lq <- append(results_lq, reg_results_lq)

  # Apply Lorenz beta fit ---------------------------------------------------

  # STEP 1: Prep data to fit functional form
  prepped_data <- create_functional_form_lb(
    welfare = welfare, population = population
  )

  # STEP 2: Estimate regression coefficients using LB parameterization
  reg_results_lb <- regres(prepped_data, is_lq = FALSE)
  A <- reg_results_lb$coef[1]
  B <- reg_results_lb$coef[2]
  C <- reg_results_lb$coef[3]

  # STEP 3: Compute Sum of Squared Error
  reg_results_lb[["sse"]] <- gd_compute_dist_fit_lb(welfare = welfare,
                                                    population = population,
                                                    A = A,
                                                    B = B,
                                                    C = C)

  # STEP 3: Calculate distributional stats
  results_lb <- gd_estimate_dist_stats_lb(mean = mean,
                                          p0 = p0,
                                          A = A,
                                          B = B,
                                          C = C)

  results_lb <- append(results_lb, reg_results_lb)

  # Apply selection rules -----------------------------------------------

  # STEP 4: Select best fit
  out <- gd_select_lorenz_dist(
    lq = results_lq, lb = results_lb
  )

  # Return only subset of variables
  out <- out[c(
    "mean",
    "median",
    "gini",
    "mld",
    "polarization",
    "deciles"
  )]

  return(out)
}


#' gd_estimate_dist_stats_lq
#' Estimates distributional stats from Quadratic Lorenz fit
#' @inheritParams gd_estimate_lq
#' @return list
#' @keywords internal
gd_estimate_dist_stats_lq <- function(mean, p0, A, B, C, key_values) {

  # Compute Lorenz quadratic  -----------------------------------------------

  validity <- check_curve_validity_lq(A,
                                      B,
                                      C,
                                      key_values = key_values)

  # Compute distributional measures -----------------------------------------

  dist_stats <- gd_compute_dist_stats_lq(mean,
                                         p0,
                                         A,
                                         B,
                                         C,
                                         key_values = key_values)

  out <- list(
    mean = mean,
    gini = dist_stats$gini,
    median = dist_stats$median,
    rmhalf = dist_stats$rmhalf,
    polarization = dist_stats$polarization,
    ris = dist_stats$ris,
    mld = dist_stats$mld,
    dcm = dist_stats$dcm,
    deciles = dist_stats$deciles,
    is_valid = validity$is_valid
  )

  return(out)
}


#' Calculates the key values in Table 2 of Datt (1998)
#' @inheritParams gd_estimate_lq
#' @return list
#' @keywords internal
gd_lq_key_values <- function(A, B, C) {

  # Theorem 3 from original Lorenz quadratic paper
  e <- -(A + B + C + 1) # e = -(A + B + C + 1): condition for the curve to go through (1, 1)
  m <- (B^2) - (4 * A) # m < 0: condition for the curve to be an ellipse (m is called \alpha original in paper)
  n <- (2 * B * e) - (4 * C) # n is called $\beta$ in original paper
  r <- sqrt((n^2) - (4 * m * e^2))  # r is called $K*2\alpha$ in original paper

  s1 <- (r - n) / (2 * m)
  s2 <- -(r + n) / (2 * m)

#   ____________________________________________________________________________
#   Return                                                                  ####
  l_res <- list(
    e = e,
    m = m,
    n = n,
    r = r,
    s1 = s1,
    s2 = s2
  )
  return(l_res)

}



#' Estimates distributional stats from beta Lorenz fit
#' @inheritParams gd_estimate_lb
#' @return list
#' @keywords internal
gd_estimate_dist_stats_lb <- function(mean, p0, A, B, C) {

  # Check validity
  validity <- check_curve_validity_dist_lb(A, B, C)

  # Compute distributional measures
  dist_stats <-
    gd_compute_dist_stats_lb(mean, p0, A, B, C)

  out <- list(
    gini = dist_stats$gini,
    median = dist_stats$median,
    rmhalf = dist_stats$rmhalf,
    polarization = dist_stats$polarization,
    ris = dist_stats$ris,
    mld = dist_stats$mld,
    dcm = dist_stats$dcm,
    deciles = dist_stats$deciles,
    is_valid = validity$is_valid
  )

  return(out)
}

#' gd_select_lorenz_dist
#' Select best Lorenz fit for distributional statistics
#' @inheritParams gd_select_lorenz
#' @return list
#' @keywords internal
gd_select_lorenz_dist <- function(lq, lb) {

  # Set default value
  datamean <- lq[["mean"]]
  is_valid <- lq[["is_valid"]] | lb[["is_valid"]]
  # is_normal <- lq[["is_normal"]] | lb[["is_normal"]]

  # Selection of Lorenz fit for distributional statistics
  use_lq_for_dist <-
    use_lq_for_distributional(lq = lq, lb = lb)

  # Retrieve distributional statistics
  dist <-
    retrieve_distributional(
      lq = lq,
      lb = lb,
      is_valid = is_valid,
      use_lq_for_dist = use_lq_for_dist
    )

  return(list(
    mean             = datamean,
    z_min            = dist[["z_min"]],
    z_max            = dist[["z_max"]],
    gini             = dist[["gini"]],
    median           = dist[["median"]],
    rmhalf           = dist[["rmhalf"]],
    polarization     = dist[["polarization"]],
    ris              = dist[["ris"]],
    mld              = dist[["mld"]],
    deciles          = dist[["deciles"]],
    sse              = dist[["sse"]]
  ))
}


#' check_curve_validity_dist_lb
#' Check validity of Lorenz beta fit for distributional statistics
#' @inheritParams check_curve_validity_lb
#' @return list
#' @keywords internal
check_curve_validity_dist_lb <- function(A, B, C) {
  is_valid <- TRUE

  for (w in seq(from = 0.001, to = 0.1, by = 0.05)) {
    if (derive_lb(w, A, B, C) < 0) {
      is_valid <- FALSE
      break
    }
  }

  if (is_valid) {
    for (w in seq(from = 0.001, to = 0.999, by = 0.05)) {
      if (DDLK(w, A, B, C) < 0) { # What does DDLK stands for?? What does it do?
        is_valid <- FALSE
        break
      }
    }
  }

  return(list(
    is_valid = is_valid
  ))
}

#' Computes the sum of squares of error
#'
#' Measures the fit of the model to the data for distributional statistics.
#'
#' @inheritParams gd_compute_fit_lq
#' @inheritParams gd_estimate_lq
#' @return list
#' @keywords internal
gd_compute_dist_fit_lq <- function(welfare,
                                   population,
                                   A,
                                   B,
                                   C,
                                   key_values) {

  sse <- 0L # Sum of square error

  for (i in seq_len(length(welfare) - 1)) {
    residual <- welfare[i] - value_at_lq(population[i], A, B, C, key_values)
    residual_sq <- residual^2
    sse <- sse + residual_sq
  }

  return(sse)
}

#' Computes the sum of squares of error
#' Measures the fit of the model to the data for distributional statistics.
#'
#' @inheritParams gd_compute_fit_lb
#' @inheritParams gd_estimate_lb
#'
#' @return list
#' @keywords internal
gd_compute_dist_fit_lb <- function(welfare,
                                   population,
                                   A,
                                   B,
                                   C) {

  sse <- 0 # Sum of square error

  for (i in seq_along(welfare[-1])) {
    residual <- welfare[i] - value_at_lb(population[i], A, B, C)
    residual_sq <- residual^2
    sse <- sse + residual_sq
  }

  return(sse)
}
PIP-Technical-Team/wbpip documentation built on Nov. 29, 2024, 6:57 a.m.