R/evaluate_internal.R
In calidad: Assesses the Quality of Estimates Made by Complex Sample Designs

Documented in quadratic

# Check that all the inputs are available
check_cepal_inputs <- function(table, var) {
  check_ess <- names(table) %>%  stringr::str_detect(pattern =  var) %>% sum()
  if (check_ess != 1) {stop(paste(var, "must be used!"))}
}


## check n_obj
check_n_obj_var <- function(table_n_obj, table , var = 'n_obj'){

  if(! var %in% names(table_n_obj)){
    if(!is.null(table_n_obj)){
      stop("Oops! n_obj missing in table_n_obj object. Please review your data.")
    }
    # message('n_obj missing in table_n_obj object')

    if((! var %in% names(table))){
      warning('n_obj missing in the table. \n
              The process will consider estimations with n < 30 as non-reliable.')

      return(FALSE)    # No merge

    }else{
      # message('n_obj in table!')
      return(FALSE)    # No merge
    }

  }else{

    return(TRUE)       # merge
  }
}

## check type col
check_type_cols <- function(tabla1, tabla2, columnas_comunes) {

  tipos_t1 <- sapply(tabla1 %>% dplyr::select(dplyr::all_of(columnas_comunes)), class)
  tipos_t2 <- sapply(tabla2 %>% dplyr::select(dplyr::all_of(columnas_comunes)), class)

  comparacion <- data.frame(
    col = columnas_comunes,
    table1 = sapply(tipos_t1[columnas_comunes], paste, collapse = ", "),
    table2 = sapply(tipos_t2[columnas_comunes], paste, collapse = ", "),
    coincide = tipos_t1[columnas_comunes] == tipos_t2[columnas_comunes],
    stringsAsFactors = FALSE)

    if(sum(comparacion$coincide)!=length(columnas_comunes)){
      print(comparacion)
      stop("Columns types doesn't match. Please review your data.")

    }
}


merge_columns <- function(table, table_n_obj){

  columns <- intersect(names(table), names(table_n_obj))

  check_type_cols(table, table_n_obj, columns)

  if(! (table_n_obj %>% dplyr::pull(.data$n_obj) %>% is.numeric())){
    warning('Oops! n_obj is not numeric. Please review your data. \n
            The process will consider estimations with n < 30 as unreliable')
    return(table)
  }

  if((length(columns)>0) & (nrow(table_n_obj) == nrow(table))){

    table_merge <- table %>%
      dplyr::left_join(table_n_obj, by = columns)


    if(nrow(table_merge) != nrow(table)){
      stop("Oops! Joining tables resulted in a different number of rows. Please review your data.")

      return(table)

    }else if(sum(is.na(table_merge %>% dplyr::pull(.data$n_obj)))>0){

      if (sum(table_merge$n<30, na.rm = T)>0){

        stop("Oops! NA values found in n_obj column and some rows where n < 30. Please review your data.")

      }else{

        warning("Oops! NA values found in n_obj column. The process will skip the sample recovery verification for those rows.")

        return(table_merge)
      }

      return(table)

    }else{
      return(table_merge)
    }


  }else{

    stop("Columns are missing in the table or the number of rows in table_n_obj and the table don't match. Please review your data or manually add n_obj to the table")

    return(table)
  }

}



#---------------------------------------------------------------------
#'
#' Calcula el valor de una función cuadrática
#'
#' \code{quadratic} returns the output of a particular function created by INE Chile, which
#' is assessed at the value of the estimated proportion from a sample. If the output of the
#' function is  higher than the standard error, it is interpreted as a signal that the
#' estimation is not reliable.
#'
#'
#' @param p numeric vector with the values of the estimations for proportions
#' @return  numeric vector
#'
quadratic <- function(p) {
  purrr::map_dbl(p, function(x) {
    if (x <= 0.5) {
      (x**(2/3))/9
    } else {
      ((1 - x)**(2/3))/9
    }
  })
}
#--------------------------------------------------------------------
assess_ine <- function(table, params, class = "calidad.mean", ratio_between_0_1 = TRUE) {

  # General case
  if (sum(class %in% c("calidad.mean", "calidad.size", "calidad.total")) == 1 | (sum(class %in% 'calidad.prop') == 1 & (sum(table$stat>1)>0 | !ratio_between_0_1))) {

    if ((ratio_between_0_1) & sum(class %in% 'calidad.prop') == 1){
      warning('Oops! A ratio estimation greater than 1 was detected. The assessment will use cv.')
    }


    evaluacion <- table %>%
      dplyr::filter(!is.na(.data$n) & !is.na(.data$df) & !is.na(.data$cv)) %>%
      dplyr::mutate(eval_n = dplyr::if_else(.data$n >= params$n, "sufficient sample size", "insufficient sample size"),
                    eval_df = dplyr::if_else(.data$df >= params$df, "sufficient df", "insufficient df"),
                    eval_cv = dplyr::case_when(
                      .data$cv <= params$cv_lower_ine  &  .data$cv > 0                ~ paste("cv <=", params$cv_lower_ine) ,
                      .data$cv > params$cv_lower_ine & .data$cv <= params$cv_upper_ine ~ paste("cv between", params$cv_lower_ine, "and", params$cv_upper_ine),
                      .data$cv > params$cv_upper_ine                                        ~ paste("cv >", params$cv_upper_ine)
                    ),
                    label = dplyr::case_when(
                      eval_n == "insufficient sample size" | eval_df == "insufficient df" | eval_cv == paste("cv >", params$cv_upper_ine)      ~ "non-reliable",
                      eval_n == "sufficient sample size" & eval_df == "sufficient df" & eval_cv == paste("cv <=", params$cv_lower_ine)         ~ "reliable",
                      eval_n == "sufficient sample size" & eval_df == "sufficient df" & eval_cv ==  paste("cv between", params$cv_lower_ine, "and", params$cv_upper_ine) ~
                        "weakly reliable"
                    )
      )
    # proportion case
  } else {

    evaluacion <- table %>%
      dplyr::mutate(eval_n = dplyr::if_else(.data$n >= params$n, "sufficient sample size", "insufficient sample size"),
                    eval_df = dplyr::if_else(.data$df >= params$df, "sufficient df", "insufficient df"),
                    prop_est = dplyr::case_when(.data$stat <= 0.5                 ~ "<= 0.5",
                                                .data$stat < 1 & .data$stat > 0.5 ~ "> 0.5"),

                    quadratic = dplyr::if_else(.data$stat < 1, quadratic(.data$stat), NA_real_),

                    eval_se = dplyr::if_else(.data$se <= .data$quadratic, "admissible SE", "high SE"),

                    label = dplyr::case_when(
                      eval_n == "insufficient sample size" | eval_df == "insufficient df"                                                  ~ "non-reliable",
                      eval_n == "sufficient sample size" & eval_df == "sufficient df" & prop_est == "<= 0.5" & eval_se == "admissible SE"  ~ "reliable",
                      eval_n == "sufficient sample size" & eval_df == "sufficient df" & prop_est == "<= 0.5" & eval_se == "high SE"      ~ "weakly reliable",
                      eval_n == "sufficient sample size" & eval_df == "sufficient df" & prop_est == "> 0.5" & eval_se == "admissible SE"   ~ "reliable",
                      eval_n == "sufficient sample size" & eval_df == "sufficient df" & prop_est == "> 0.5" & eval_se == "high SE"       ~ "weakly reliable"
                      )
                    )

  }
  return(evaluacion)
}
#-------------------------------------------------
assess_cepal2020 <- function(table, params, class = "calidad.mean") {
  # General case
  if (sum(class %in% c("calidad.mean", "calidad.size", "calidad.total")) == 1 ) {

    evaluation <- table %>%
      dplyr::mutate(eval_n = dplyr::if_else(.data$n >= params$n, "sufficient sample size", "insufficient sample size"),
                    eval_ess = dplyr::if_else(.data$ess >= params$ess, "sufficient ess", "insufficient ess"),
                    eval_unweighted = dplyr::if_else(.data$unweighted >= params$unweighted , "sufficient cases", "insufficient cases"),
                    eval_df = dplyr::if_else(.data$df >= params$df, "sufficient df", "insufficient df"),
                    eval_cv = dplyr::if_else(.data$cv < params$cv_cepal, "adequate cv", "non adequate cv")) %>%
      dplyr::mutate(label = dplyr::case_when(
        eval_n == "insufficient sample size" | eval_ess == "insufficient ess" | eval_unweighted == "insufficient cases" ~ "supress",
        eval_df == "insufficient df"  ~ "review",
        eval_cv ==  "adequate cv"  ~ "publish"


      ))
    # Proportion case
  } else {

    evaluation <- table %>%
      dplyr::mutate(eval_n = dplyr::if_else(.data$n >= params$n, "sufficient sample size", "insufficient sample size"),
                    eval_ess = dplyr::if_else(.data$ess >= params$ess, "sufficient ess", "insufficient ess"),
                    eval_unweighted = dplyr::if_else(.data$unweighted >= params$unweighted , "sufficient cases", "insufficient cases"),
                    eval_df = dplyr::if_else(.data$df >= params$df, "sufficient df", "insufficient df"),
                    eval_log_cv = dplyr::if_else(.data$log_cv <= params$log_cv, "adequate log cv", "non adequate log cv"),
                    eval_cv = dplyr::if_else(.data$cv < params$cv_cepal, "adequate cv", "non adequate cv")) %>%
      dplyr::mutate(label = dplyr::case_when(
        eval_n == "insufficient sample size" | eval_ess == "insufficient ess" |
          eval_unweighted == "insufficient cases" | eval_log_cv == "non adequate log cv"  ~ "supress",
        eval_df == "insufficient df" | eval_cv == "non adequate cv" ~ "review",
        eval_cv ==  "adequate cv"  ~ "publish"
      ))

  }

  # Add cepal 2020 class to the final object
  evaluation <- add_class(evaluation, "cepal2020.eval")

  return(evaluation)


}
#-------------------------------------------------
###################
# CEPAL 2023
utils::globalVariables(c("eval_deff", "eval_ess"))

assess_cepal2023 <- function(table, params, class = "calidad.mean", domain_info = FALSE, low_df_justified =FALSE, ratio_between_0_1 = TRUE) {

  evaluation <- table %>%
    dplyr::mutate(eval_deff = dplyr::case_when(.data$deff >= 1 ~ "Sufficient deff",
                                               .data$deff < 1 & domain_info & .data$n >= params$n ~ "Sufficient deff",
                                               TRUE ~ "non-reliable")) %>%
    dplyr::mutate(
      eval_ess = dplyr::if_else(eval_deff == "Sufficient deff" & .data$ess >= params$ess, "Sufficient ess", "non-reliable"),
      eval_df = dplyr::if_else(eval_ess == "Sufficient ess" & .data$df >= params$df, "Sufficient df",
                               dplyr::if_else(eval_ess == "Sufficient ess" & .data$df < params$df & domain_info & low_df_justified , "Sufficient df",
                                              "non-reliable"))
    )

  if ((sum(class %in% c("calidad.mean", "calidad.size", "calidad.total")) == 1) | (sum(class %in% 'calidad.prop') == 1 & (sum(table$stat>1)>0 | !ratio_between_0_1))) {

    if ((ratio_between_0_1) & sum(class %in% 'calidad.prop') == 1){
      warning('Oops! A ratio estimation greater than 1 was detected. The assessment will use cv.')
    }

    evaluation <- evaluation %>%
      dplyr::mutate(
        eval_cv = dplyr::case_when(
          .data$cv > params$cv_upper_cepal ~ paste("cv >", params$cv_upper_cepal),
          .data$cv > params$cv_lower_cepal & .data$cv <= params$cv_upper_cepal ~ paste("cv between", params$cv_lower_cepal, "and", params$cv_upper_cepal),
          .data$cv <= params$cv_lower_cepal ~ paste("cv <=", params$cv_lower_cepal))
        ) %>%

      dplyr::mutate(label = dplyr::case_when(
        eval_df == "Sufficient df" & eval_cv == paste("cv >", params$cv_upper_cepal) ~ "non-reliable",
        eval_df == "Sufficient df" & eval_cv == paste("cv between", params$cv_lower_cepal, "and", params$cv_upper_cepal) & .data$unweighted < params$CCNP_a ~ "non-reliable",
        eval_df == "Sufficient df" & eval_cv == paste("cv between", params$cv_lower_cepal, "and", params$cv_upper_cepal) & .data$unweighted >= params$CCNP_a ~ "weakly-reliable",
        eval_df == "Sufficient df" & eval_cv == paste("cv <=", params$cv_lower_cepal) & .data$unweighted >= params$CCNP_b ~ "reliable",
        eval_df == "Sufficient df" & eval_cv == paste("cv <=", params$cv_lower_cepal) & .data$unweighted < params$CCNP_b & .data$unweighted >= params$CCNP_a ~ "weakly-reliable",
        eval_df == "Sufficient df" & eval_cv == paste("cv <=", params$cv_lower_cepal) & .data$unweighted < params$CCNP_a ~ "non-reliable",
        TRUE ~ "non-reliable"
      ))
    #proportion
  } else {
    if (!"log_cv" %in% colnames(table)) {
      stop("log_cv must be used!")
    }
    evaluation <- evaluation %>%
      dplyr::mutate(eval_log_cv = dplyr::case_when(
        .data$log_cv <= params$cvlog_max ~ paste("log_cv <=", params$cvlog_max),
        .data$log_cv > params$cvlog_max ~ paste("log_cv >", params$cvlog_max)

        )) %>%

      dplyr::mutate(label = dplyr::case_when(
        eval_df == "Sufficient df" & .data$stat < 0.5 & eval_log_cv == paste("log_cv <=", params$cvlog_max) & .data$unweighted >= params$CCNP_b ~ "reliable",
        eval_df == "Sufficient df" & .data$stat < 0.5 &  eval_log_cv == paste("log_cv <=", params$cvlog_max) & .data$unweighted < params$CCNP_b & .data$unweighted >= params$CCNP_a ~ "weakly-reliable",
        eval_df == "Sufficient df" & .data$stat < 0.5 &  eval_log_cv == paste("log_cv <=", params$cvlog_max) & .data$unweighted < params$CCNP_a ~ "non-reliable",
        eval_df == "Sufficient df" & .data$stat >= 0.5 & eval_log_cv == paste("log_cv <=", params$cvlog_max) & .data$unweighted >= params$CCNP_b ~ "reliable",
        eval_df == "Sufficient df" & .data$stat >= 0.5 & eval_log_cv == paste("log_cv <=", params$cvlog_max) & .data$unweighted < params$CCNP_b & .data$unweighted >= params$CCNP_a ~ "weakly-reliable",
        eval_df == "Sufficient df" & .data$stat >= 0.5 & eval_log_cv == paste("log_cv <=", params$cvlog_max) & .data$unweighted < params$CCNP_a ~ "non-reliable",
        eval_df == "Sufficient df" & .data$stat >= 0.5 & eval_log_cv == paste("log_cv >", params$cvlog_max) & .data$unweighted >= params$CCNP_a ~ "weakly-reliable",
        eval_df == "Sufficient df" & .data$stat >= 0.5 & eval_log_cv == paste("log_cv >", params$cvlog_max) & .data$unweighted < params$CCNP_a ~ "non-reliable",
        TRUE ~ "non-reliable"
      ))
  }
  evaluation <- add_class(evaluation, "cepal2023.eval")
  return(evaluation)
}

#-------------------------------------------------

## economicas standard
assess_economicas <- function(table, params, class = "calidad.mean", domain_info = FALSE, ratio_between_0_1 = TRUE) {

  if('n_obj' %in% names(table)){
    if(table %>% dplyr::pull(.data$n_obj) %>% is.numeric()){
      table <- table %>%
        dplyr::mutate(compliance_rate = .data$n/.data$n_obj)

    }else{
      table <- table %>%
        dplyr::mutate(compliance_rate = NA)
    }

  }else{
    table <- table %>%
      dplyr::mutate(compliance_rate = NA)
  }

  # General case
  if ((sum(class %in% c("calidad.mean", "calidad.size", "calidad.total")) == 1 )| (sum(class %in% 'calidad.prop') == 1 & (sum(table$stat>1)>0 | !ratio_between_0_1))) {

    if ((ratio_between_0_1) & sum(class %in% 'calidad.prop') == 1){
      warning('Oops! A ratio estimation greater than 1 was detected. The assessment will use cv.')
    }


    evaluation <- table %>%
      dplyr::mutate(eval_n = dplyr::if_else(.data$n >= params$n, "sufficient sample size", "insufficient sample size"),
                    eval_compliance_rate = dplyr::if_else(is.na(.data$compliance_rate),'insufficient rate',
                                                            dplyr::if_else(.data$compliance_rate>=1, 'sufficient rate', 'insufficient rate')),

                    eval_df = dplyr::if_else(.data$df >= params$df, "sufficient df", "insufficient df"),
                    eval_cv = dplyr::case_when(
                      .data$cv <= params$cv_lower_econ  & .data$cv > 0                   ~ paste("cv <=", params$cv_lower_econ) ,
                      .data$cv > params$cv_lower_econ & .data$cv <= params$cv_upper_econ ~ paste("cv between", params$cv_lower_econ, "and", params$cv_upper_econ),
                      .data$cv > params$cv_upper_econ                                    ~ paste("cv >", params$cv_upper_econ)
                      )
                    ) %>%

      dplyr::mutate(label = dplyr::case_when(
        eval_n == "insufficient sample size" & domain_info == FALSE ~ "non-reliable",
        eval_n == "insufficient sample size" & domain_info == TRUE & eval_compliance_rate == 'insufficient rate' ~ "non-reliable",
        eval_n == "sufficient sample size" & eval_df == "sufficient df" & eval_cv == paste("cv <=", params$cv_lower_econ)~ "reliable",
        eval_n == "insufficient sample size" & domain_info == TRUE & eval_compliance_rate == 'sufficient rate' &
          eval_df == "sufficient df" & eval_cv == paste("cv <=", params$cv_lower_econ)~ "reliable",
        eval_n == "sufficient sample size" & eval_df == "sufficient df" & eval_cv ==  paste("cv between", params$cv_lower_econ, "and", params$cv_upper_econ) ~ "weakly reliable",
        eval_n == "insufficient sample size" & domain_info == TRUE & eval_compliance_rate == 'sufficient rate' &
          eval_df == "sufficient df" & eval_cv ==  paste("cv between", params$cv_lower_econ, "and", params$cv_upper_econ) ~ "weakly reliable",

        TRUE ~ "non-reliable"
      ))



    # Proportion case
  } else {

    evaluation <- table %>%
      dplyr::mutate(eval_n = dplyr::if_else(.data$n >= params$n, "sufficient sample size", "insufficient sample size"),
                    eval_compliance_rate = dplyr::if_else(is.na(.data$compliance_rate),'insufficient rate',
                                                            dplyr::if_else(.data$compliance_rate>=1, 'sufficient rate', 'insufficient rate')),
                    eval_df = dplyr::if_else(.data$df >= params$df, "sufficient df", "insufficient df"),
                    eval_se = dplyr::case_when(
                      .data$stat< 0.5 & .data$se<=quadratic(.data$stat) ~ "admissible SE",
                      .data$stat>=0.5 & .data$se<=quadratic(.data$stat) ~ "admissible SE",
                      TRUE ~ "high SE"
                    )
      ) %>%

      dplyr::mutate(label = dplyr::case_when(
        eval_n == "insufficient sample size" & domain_info == FALSE ~ "non-reliable",
        eval_n == "insufficient sample size"  & domain_info == TRUE & eval_compliance_rate == 'insufficient rate' ~ "non-reliable",
        eval_n == "sufficient sample size" & eval_df == "sufficient df" & eval_se == "admissible SE" ~ "reliable",
        eval_n == "insufficient sample size"  & domain_info == TRUE & eval_compliance_rate == 'sufficient rate'&
          eval_df == "sufficient df" & eval_se == "admissible SE" ~ "reliable",
        eval_n == "sufficient sample size" & eval_df == "sufficient df" & eval_se == "high SE" ~ "weakly reliable",
        eval_n == "insufficient sample size"  & domain_info == TRUE & eval_compliance_rate == 'sufficient rate'&
          eval_df == "sufficient df" & eval_se == "high SE" ~ "weakly reliable",

        TRUE ~ "weakly reliable"
      ))

  }

  evaluation <- add_class(evaluation, "economicas.eval")


  return(evaluation)
}



#-------------------------------------------------
publish_table <- function(evaluation) {
  evaluation <- evaluation %>%
    dplyr::ungroup() %>%
    dplyr::filter(!is.na(.data$n) & !is.na(.data$df) & !is.na(.data$cv)) %>%
    dplyr::mutate(pasa = sum(dplyr::if_else(.data$label == "reliable", 1, 0)) / nrow(.) * 100,
                  pasa = round(.data$pasa, 2),
                  publication = dplyr::if_else(.data$pasa >= 50, "publish", "do not publish"),
                  pass = paste0(.data$pasa, "% reliable estimates")) %>%
    dplyr::select(-"pasa")
  return(evaluation)
}
#-------------------------------------------------
combine_params <- function(default_params, user_params) {
  # Combine defaults params with user inputs
  default_params <- default_params[!names(default_params) %in% names(user_params)]
  final_params <- c(default_params, user_params)
  return(final_params)
}
Any scripts or data that you put into this service are public.
calidad documentation built on June 8, 2025, 12:36 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
calidad
Assesses the Quality of Estimates Made by Complex Sample Designs

R/evaluate_internal.R
In calidad: Assesses the Quality of Estimates Made by Complex Sample Designs

Defines functions publish_table assess_economicas assess_cepal2023 assess_cepal2020 assess_ine quadratic merge_columns check_type_cols check_n_obj_var check_cepal_inputs

Documented in quadratic

Try the calidad package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

calidad Assesses the Quality of Estimates Made by Complex Sample Designs

R/evaluate_internal.R In calidad: Assesses the Quality of Estimates Made by Complex Sample Designs

Defines functions publish_table assess_economicas assess_cepal2023 assess_cepal2020 assess_ine quadratic merge_columns check_type_cols check_n_obj_var check_cepal_inputs

Documented in quadratic

Try the calidad package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

calidad
Assesses the Quality of Estimates Made by Complex Sample Designs

R/evaluate_internal.R
In calidad: Assesses the Quality of Estimates Made by Complex Sample Designs