R/classify.R

Defines functions classify_indicators

Documented in classify_indicators

################################################################################
#
#' Perform classification analysis of WASH indicators by varying stratification
#' using lot quality assurance sampling (LQAS)
#' 
#' @param indicators A data frame containing indicators data
#' @param vars A character vector of variables names to classify
#' @param upper Upper standard for lot quality assurance sampling. Default is 0.80
#' @param lower Lower standard for lot quality assurance sampling. Default is 0.50
#' @param strata Level of stratification to which classification results are to
#'     be made. Possible values are \code{"slum"}, \code{"wealth"}, \code{"overall"}
#' @return A data frame containing classification results per indicator per
#'     stratification
#' @examples
#' #
#' classify_indicators(indicators = washdata::indicatorsDataBGD,
#'                     vars = c("jmpWater1", "jmpWater2", "jmpWater3", "jmpWater4"),
#'                     strata = "slum")
#' 
#' @export 
#' 
#
################################################################################

classify_indicators <- function(indicators,
                                vars,
                                upper = 0.8, 
                                lower = 0.5,
                                strata) {
  ## Check strata - "slum"
  if(strata == "slum") {
    total <- aggregate(. ~ country + ccode + month + year + zone + type,
                       data = indicators[ , c("country", "ccode", "month", 
                                              "year", "zone", "type", vars)],
                       FUN = length)
    ##
    cases <- aggregate(. ~ zone + type,
                       data = indicators[ , c("zone", "type", vars)],
                       FUN = sum)
    ##
    results <- aggregate(. ~ zone + type,
                         data = indicators[ , c("zone", "type", vars)],
                         FUN = applyLQAS)
    ##
    resultsDF <- data.frame(tidyr::gather(data = total[total$type == 1, ], 
                                   key = indicator.code, 
                                   value = slum.total, vars),
                            "slum.cases" = tidyr::gather(data = cases[cases$type == 1, ], 
                                                         key = indicator.code, 
                                                         value = slum.cases, vars)[ , "slum.cases"],
                            "slum.class" = tidyr::gather(data = results[results$type == 1, ], 
                                                         key = indicator.code, 
                                                         value = slum.class, vars)[ , "slum.class"],
                            "other.total" = tidyr::gather(data = total[total$type == 2, ], 
                                                          key = indicator.code, 
                                                          value = other.total, vars)[ , "other.total"],
                            "other.cases" = tidyr::gather(data = cases[cases$type == 2, ], 
                                                          key = indicator.code, 
                                                          value = other.cases, vars)[ , "other.cases"],
                            "other.class" = tidyr::gather(data = results[results$type == 2, ], 
                                                          key = indicator.code, 
                                                          value = other.class, vars)[ , "other.class"])    
    ##                        
    resultsDF <- data.frame(resultsDF[ , c("country", "ccode", "month", "year", "zone")],
                            "indicator.names" = unlist(lapply(X = resultsDF$indicator.code, 
                                                              FUN = get_var_names, 
                                                              steer = steerIndicators)),
                            resultsDF[ , c("indicator.code", 
                                           "slum.total", "slum.cases", "slum.class", 
                                           "other.total", "other.cases", "other.class")])
  }
  ## Check strata - "wealth"
  if(strata == "wealth") {
    total <- aggregate(. ~ country + ccode + month + year + pQuintile + type,
                       data = indicators[ , c("country", "ccode", "month", 
                                              "year", "pQuintile", "type", vars)],
                       FUN = length)
    ##
    cases <- aggregate(. ~ pQuintile + type,
                       data = indicators[ , c("pQuintile", "type", vars)],
                       FUN = sum)
    ##
    results <- aggregate(. ~ pQuintile + type,
                         data = indicators[ , c("pQuintile", "type", vars)],
                         FUN = applyLQAS)
    ##
    resultsDF <- data.frame(tidyr::gather(data = total[total$type == 1, ], 
                                          key = indicator.code, 
                                          value = slum.total, vars),
                            "slum.cases" = tidyr::gather(data = cases[cases$type == 1, ], 
                                                         key = indicator.code, 
                                                         value = slum.cases, vars)[ , "slum.cases"],
                            "slum.class" = tidyr::gather(data = results[results$type == 1, ], 
                                                         key = indicator.code, 
                                                         value = slum.class, vars)[ , "slum.class"],
                            "other.total" = tidyr::gather(data = total[total$type == 2, ], 
                                                          key = indicator.code, 
                                                          value = other.total, vars)[ , "other.total"],
                            "other.cases" = tidyr::gather(data = cases[cases$type == 2, ], 
                                                          key = indicator.code, 
                                                          value = other.cases, vars)[ , "other.cases"],
                            "other.class" = tidyr::gather(data = results[results$type == 2, ], 
                                                          key = indicator.code, 
                                                          value = other.class, vars)[ , "other.class"])    
    ##
    resultsDF <- data.frame(resultsDF[ , c("country", "ccode", "month", "year", "pQuintile")],
                            "indicator.names" = unlist(lapply(X = resultsDF$indicator.code, 
                                                              FUN = get_var_names, 
                                                              steer = steerIndicators)),
                            resultsDF[ , c("indicator.code", 
                                           "slum.total", "slum.cases", "slum.class", 
                                           "other.total", "other.cases", "other.class")])
  }
  ## Check strata - "overall"
  if(strata == "overall") {
    ##
    total <- aggregate(. ~ country + ccode + month + year + type + zone,
                       data = indicators[ , c("country", "ccode", "month", 
                                              "year", "type", "zone", vars)],
                       FUN = length)
    ##
    cases <- aggregate(. ~ type + zone,
                       data = indicators[ , c("type", "zone", vars)],
                       FUN = sum)    
    ##
    results <- aggregate(. ~ type + zone,
                         data = indicators[ , c("type", "zone", vars)],
                         FUN = applyLQAS)
    ##
    resultsDF <- data.frame(tidyr::gather(data = total[total$type == 1, ], 
                                          key = indicator.code, 
                                          value = slum.total, vars),
                            "slum.cases" = tidyr::gather(data = cases[cases$type == 1, ], 
                                                         key = indicator.code, 
                                                         value = slum.cases, vars)[ , "slum.cases"],
                            "slum.class" = tidyr::gather(data = results[results$type == 1, ], 
                                                         key = indicator.code, 
                                                         value = slum.class, vars)[ , "slum.class"],
                            "other.total" = tidyr::gather(data = total[total$type == 2, ], 
                                                          key = indicator.code, 
                                                          value = other.total, vars)[ , "other.total"],
                            "other.cases" = tidyr::gather(data = cases[cases$type == 2, ], 
                                                          key = indicator.code, 
                                                          value = other.cases, vars)[ , "other.cases"],
                            "other.class" = tidyr::gather(data = results[results$type == 2, ], 
                                                          key = indicator.code, 
                                                          value = other.class, vars)[ , "other.class"])    
    ##
    resultsDF <- data.frame(resultsDF[ , c("country", "ccode", "month", "year", "zone")],
                            "indicator.names" = unlist(lapply(X = resultsDF$indicator.code, 
                                                              FUN = get_var_names, 
                                                              steer = steerIndicators)),
                            resultsDF[ , c("indicator.code", 
                                           "slum.total", "slum.cases", "slum.class", 
                                           "other.total", "other.cases", "other.class")])
  }
  ## Return output
  return(resultsDF)
}
validmeasures/wsup documentation built on Dec. 16, 2019, 4:50 a.m.