R/water.R

Defines functions get_water_vars

Documented in get_water_vars

################################################################################
#
#' Calculate indicators: Water
#' 
#' @param surveyData A data frame containing survey data with information for
#'     calculating water indicators.
#' @return A data frame containing calculated water indicators
#' @examples
#' # Apply get_water_vars() to surveyDataBGD dataset
#' waterDF <- get_water_vars(washdata::surveyDataBGD)
#' 
#' @export
#' 
#
################################################################################

get_water_vars <- function(surveyData) {
  ## Derive the water indicator set from raw survey answers.
  ##
  ## Reads the columns water1-water48 (those referenced below), wash60 and
  ## uniqueID from surveyData and returns a data frame with one row per
  ## survey row: uniqueID followed by the recoded indicators.

  ## Number of survey rows; used to size every indicator vector
  n_obs <- nrow(surveyData)

  ## Helper: split comma-separated multiple-response answers into a character
  ## matrix with one row per respondent and one column per answer position
  split_responses <- function(x) {
    stringr::str_split(string = x, pattern = ", ", simplify = TRUE)
  }

  ## Helper: replace blank answers with a label
  fill_blank <- function(x, label = "Don't know") {
    ifelse(x == "", label, x)
  }

  ## Helper: 1 when x equals positive, NA when x equals missing, 0 otherwise
  flag_match <- function(x, positive = "Yes", missing = "Don't know") {
    ifelse(x == missing, NA, ifelse(x == positive, 1, 0))
  }

  ## Helper: turn a multiple-response matrix into a data frame with one column
  ## per distinct non-blank response (sorted as table() sorts them). A cell
  ## holds the response text when exactly one answer position of that row
  ## matches it and NA otherwise; an NA anywhere in the row gives NA.
  ## Columns are named prefix + letters, so names are only distinct for up to
  ## 26 different responses.
  expand_responses <- function(responses, prefix) {
    pooled <- as.vector(responses)
    pooled[pooled %in% ""] <- NA
    labels <- names(table(pooled))
    ## No responses at all: zero columns but still one row per respondent so
    ## that the result can be combined with the other indicators
    if (length(labels) == 0L) {
      return(data.frame(matrix(nrow = nrow(responses), ncol = 0L)))
    }
    columns <- lapply(labels, function(label) {
      hits <- rowSums(responses == label)
      ifelse(hits == 1, label, NA)
    })
    names(columns) <- paste0("V", seq_along(columns))
    expanded <- data.frame(columns)
    names(expanded) <- paste0(prefix, letters[seq_along(labels)])
    expanded
  }

  ## waterSource: source of drinking water; "Other" answers take the
  ## free-text value from water2
  waterSource <- ifelse(surveyData$water1 == "Other (please specify)",
                        surveyData$water2,
                        surveyData$water1)
  ## Correct spelling variants found in the data
  waterSource[waterSource %in% "Deep tube well "] <- "Deep tube well"
  waterSource[waterSource %in% "Shallow tubwell"] <- "Shallow tube well"
  ## water1: improved source of drinking water
  water1 <- ifelse(waterSource %in% c("Bottled water",
                                      "Deep tube well",
                                      "Piped water into dwelling",
                                      "Piped water to yard/plot",
                                      "Public tap/standpipe/kiosk",
                                      "Water lifted by motor",
                                      "Cart with small tank/drum or tanker-truck",
                                      "Rainwater collection"), 1, 0)
  ## water2: formal/informal source of drinking water for Pareto chart
  water2 <- fill_blank(surveyData$water3)
  ## water2a: formal/informal source of drinking water for estimation
  water2a <- flag_match(surveyData$water3, positive = "Formal", missing = "")
  ## water3: source of water a WSUP-supported facility
  water3 <- fill_blank(surveyData$water5)
  ## water3a: source of water a WSUP-supported facility estimation
  water3a <- flag_match(surveyData$water5, missing = "")
  ## water4: mean number of hours per day water is available
  water4 <- surveyData$water7
  ## water4a: water available at least 12 hours (half a day)
  water4a <- ifelse(surveyData$water7 >= 12, 1, 0)
  ## water4b: water available for full day (missing hours count as 0)
  water4b <- ifelse(is.na(surveyData$water7), 0,
               ifelse(surveyData$water7 == 24, 1, 0))
  ## water5: mean number of days per week water available. The position of
  ## the answer in day_labels is the number of days; blank answers give NA
  ## and any other unrecognised answer gives 0
  day_labels <- paste(1:7, "day per week")
  water5 <- ifelse(surveyData$water9 == "", NA,
              as.numeric(match(surveyData$water9, day_labels, nomatch = 0L)))
  ## water5a: water available more than 3 days a week
  water5a <- ifelse(water5 > 3, 1, 0)
  ## water5b: water available the whole week (missing counts as 0)
  water5b <- ifelse(is.na(water5), 0,
               ifelse(water5 == 7, 1, 0))
  ## water6: water available for the whole year
  water6 <- ifelse(surveyData$water11 == "", "Don't know",
              ifelse(surveyData$water11 == "Yes", "Yes", "No"))
  ## water6a: water available for the whole year (estimation); blank answers
  ## count as 0
  water6a <- ifelse(surveyData$water11 == "Yes", 1, 0)
  ## water7: mean number of minutes to collect water (missing recoded to 0)
  water7 <- ifelse(is.na(surveyData$water14), 0, surveyData$water14)
  ## water7a: distance to water source is within 30 minutes or less
  water7a <- ifelse(water7 <= 30, 1, 0)
  ## water7b: satisfied with distance between home and water source
  ## NOTE(review): blank answers are coded 1 here, so the `== 2` recode used
  ## for water7c below never fires; confirm whether blanks were meant to be
  ## coded 2. Behaviour is left unchanged.
  water7b <- ifelse(surveyData$water16 == "", 1,
               ifelse(surveyData$water16 == "Yes", 1, 0))
  ## water7c: satisfied with distance between home and water source (estimate)
  water7c <- ifelse(water7b == 2, NA, water7b)
  ## water8: satisfied with queuing time
  ## 0 = No, 1 = Yes, 2 = blank with water19 TRUE, 3 = blank with water20
  ## TRUE, 4 = water18, water19 and water20 all missing
  water8 <- numeric(n_obs)
  water8[surveyData$water18 == "No"] <- 0
  water8[surveyData$water18 == "Yes"] <- 1
  water8[surveyData$water18 == "" & surveyData$water19 == TRUE] <- 2
  water8[surveyData$water18 == "" & surveyData$water20 == TRUE] <- 3
  water8[is.na(surveyData$water18) &
         is.na(surveyData$water19) &
         is.na(surveyData$water20)] <- 4
  ## water8a: satisfied with queuing time (codes 2 to 4 become NA)
  water8a <- ifelse(water8 %in% 2:4, NA, water8)
  ## water9: person who usually collects water
  ## waterCollect: person who usually collects water; "Other" answers take the
  ## free-text value from water22
  waterCollect <- ifelse(surveyData$water21 == "Other (please specify)",
                         surveyData$water22,
                         surveyData$water21)
  ## Blank answers and "Source inside house" are treated as missing
  waterCollect[waterCollect %in% c("", "Source inside house")] <- NA
  waterCollect <- split_responses(waterCollect)
  ## One column (water9a, water9b, ...) per distinct response
  water9 <- expand_responses(waterCollect, prefix = "water9")
  ## water10: amount of water (litres) used by household per day
  water10 <- fill_blank(surveyData$water23)
  ## water10a: amount of water sufficient
  water10a <- fill_blank(surveyData$water25)
  ## water10b: amount of water sufficient (estimate)
  water10b <- flag_match(water10a)
  ## water11: water from other sources
  ## Re-code "Don't know"
  water11 <- fill_blank(surveyData$water27)
  ## Re-code NAs
  water11a <- flag_match(water11)
  ## water11b: the other source used; "Other" answers take the free-text
  ## value from water30 and blanks become NA
  water11b <- ifelse(surveyData$water29 == "", NA,
                ifelse(surveyData$water29 == "Other (please specify)",
                  surveyData$water30, surveyData$water29))
  water11b <- ifelse(water11b == "", NA, water11b)
  ## Group and correct free-text answers
  water11b[water11b %in% "Mosque"] <- "Mosque, school, etc."
  water11b[water11b %in% c("Neighbour's house",
                           "Pump point of water")] <- "Other's water point"
  water11b[water11b %in% "Deep tube well"] <- "Tube well"
  water11b[water11b %in% "Cann't arranged"] <- "Can't arrange"
  ## water11c: other source is one of the listed source types (missing = 0)
  water11c <- ifelse(water11b %in% c("Bottled water",
                                     "Piped water into dwelling",
                                     "Piped water to yard/plot",
                                     "Protected well or spring in yard",
                                     "Public tap/standpipe/kiosk",
                                     "Tube well",
                                     "Water lifted by motor"), 1, 0)
  ## water11d: improved source within 30 minutes that is either available all
  ## day, all week and all year, or is backed up by a listed other source
  water11d <- ifelse(water1 == 1 & water7a == 1 &
                       water4b == 1 & water5b == 1 & water6a == 1, 1,
                ifelse(water1 == 1 & water7a == 1 &
                       ((water4b == 0 | water5b == 0 | water6a == 0) &
                       water11c == 1), 1, 0))
  ## water12: spend on water
  ## mean spend on water
  water12 <- ifelse(surveyData$water32 == "", NA, surveyData$water32)
  ## satisfied with price of water
  water12a <- fill_blank(surveyData$water35)
  ## satisfied with price of water - estimate
  water12b <- flag_match(water12a)
  ## water13: investment in improving water source
  water13 <- fill_blank(surveyData$water37)
  water13a <- flag_match(water13)
  ## water14: water quality
  water14 <- fill_blank(surveyData$water40)
  water14a <- flag_match(water14)
  ## water15: water quality improvement
  ## waterImprovement: water quality improvement; "Other" answers take the
  ## free-text value from water43
  waterImprovement <- ifelse(
    surveyData$water42 == "Other improvement (please specify)",
    surveyData$water43,
    surveyData$water42
  )
  waterImprovement <- fill_blank(waterImprovement,
                                 "Don't know/no answer/not applicable")
  waterImprovement <- split_responses(waterImprovement)
  ## "Other" can also appear as one of several answers; replace it in every
  ## answer position
  for (j in seq_len(ncol(waterImprovement))) {
    waterImprovement[, j] <- ifelse(
      waterImprovement[, j] == "Other improvement (please specify)",
      surveyData$water43,
      waterImprovement[, j]
    )
  }
  ## One column (water15a, water15b, ...) per distinct response
  ## NOTE(review): water15 is computed but is not part of the returned data
  ## frame; confirm whether that omission is intentional.
  water15 <- expand_responses(waterImprovement, prefix = "water15")
  ## water16: water pressure
  water16 <- fill_blank(surveyData$water45)
  water16a <- flag_match(water16)
  ## water17: support provider; "Other" answers take the free-text value from
  ## water48
  water17 <- ifelse(surveyData$water47 == "", "Don't know/not applicable",
               ifelse(surveyData$water47 == "Other (please specify)",
                 surveyData$water48, surveyData$water47))
  water17 <- fill_blank(water17, "Don't know/not applicable")
  ## Spelling corrections; these are Dhaka, Bangladesh 2017 specific
  spelling_fixes <- c(
    "Landlord/House owner " = "Landlord/House owner",
    "Caretaker " = "Caretaker",
    "Gaurd" = "Guard",
    "Gaurd " = "Guard",
    "No complain arise" = "No complaints",
    "No complain  arise" = "No complaints",
    "Till lemdon't face any problem" = "No complaints",
    "Water office " = "Water office",
    "Do no't cpmlain" = "Do not complain",
    "Parlament Member (MP)" = "Parliament Member (MP)",
    "Relavent office" = "Relevant office",
    "Messengaer " = "Messenger",
    "Local Gverenment Engineering office" =
      "Local Government Engineering office"
  )
  misspelt <- water17 %in% names(spelling_fixes)
  water17[misspelt] <- unname(spelling_fixes[water17[misspelt]])
  ## 1 when the provider is one of the listed providers, 0 otherwise; this
  ## list is Dhaka, Bangladesh 2017 specific
  water17 <- ifelse(water17 %in% c("Facility operator",
                                   "Local Government Engineering office",
                                   "Power and Water Development office",
                                   "Relevant office",
                                   "Water office",
                                   "Water supplier",
                                   "Water utility",
                                   "Women Affair Directory"), 1, 0)

  ## water18: water storage
  ## waterStorage: blank answers are labelled "Don't know/not applicable"
  waterStorage <- fill_blank(as.character(surveyData$wash60),
                             "Don't know/not applicable")
  waterStorage <- split_responses(waterStorage)
  ## TRUE for respondents who gave `label` in any answer position. All
  ## positions are checked, however many columns the split produced.
  stores_in <- function(label) {
    rowSums(waterStorage == label, na.rm = TRUE) > 0
  }
  ## water18: 1 when water is kept in a clean container with lid and in
  ## neither a clean container without lid nor a dirty container; NA for
  ## "Don't know/not applicable"; 0 otherwise
  water18 <- ifelse(stores_in("Don't know/not applicable"), NA,
               ifelse(stores_in("Clean container (with lid)") &
                      !stores_in("Clean container (without lid)") &
                      !stores_in("Dirty container"), 1, 0))
  ## waterQuality: this indicator was not collected in survey. For future
  ##               surveys, if water quality tests are performed, this indicator
  ##               will be calculated based on that data.
  ## Default to NULL so the variable is always defined, also when
  ## waterQualityDF exists but no processing logic has been added yet
  waterQuality <- NULL
  if (exists("waterQualityDF")) {
    ## Add logic here to process water quality data with waterQuality variable
    ## as the result to be used for calculating indicators
  }
  ## JMP indicators for drinking water - post-2015
  ## Surface water: river, dam, lake, pond, stream, canal or irrigation channel
  jmpWater1 <- ifelse(waterSource == "Surface water", 1, 0)
  ## Unimproved: unprotected dug wells, unprotected springs
  jmpWater2 <- numeric(n_obs)
  jmpWater2[water1 != 1 & waterSource != "Surface water"] <- 1
  ## Limited: Improved but more than 30 minutes collection time
  jmpWater3 <- numeric(n_obs)
  jmpWater3[water1 == 1 & water7a == 0] <- 1
  ## Determine if water quality data is available
  if (is.null(waterQuality)) {
    ## At least basic: improved and no more than 30 minutes collection time
    jmpWater4 <- numeric(n_obs)
    jmpWater4[water1 == 1 & water7a == 1] <- 1
    ## No safely managed indicator without water quality data: all NA
    jmpWater5 <- rep(NA_real_, n_obs)
  } else {
    ## Sources for which availability is also taken into account
    on_premises <- c("Piped water into dwelling",
                     "Piped water to yard/plot",
                     "Protected dug well or spring in yard")
    ## Basic: Improved and no more than 30 minutes collection time
    jmpWater4 <- numeric(n_obs)
    jmpWater4[water1 == 1 &
                !waterSource %in% on_premises &
                water7a == 1] <- 1
    jmpWater4[water1 == 1 &
                waterSource %in% on_premises &
                (water4a == 0 | water5b == 0 | water6a == 0) &
                water7a == 1] <- 1
    ## Safely managed: improved and no more than 30 minutes collection time
    ##                 and available when needed and free from priority
    ##                 contamination
    jmpWater5 <- numeric(n_obs)
    jmpWater5[water1 == 1 &
                waterSource %in% on_premises &
                water4a == 1 &
                water5b == 1 &
                water6a == 1 &
                waterQuality == 1] <- 1
  }
  ## accessWater: Access to sufficient and sustained drinking water
  accessWater <- ifelse(water10 %in% c("61-100 litres per day",
                                       "101-150 litres per day",
                                       "150-200 litres per day",
                                       "201-300 literes per day",
                                       ">301 litres per day") &
                          water4b == 1 & water5b == 1 & water6a == 1, 1, 0)
  ## Create waterDF
  ## Concatenate water indicators; water9 contributes one column per response
  waterDF <- data.frame("uniqueID" = surveyData[ , "uniqueID"],
                        waterSource, water1, water2, water2a, water3, water3a,
                        water4, water4a, water4b, water5, water5a, water5b,
                        water6, water6a, water7, water7a, water7b, water7c,
                        water8, water8a,
                        water9, water10, water10a, water10b,
                        water11, water11a, water11b, water11c, water11d,
                        water12, water12a, water12b,
                        water13, water13a,
                        water14, water14a,
                        water16, water16a,
                        water17, water18,
                        jmpWater1, jmpWater2, jmpWater3, jmpWater4, jmpWater5,
                        accessWater)
  ## Return results
  waterDF
}
validmeasures/wsup documentation built on Dec. 16, 2019, 4:50 a.m.