R/js_prep.R

Defines functions get_offices js_cond_table_prep js_num_selected js_results js_vote_time

library(tidyverse)
library(fastDummies)
library(stringr)
library(lubridate)
library(data.table)

wards_divisions <- data.frame(wards = 1:66,
                              divisions = c(21, 29, 22, 21, 37,
                                            18, 23, 35, 17, 29,
                                            20, 24, 25, 11, 19,
                                            18, 29, 19, 19, 11,
                                            45, 29, 23, 19, 24,
                                            23, 23, 18, 18, 17,
                                            19, 31, 24, 42, 32,
                                            41, 21, 21, 46, 51,
                                            26, 25, 25, 19, 25,
                                            23, 14, 23, 22, 30,
                                            28, 28, 23, 22, 29,
                                            41, 28, 44, 25, 23,
                                            28, 26, 25, 18, 23,
                                            46))
wards_divisions$wards[nchar(wards_divisions$wards) == 1] <- paste0("0", wards_divisions$wards[nchar(wards_divisions$wards) == 1])
locations <- c()
for (i in 1:nrow(wards_divisions)) {
  temp <- paste0(wards_divisions$wards[i], "-", 1:wards_divisions$divisions[i])
  temp[nchar(temp) == 4] <- gsub("-", "-0", temp[nchar(temp) == 4])
  locations <- c(locations, temp)
}

#  locations[!locations %in% unique(election_2019_general_bir$location)]
#
setwd("C:/Users/user/Dropbox/R_project/phillyvotes/data/clean_data/")
files <- list.files(pattern = ".rda")
BIR_files <- files[grep("bir.rda", files)]
CAR_files <- files[grep("car.rda", files)]
BIR_files_reg <- BIR_files[!grepl("special", BIR_files)]

for (file in BIR_files[19]) {
  setwd("C:/Users/user/Dropbox/R_project/phillyvotes/data/clean_data/")
  file_name <- gsub(".rda", "", file)
  election <- gsub("_...$", "", file_name)
  load(file)
  assign("data", get(file_name))
  do.call("rm", list(as.name(file_name)))
  print(file_name)

  message("\n")
  #print(sort(unique(data$candidate)))
  #print(grep("court order|write", unique(data$candidate)))

  #cands <- sort(unique(data$candidate))
  # print(cands)
  # print(cands[nchar(cands) == min(nchar(cands))])
  # print(cands[nchar(cands) == max(nchar(cands))])
  message("\n\n\n\n\n")
  #
  #
  js_results(data, election)
  message("Results Done!!!!")
  js_num_selected(data, election)
  message("Number Selected Done!!!!")
  js_cond_table_prep(data, election)
  message("Conditional Table Done!!!!")
  #  vote_time_results <-  js_vote_time(data, election)
  #   message("Vote Time Done!!!!")
  #
  rm(data); gc();  Sys.sleep(3)
}

js_vote_time <- function(data, election) {

  data$vote_time_minutes[data$vote_time_minutes > 30] <- NA
  data$vote_time_minutes[data$vote_time_minutes == 0] <- NA
  final_data <- data.frame(stringsAsFactors = FALSE)

  data$hour[data$hour < 7] <- 7
  data$hour[data$hour > 19] <- 19

  # city-wide data
  df <-
    data %>%
    dplyr::filter(!is.na(hour)) %>%
    dplyr::group_by(hour) %>%
    dplyr::summarise(votes = n(),
                     vote_time = median(vote_time_minutes,
                                        na.rm = TRUE)) %>%
    dplyr::mutate(vote_percent = round(votes / sum(votes) * 100, digits =  1),
                  ward         = "All",
                  division     = "All")
  final_data <- dplyr::bind_rows(final_data, df)

  # ward-wide data
  df <-
    data %>%
    dplyr::filter(!is.na(hour)) %>%
    dplyr::group_by(hour, ward) %>%
    dplyr::summarise(votes = n(),
                     vote_time = median(vote_time_minutes,
                                        na.rm = TRUE)) %>%
    dplyr::group_by(ward) %>%
    dplyr::mutate(vote_percent = round(votes / sum(votes) * 100, digits =  1),
                  division    = "All")
  df$ward <- as.character(df$ward)
  final_data <- dplyr::bind_rows(final_data, df)

  # ward-division data
  df <-
    data %>%
    dplyr::filter(!is.na(hour)) %>%
    dplyr::group_by(hour, ward, division) %>%
    dplyr::summarise(votes = n(),
                     vote_time = median(vote_time_minutes,
                                        na.rm = TRUE)) %>%
    dplyr::group_by(ward, division) %>%
    dplyr::mutate(vote_percent = round(votes / sum(votes) * 100,
                                       digits =  1))
  df$ward <- as.character(df$ward)
  df$division <- as.character(df$division)
  final_data <- dplyr::bind_rows(final_data, df)
  print(summary(final_data$vote_time))

  for (selected_ward in unique(final_data$ward)) {
    temp <-
      final_data %>%
      dplyr::filter(ward %in% selected_ward) %>%
      dplyr::select(-ward) %>%
      dplyr::mutate(vote_time = round(vote_time, 2)) %>%
      dplyr::select(hour,
                    votes,
                    vote_percent,
                    division,
                    vote_time)

    temp <- t(temp)
    temp <- jsonlite::toJSON(temp, pretty = TRUE)
    setwd(paste0("~/phillyvotingtool/data/vote_time/", election))
    write(temp, paste0(election, "_ward_", selected_ward,  "_time.json"))

  }
  possible_wards <- unique(final_data$ward)
  possible_wards <- possible_wards[possible_wards != "All"]
  possible_wards <- sort(as.numeric(possible_wards))
  possible_wards <- c("All", possible_wards)
  possible_wards <- jsonlite::toJSON(possible_wards,
                                     pretty = TRUE)
  write(possible_wards,
        paste0("wards.json"))

  possible_locations <- unique(paste0(final_data$ward, "-",
                                      final_data$division))
  possible_locations <- possible_locations[!grepl("All", possible_locations)]
  possible_locations <- jsonlite::toJSON(possible_locations,
                                         pretty = TRUE)
  write(possible_locations,
        paste0("locations.json"))

  return(final_data)
}

js_results <- function(data, election) {
  offices <- get_offices(data)
  data <-
    data %>%
    dplyr::filter(category %in% offices)

  final_matrix <- data.frame(stringsAsFactors = FALSE)
  # city-wide data
  df <-
    data %>%
    dplyr::group_by(category, candidate) %>%
    dplyr::summarise(votes = n()) %>%
    dplyr::arrange(desc(votes))
  df$ward <- "All"
  df$division <- "All"
  final_matrix <- dplyr::bind_rows(final_matrix, df)

  # ward-wide data
  df <-
    data %>%
    dplyr::group_by(category, candidate, ward) %>%
    dplyr::summarise(votes = n()) %>%
    dplyr::arrange(desc(votes)) %>%
    dplyr::mutate(ward = as.character(ward))
  df$division <- "All"
  final_matrix <- dplyr::bind_rows(final_matrix, df)

  # ward-division data
  df <-
    data %>%
    dplyr::group_by(category, candidate, ward, division) %>%
    dplyr::summarise(votes = n()) %>%
    dplyr::arrange(desc(votes))
  df$ward     <- as.character(df$ward)
  df$division <- as.character(df$division)
  final_matrix <- dplyr::bind_rows(final_matrix, df)

  # Saves a file to generate the available offices dropdown
  all_offices <- unique(df$category)
  setwd(paste0("~/phillyvotingtool/data/election_results/", election))
  all_offices <- jsonlite::toJSON(all_offices,
                                  pretty = TRUE)
  write(all_offices,
        paste0("election_results_office_choices.json"))


  for (office in offices) {
    results <-
      final_matrix %>%
      dplyr::filter(category %in% office) %>%
      dplyr::select(-category)

    possible_wards <- unique(results$ward)
    possible_wards <- possible_wards[possible_wards != "All"]
    possible_wards <- sort(as.numeric(possible_wards))
    possible_wards <- c("All", possible_wards)
    possible_wards_vector <- possible_wards
    possible_wards <- jsonlite::toJSON(possible_wards,
                                       pretty = TRUE)
    write(possible_wards,
          paste0("wards_", office, ".json"))

    possible_locations <- unique(paste0(results$ward, "-",
                                        results$division))
    possible_locations <- possible_locations[!grepl("All", possible_locations)]
    possible_locations <- jsonlite::toJSON(possible_locations,
                                           pretty = TRUE)
    write(possible_locations,
          paste0("locations_", office, ".json"))

    # If no votes for a candidate-location pair, adds a row with a
    # votes value of 0
    #
    #     results <- dummy_rows(results, select_columns = c("candidate",
    #                                                       "ward",
    #                                                       "division"),
    #                           dummy_value = 0)
    for (selected_ward in possible_wards_vector) {
      final_results <-
        results %>%
        dplyr::filter(ward %in% selected_ward) %>%
        dplyr::select(-ward)
      final_results <- t(final_results)
      final_results <- jsonlite::toJSON(final_results, pretty = TRUE)
      setwd(paste0("~/phillyvotingtool/data/election_results/", election))
      write(final_results,
            paste0("election_results_", office, "_ward_", selected_ward, ".json"))
    }
  }
}



# This produces a vector of the names of offices where a person can vote for
# more than one candidate - useful for making the options a user can select
# for the num_selected_graph
js_num_selected <- function(data, election) {
  if (!grepl("special", election)) {

    office_choices <- data.frame(stringsAsFactors = FALSE)
    offices <- get_offices(data)
    data <-
      data %>%
      dplyr::filter(tolower(candidate) != "write in" &
                      category %in% offices)
    results <- data.frame(stringsAsFactors = FALSE)

    # Ward 6, division 5 has an issue in this election where there are duplicate
    # reports of votes (some voter reported in 2 files) so this removes
    # that location entirely.
    if (election == "election_2017_general") {
      data <-
        data %>%
        dplyr::filter(location != "06-05")
    }


    if (grepl("primary", election, ignore.case = TRUE)) {
      data <-
        data %>%
        dplyr::group_by(uniqueID) %>%
        dplyr::mutate(democrat = ifelse(grepl("democrat", category,
                                              ignore.case = TRUE),
                                        1, 0),
                      republican = ifelse(grepl("republican", category,
                                                ignore.case = TRUE),
                                          1, 0)) %>%
        dplyr::filter(democrat == 1 | republican == 1)


    } else {
      # This is just a placeholder to give every value 1 so everything is
      # grouped together during general elections when votes aren't split
      # by party. It doesn't actually mean everyone is Democrat.
      data$democrat <- 1
    }

    # total_uniqueIDs <-
    #   data %>%
    #   group_by(democrat) %>%
    #   dplyr::summarize(total_uniqueIDs = length(unique(uniqueID)))
    #
    # ward_total_uniqueIDs <-
    #   data %>%
    #   group_by(ward, democrat) %>%
    #   dplyr::summarize(total_uniqueIDs = length(unique(uniqueID)))

    ward_division_total_uniqueIDs <-
      data %>%
      group_by(ward, division, democrat) %>%
      dplyr::summarize(total_uniqueIDs = length(unique(uniqueID)))

    # ward-division wide data
    df <-
      data %>%
      dplyr::group_by(category, ward, division, uniqueID, democrat) %>%
      dplyr::summarize(count = n()) %>%
      dplyr::group_by(category, ward, division, count, democrat) %>%
      dplyr::tally() %>%
      dplyr::left_join(ward_division_total_uniqueIDs)
    temp <-
      df %>%
      dplyr::group_by(category, ward, division) %>%
      dplyr::mutate(n     = total_uniqueIDs - sum(n),
                    count = 0) %>%
      dplyr::distinct(category, .keep_all = TRUE)
    df <- bind_rows(df, temp)
    df$ward <- as.character(df$ward)
    df$division <- as.character(df$division)
    results <- bind_rows(results, df)

    # ward-wide data
    ward_total_uniqueIDs <-
      results %>%
      group_by(category, ward, democrat) %>%
      dplyr::summarize(total_uniqueIDs = sum(n))
    ward_total_uniqueIDs$ward <- as.numeric(ward_total_uniqueIDs$ward)

    df <- data.frame(df)
    df <-
      df %>%
      dplyr::mutate(ward = as.numeric(ward)) %>%
      dplyr::group_by(category, ward, count, democrat) %>%
      dplyr::rename(temp_n = n) %>%
      dplyr::summarize(n = sum(temp_n)) %>%
      dplyr::mutate(division = "All") %>%
      dplyr::left_join(ward_total_uniqueIDs)
    df$ward <- as.character(df$ward)
    results <- bind_rows(results, df)


    # City-wide data
    df <-
      df %>%
      dplyr::select(-ward, -division) %>%
      dplyr::group_by(category, democrat, count) %>%
      dplyr::rename(temp_n = n) %>%
      dplyr::summarize(n = sum(temp_n),
                       total_uniqueIDs = sum(total_uniqueIDs)) %>%
      dplyr::group_by(category, count, democrat) %>%
      dplyr::mutate(ward     = "All",
                    division = "All")
    results <- bind_rows(df, results)



    multiple_choice_offices <-
      results %>%
      dplyr::group_by(category) %>%
      dplyr::summarize(max_choices = max(count))

    results <-
      results %>%
      dplyr::filter(category %in% multiple_choice_offices$category) %>%
      dplyr::mutate(percent = round(n / total_uniqueIDs * 100, 1)) %>%
      dplyr::select(-democrat,
                    -n,
                    -total_uniqueIDs)
    results$democrat <- NULL
    results <- data.frame(results)
    for (office in unique(results$category)) {
      final_results <-
        results %>%
        dplyr::filter(category %in% office) %>%
        dplyr::select(-category) %>%
        dplyr::arrange(count)


      setwd(paste0("~/phillyvotingtool/data/num_selected/", election))
      possible_wards <- unique(final_results$ward)
      possible_wards <- possible_wards[possible_wards != "All"]
      possible_wards <- sort(as.numeric(possible_wards))
      possible_wards <- c("All", possible_wards)
      possible_wards_vector <- possible_wards
      possible_wards <- jsonlite::toJSON(possible_wards,
                                         pretty = TRUE)
      write(possible_wards,
            paste0("wards_", office, ".json"))

      possible_locations <- unique(paste0(final_results$ward, "-",
                                          final_results$division))
      possible_locations <- possible_locations[!grepl("All", possible_locations)]
      possible_locations <- jsonlite::toJSON(possible_locations,
                                             pretty = TRUE)
      write(possible_locations,
            paste0("locations_", office, ".json"))

      for (selected_ward in possible_wards_vector) {
        real_final_results <-
          final_results %>%
          dplyr::filter(ward %in% selected_ward) %>%
          dplyr::select(-ward)
        real_final_results <- t(real_final_results)
        real_final_results <- jsonlite::toJSON(real_final_results, pretty = TRUE)
        setwd(paste0("~/phillyvotingtool/data/num_selected/", election))
        write(real_final_results,
              paste0("num_selected_", office, "_ward_", selected_ward, ".json"))
      }
    }
    setwd(paste0("~/phillyvotingtool/data/num_selected/", election))
    multiple_choice_offices <- as.data.frame(multiple_choice_offices)
    multiple_choice_offices <- jsonlite::toJSON(multiple_choice_offices,
                                                pretty = TRUE)
    write(multiple_choice_offices,
          paste0("num_selected_office_choices.json"))
  }
}


js_cond_table_prep <- function(data, election) {
  data$uniqueID <- as.character(data$uniqueID)
  data <- as.data.frame(data)
  data <- data[, which(names(data) %in% c("location",
                                          "serial_number",
                                          "uniqueID",
                                          "ballot_position",
                                          "candidate",
                                          "category",
                                          "ward",
                                          "division",
                                          "file",
                                          "uniqueID_with_pdf"))]

  # Ward 6, division 5 has an issue in this election where there are duplicate
  # reports of votes (some voter reported in 2 files) so this removes
  # that location entirely.
  if (election == "election_2017_general") {
    data <-
      data %>%
      dplyr::filter(location != "06-05")
  }
  source('C:/Users/user/Dropbox/R_project/phillyvotes/R/conditional_table.R')
  offices <- get_offices(data)
  setwd(paste0("~/phillyvotingtool/data/cand_comb/", election))
  all_offices <- jsonlite::toJSON(offices,
                                  pretty = TRUE)
  write(all_offices,
        paste0("cand_comb_office_choices.json"))

  data$office_location <- paste(data$category,
                                data$ward,
                                data$division)


  if (grepl("primary", election, ignore.case = TRUE)) {
    dem_offices <- offices[grepl("democrat", offices, ignore.case = TRUE)]
    rep_offices <- offices[grepl("republican", offices, ignore.case = TRUE)]
    dem_office_lists <- combn(dem_offices, 2, simplify = FALSE)
    rep_office_lists <- combn(rep_offices, 2, simplify = FALSE)
    office_lists <- append(as.list(offices), dem_office_lists)
    office_lists <- append(office_lists, rep_office_lists)
  } else {
    office_lists <- combn(offices, 2, simplify = FALSE)
    office_lists <- append(as.list(offices), office_lists)
  }

  # Since you cannot vote for two district, removes if both of the offices
  # are districts
  gen_assembly_rep     <- "repres.*assembly.*district"
  gen_assembly_senator <- "senator.*assembly.*district"
  congress_rep         <- "repres.*congress.*district"
  district_council_rep <- "district council"
  for (i in length(office_lists):1) {
    num_districts_1 <- str_count(tolower(office_lists[[i]]), gen_assembly_rep)
    num_districts_2 <- str_count(tolower(office_lists[[i]]), gen_assembly_senator)
    num_districts_3 <- str_count(tolower(office_lists[[i]]), congress_rep)
    num_districts_4 <- str_count(tolower(office_lists[[i]]), district_council_rep)
    num_districts_5 <- str_count(tolower(office_lists[[i]]),
                                 "general assembly")
    if (sum(num_districts_1) > 1 |
        sum(num_districts_2) > 1 |
        sum(num_districts_3) > 1 |
        sum(num_districts_4) > 1 |
        sum(num_districts_5) > 1) {
      office_lists[[i]] <- NULL
    }
  }


  data <- data[data$category %in% offices, ]
  data <- data.table::data.table(data)
  gc(); Sys.sleep(1)
  current_files <- list.files()

  # Total number of possible office-combinations
  # Not all will run in this loop since some have already been
  # created earlier or have no common locations
  print(paste("Number of offices-combinations:", length(office_lists)))
  message("\n\n\n\n")


  for (n in 1:length(office_lists)) {

    office <- office_lists[[n]]
    office <- sort(office)
    office_temp <- data[data$category %in% office]


    office_save_name <- paste(office, collapse = " ")
    # If file is already created, skip the loop
    if (any(grepl(paste0("cand_comb_",
                         office_save_name,
                         "_ward_"), current_files))) {
      next
    }
    gc()


    # If the offices DO share any common wards, allow to continue
    location1 <- unique(office_temp$location[office_temp$category %in% office[1]])
    location2 <- unique(office_temp$location[office_temp$category %in% office[length(office)]])
    common_locations <- intersect(location1, location2)

    # If no common locations (i.e. only votes from different wards), skips the loop
    if (length(common_locations) == 0) {
      next
    }
    office_temp <- office_temp[office_temp$location %in% common_locations]
    gc()

    message(n)

    common_locations <- sort(common_locations)
    unique_locations <- c("All-All",
                          sort(paste0(unique(office_temp$ward), "-All")),
                          common_locations)
    possible_wards <- c()
    possible_locations <- c()
    final_data <- data.frame(stringsAsFactors = FALSE)

    for (i in 1:length(unique_locations)) {
      selected_location <- unique_locations[i]
      if (selected_location == "All-All") {
        df <- office_temp
      } else if (grepl("[0-9]-All", selected_location)) {
        selected_ward <- gsub("-All", "", selected_location)
        df <- office_temp[ward %in% selected_ward]
      } else {
        df <- office_temp[location %in% selected_location]
      }
      office_temp$uniqueID <- as.character(office_temp$uniqueID)
      df$uniqueID          <- as.character(df$uniqueID)


      df          <- conditional_table(office_temp, df)
      temp        <- data.frame(t(names(df)), stringsAsFactors = FALSE)
      names(temp) <- names(df)
      df          <- bind_rows(temp, df)

      names(df) <- paste0("col_", 1:ncol(df))
      df$ward   <- gsub("-.*", "", selected_location)
      if (df$ward[1] != "All"){
        df$ward <- as.character(as.numeric(df$ward))
      }
      df$division   <- gsub(".*-", "", selected_location)
      if (df$division[1] != "All") {
        df$division <- as.character(as.numeric(df$division))
      }
      final_data <- dplyr::bind_rows(final_data, df)


      possible_wards     <- c(possible_wards,
                              unique(df$ward))
      possible_locations <- c(possible_locations,
                              selected_location)
    }

    possible_wards <- unique(possible_wards)
    possible_wards <- possible_wards[possible_wards != "All"]
    possible_wards <- sort(as.numeric(possible_wards))
    possible_wards <- c("All", possible_wards)
    possible_wards_vector <- possible_wards
    possible_wards <- jsonlite::toJSON(possible_wards,
                                       pretty = TRUE)

    possible_locations <- possible_locations[!grepl("All", possible_locations)]
    possible_locations <- jsonlite::toJSON(possible_locations,
                                           pretty = TRUE)
    write(possible_locations,
          paste0("locations_", office_save_name, ".json"))


    for (k in 1:ncol(final_data)) {
      final_data[, k][is.na(final_data[, k])] <- 0
      final_data[, k][final_data[, k] == "V1"] <- ""
      final_data[, k][grepl("To Remove", final_data[, k])] <- ""

    }

    write(possible_wards,
          paste0("wards_", office_save_name, ".json"))


    for (selected_ward in possible_wards_vector) {

      real_final_data <-
        final_data %>%
        dplyr::filter(ward %in% selected_ward) %>%
        dplyr::select(-ward)

      if (length(real_final_data) > 0) {
        readr::write_csv(real_final_data, path = paste0("cand_comb_",
                                                        office_save_name,
                                                        "_ward_",
                                                        selected_ward,
                                                        ".csv"))
      }
    }
  }
}




get_offices <- function(data) {
  offices <- unique(data$category)
  state_committee <- grep("state committee",
                          offices,
                          value = TRUE,
                          ignore.case = TRUE)
  offices <- grep("INSPECTOR|JUDGE OF ELECTION|CHARTER|RETENTION|BOND|WARD|AMENDMENT|ALTERNATE DELEGATE|COMMITTEE|PROPOSED",
                  offices,
                  value = TRUE,
                  ignore.case = TRUE,
                  invert = TRUE)
  offices <- c(offices, state_committee)
  offices <- offices[!is.na(offices)]
  offices <- sort(offices)
  return(offices)
}
jacobkap/phillyvotes documentation built on May 18, 2023, 5:48 a.m.