R/interpolation.R
In eechidna: Exploring Election and Census Highly Informative Data Nationally for Australia

Documented in allocate_electorate extract_centroids mapping_fn weighted_avg_census weighted_avg_census_sa1

#' Extract centroids from the polygons within a shapefile.
#' 
#' @param shapefile SpatialPolygonsDataFrame containing polygons
#' @return list containing centroids as SpatialPoints and a dataframe with
#' basic data on each polygon (e.g. name)
#' @export
#' @examples 
#' \dontrun{
#' sF_download(year = 2016)
#' electorate_centroids_2016 <- extract_centroids(sF_16)
#' }
# Function to extract centroids from shapefile
extract_centroids <- function(shapefile) {
  
  # Change polygon format
  polys <- methods::as(shapefile, "SpatialPolygons")
  
  # Function to get centroid from polygon
  centroid <- function(i, polys) {
    ctr <- sp::Polygon(polys[i])@labpt
    data.frame(long_c=ctr[1], lat_c=ctr[2])
  }
  
  # Apply
  centroids <-  purrr::map_df(seq_along(polys), centroid, polys=polys)
  
  # Transform to SpatialPoints, consistent format
  spatial_centroids <- SpatialPoints(centroids)
  proj4string(spatial_centroids) <- proj4string(shapefile)
  
  # Create list with centroids and data from shapefile
  spatial_centroids_ls <- list(centroids = spatial_centroids, data = shapefile@data)
  
  return(spatial_centroids_ls)
}

#' Determine which electoral division contains the centroid from each of the Census polygons.
#' 
#' Using the electoral boundaries at the time of an election and the centroids from the SA1
#' polygons from a neighbouring Census, allocate each SA1 to the electoral division that contains
#' its centroid.
#' 
#' @param centroids_ls list containing centroids as SpatialPoints and a dataframe with
#' basic data on each polygon (e.g. name)
#' @param electorates_sf shapefile with electoral boundaries
#' @param census_year census year
#' @param election_year election year
#' @return data frame detailing which electoral division each Census polygon is allocated to
#' @export
#' @examples 
#' \dontrun{
#' # Mapping each SA1 from the 2011 Census to the 2013 electoral boundaries
#' mapping_c11_e13 <- allocate_electorate(centroids_ls = centroids_sa1_2011, electorates_sf = sF_13, 
#' census_year = "2011", election_year = "2013")
#' }


# Function to allocate centroids to electorates
allocate_electorate <- function(centroids_ls, electorates_sf, census_year = NA, election_year = NA) {
  
  # Column names to use
  if ("CD_CODE_2001" %in% colnames(centroids_ls$data)) {
    centroids_ls$data <- centroids_ls$data %>% 
      rename("SA1_7DIGIT" = "CD_CODE_2001", "STATE_NAME" = "STE_NAME_2001")
  }
  
  if ("CD_CODE_2006" %in% colnames(centroids_ls$data)) {
    centroids_ls$data <- centroids_ls$data %>% 
      rename("SA1_7DIGIT" = "CD_CODE_2006", "STATE_NAME" = "STE_NAME_2006")
  }
  
  # Dataframe with SA1 names and placeholder electorate
  assign_df <- data.frame(sa1 = centroids_ls$data$SA1_7DIGIT, electorate = 0, 
    census_year = census_year, election_year = election_year)
  
  # Fix state names to be consistent
  fix_state_names <- function(state) {
    if (state == "Australian Capital Territory") {state = "ACT"}
    if (state == "New South Wales") {state = "NSW"}
    if (state == "Northern Territory") {state = "NT"}
    if (state == "Other Territories") {state = "OT"}
    if (state == "Queensland") {state = "QLD"}
    if (state == "South Australia") {state = "SA"}
    if (state == "Tasmania") {state = "TAS"}
    if (state == "Victoria") {state = "VIC"}
    if (state == "Western Australia") {state = "WA"}
    
    return(state)
  }
  
  centroids_ls$data$STATE_NAME_ABV <- sapply(centroids_ls$data$STATE_NAME, fix_state_names)
  
  # Loop through point-in-polygon checking
  for (i in 1:length(centroids_ls$centroids)) {
    
    # Print for progress tracking
    if (i %% 2000 == 0) {print(i)}
    
    # Extract centroid
    centroid <- centroids_ls$centroids[i]
    centroid_state <- centroids_ls$data$STATE_NAME_ABV[i]
    
    # Loop through electorates (in state if possible - for efficiency)
    if (centroid_state %in% unique(electorates_sf$state)) {
      
      # Get polygons in state only
      state_electorates_sf <- subset(electorates_sf, state == centroid_state)
      
      # Loop through electorates to assign
      for (j in 1:length(state_electorates_sf@polygons)) {
        
        # Electorate name and polygon
        electorate_name <- state_electorates_sf$elect_div[j]
        electorate_poly <- subset(state_electorates_sf, elect_div == electorate_name)
        
        # Does it contain centroid
        electorate_contains = gContains(electorate_poly, centroid)
        
        if (electorate_contains == TRUE) {
          assign_df$electorate[i] = electorate_name
          break
        } 
        
      }
      
    } else { # Not assigned to state, must check all electorates
      
      # Loop through electorates to assign
      for (j in 1:length(electorates_sf@polygons)) {
        
        # Electorate name and polygon
        electorate_name <- electorates_sf$elect_div[j]
        electorate_poly <- subset(electorates_sf, elect_div == electorate_name)
        
        # Does it contain centroid
        electorate_contains = gContains(electorate_poly, centroid)
        
        if (electorate_contains == TRUE) {
          assign_df$electorate[i] = electorate_name
          break
        }
      }
    }
  }
  
  assign_df <- assign_df %>% 
    filter(electorate != 0)
  
  return(assign_df)
}


#' Function to compute weighted average of Census information
#' using imputed populations as weights
#' 
#' @param mapping_df data frame detailing how much Census divisions intersect with each 
#' electoral division at the time of the election.
#' @param abs_df data frame holding Census information from Census year
#' @return data frame with imputed Census data for electoral boundaries at the time of 
#' the Census
#' @export
#' 
#' @examples 
#' \dontrun{
#' # Each 2013 electorate boundary's characteristics as at the time of the 2016 Census
#' mapping_c16_e13 <- allocate_electorate(centroids_ls = centroids_sa1_2016, electorates_sf = sF_13, 
#' census_year = "2016", election_year = "2013")
#' 
#' # Estimate 2016 Census data for the 2013 electorates
#' imputed_data_2016 <- weighted_avg_census_sa1(mapping_df = mapping_2016, abs_df = abs2016_cd)
#' }
weighted_avg_census_sa1 <- function(mapping_df, abs_df) {
  mapping_df <- mapping_df %>% 
    arrange(electorate) %>% 
    mutate(sa1 = as.character(sa1))
  
  abs_df <- abs_df %>% 
    mutate(CD = as.character(CD)) %>% 
    filter(!is.na(Population), Population > 0) %>%
    select(-c(ends_with("NS"), Area)) %>% 
    select(sort(tidyselect::peek_vars())) %>% 
    select(CD, Population, everything())
  
  divs <- unique(mapping_df$electorate)
  divs <- divs[divs != "0"]
  
  for (i in 1:length(divs)) {
    # Election division
    div <- divs[i]
    
    # Mapping for the division
    mapping_div <- mapping_df %>% 
      filter(electorate == div)
    
    # Census info from the relevant divisions
    census_divs <- abs_df %>%
      filter(CD %in% mapping_div$sa1) 
    
    # Weighted average of each variable, using only valid numbers
    imputed_df <- data.frame(
      vars = colnames(census_divs %>% select(-CD)), 
      value = 0)
    
    for (j in 1:nrow(imputed_df)) {
      
      # Variable
      var_name <- as.character(imputed_df$vars[j])
      var_vals <- census_divs[, var_name] %>% unlist
      var_population <- census_divs[, "Population"] %>% unlist
      
      # Filter valid numbers
      valid_var_index <- which(!var_vals %in% c(NA, Inf, NaN))
      valid_pop_index <- which(!var_population %in% c(NA, Inf, NaN, 0))
      valid_index <- intersect(valid_var_index, valid_pop_index)
      valid_var_vals <- var_vals[valid_index]
      valid_var_population <- var_population[valid_index]
      
      if (var_name != "Population") {
        # Imputation weights
        var_imputation_weights <- valid_var_population/sum(valid_var_population)
        
        # Imputed value
        imputed_df$value[j] <- sum(valid_var_vals * var_imputation_weights)
        
      } else {
        # Imputed Population
        imputed_df$value <- sum(valid_var_vals)
      }
      
    }
    
    
    # Imputed profile
    imputed_profile <- imputed_df %>% 
      column_to_rownames(var = "vars") %>% 
      t() %>% 
      data.frame() %>% 
      mutate(DivisionNm = div)
    
    # Update
    if (i == 1) {
      keep_imputed_profiles <- imputed_profile
    } else {
      keep_imputed_profiles <- bind_rows(keep_imputed_profiles, imputed_profile)
    }
    
  }
  
  return(keep_imputed_profiles)
}







#' Compute areas of intersection between each election boundary and those in
#' the Census of interest. This is a less refined method than using SA1 centroids.
#' 
#' At the time of an election, compute how much each electoral division
#' intersects with the divisions in place at the time of the Census.
#' This is to be used in interpolating Census information for electoral
#' divisions in a year that a Census did not occur.
#' 
#' @param aec_sF shapefile with boundaries at election time
#' @param abs_sF shapefile with boundaries at census time
#' @param area_thres threshold for which mapping is sufficient (default is 99.5\%)
#' @return data frame detailing how much Census divisions intersect with each 
#' electoral division at the time of the election.
#' @export
#' @examples 
#' \dontrun{
#' # Each 2013 electorate boundary's composition in terms of the 
#' # boundaries in place for the 2016 Census
#' aec_sF_2013 <- loadShapeFile(path_to_aec_shapefile)
#' abs_sF_2016 <- loadShapeFile(path_to_abs_shapefile)
#' 
#' mapping_df <- mapping_fn(aec_sF = aec_sF_2013, abs_sF = abs_sF_2016, area_thres = 0.995)
#' }

mapping_fn <- function(aec_sF, abs_sF, area_thres = 0.995) {
  
  # Function for distance between two points
  cdist <- function(x1, y1, x2, y2) {
    dist = sqrt((x2 - x1)^2 + (y2 - y1)^2)
  }
  
  # Placeholder
  Mapping_df <- data.frame(AEC_division = 0, ABS_division = 0, Intersect_area = 0, ABS_division_area = 0, AEC_division_area = 0)[-1,]
  
  # Electoral division names at election time
  division_names <- aec_sF$elect_div
  
  # Loop for breakdown of electorates
  for (i in 1:length(division_names)) {
    
    div_name <- division_names[i]
    div_poly <- aec_sF %>% subset(as.character(elect_div) == as.character(div_name))
    div_lat_c <- div_poly$lat_c
    div_long_c <- div_poly$long_c
    div_area <- suppressWarnings(rgeos::gArea(div_poly))
    
    # Ordering Census divisions by distance to electoral division
    comp <- abs_sF@data %>%
      dplyr::select(elect_div, long_c, lat_c) %>% rowwise %>%
      mutate(dist_centroid = cdist(lat_c, long_c, div_lat_c, div_long_c))
    
    comp <- comp[order(comp$dist_centroid),] # order by distance to centroid
    
    cens_names <- comp$elect_div
    cens_mapped <- data.frame(AEC_division = div_name, ABS_division = cens_names, 
      Intersect_area = 0, ABS_division_area = 0, AEC_division_area=div_area)
    
    # Loop for that electorate until break
    for (j in 1:length(cens_names)) {
      cens_poly <- abs_sF %>% subset(elect_div == cens_names[j])
      
      if (rgeos::gIntersects(div_poly, cens_poly)) { # Only if polygons intersect
        poly_intersect <- rgeos::gIntersection(div_poly, cens_poly)
        cens_mapped$Intersect_area[j] = suppressWarnings(rgeos::gArea(poly_intersect))
      }
      
      # break if sum of intersection areas is over threshold (area_thres)
      if ( (sum(cens_mapped$Intersect_area[1:j])/div_area) > area_thres ) {
        break
        }
      
    } 
    
    Mapping_df <- rbind(Mapping_df, cens_mapped)
    
  }
  
  # Remove zero intersections
  Mapping_df <- filter(Mapping_df, Intersect_area > 0)
  
  # Adding area of Census divsions
  cens_area <- data.frame(ABS_division = abs_sF$elect_div, ABS_division_area = 0)
  
  for (i in 1:nrow(cens_area)) {
    cens_area$ABS_division_area[i] = rgeos::gArea(abs_sF %>% subset(elect_div == cens_area[i,1]))
  }
  
  for (i in 1:nrow(Mapping_df)) {
    Mapping_df$ABS_division_area[i] = cens_area[which(cens_area$ABS_division == Mapping_df$ABS_division[i]), 2]
  }
  
  ### Adding percentage of Census and Electorate intersections
  Mapping_df <- Mapping_df %>%
    mutate(Percent_Elec_Composition = Intersect_area/AEC_division_area,
      Percent_Census_Composition = Intersect_area/ABS_division_area)
  
  return(Mapping_df)
}

#' Function to compute weighted average of Census information
#' using imputed populations as weights.
#' 
#' This is a less refined method than using SA1 centroids, because it uses Census data
#' aggregated at Census division level.
#' 
#' @param mapping_df data frame detailing how much Census divisions intersect with each 
#' electoral division at the time of the election.
#' @param abs_df data frame holding Census information from Census year
#' @return data frame with imputed Census data for electoral boundaries at the time of 
#' the Census
#' @export
#' 
#' @examples 
#' \dontrun{
#' data("abs2016")
#' 
#' # Each 2013 electorate boundary's composition in terms of the 
#' # boundaries in place for the 2016 Census
#' aec_sF_2013 <- loadShapeFile(path_to_aec_shapefile)
#' abs_sF_2016 <- loadShapeFile(path_to_abs_shapefile)
#' mapping_2016 <- mapping_fn(aec_sF = aec_sF_2013, abs_sF = abs_sF_2016)
#' 
#' # Estimate 2016 Census data for the 2013 electorates
#' imputed_data_2016 <- weighted_avg_census(mapping_df = mapping_2016, abs_df = abs2016)
#' }

weighted_avg_census <- function(mapping_df, abs_df) {
  mapping_df <- mapping_df %>% 
    arrange(as.character(AEC_division), as.character(ABS_division))
  
  abs_df <- abs_df %>% 
    arrange(as.character(DivisionNm))
  
  divs <- unique(mapping_df$AEC_division)
  
  for (i in 1:length(divs)) {
    # Election division
    div <- divs[i]
    
    # Mapping for the division
    mapping <- mapping_df %>% 
      filter(AEC_division == div)
    
    # Census info from the relevant divisions
    census_divs <- abs_df %>% 
      filter(DivisionNm %in% mapping$ABS_division) %>% 
      select(-c(ends_with("NS"), Area, UniqueID, State)) %>% 
      left_join(mapping, by = c("DivisionNm" = "ABS_division")) %>% 
      # add imputed population
      mutate(imputed_population = Percent_Census_Composition*Population)
    
    # Net imputed population
    census_divs <- census_divs %>% 
      mutate(total_pop = sum(census_divs$imputed_population),
        weight = imputed_population/total_pop)
    
    # Weighted average
    imputed_profile <- (census_divs %>% 
        select(c(Age00_04:Volunteer)) *
        census_divs$weight) %>% 
      colSums() %>% 
      t() %>% 
      data.frame() 
    
    imputed_profile <- imputed_profile %>% 
    select(noquote(order(colnames(imputed_profile)))) %>% 
      mutate(DivisionNm = div)
    
    if (i == 1) {
      keep_imputed_profiles <- imputed_profile
    } else {
      keep_imputed_profiles <- bind_rows(keep_imputed_profiles, imputed_profile)
    }
    
  }
  
  return(keep_imputed_profiles)
}

Any scripts or data that you put into this service are public.

eechidna documentation built on Feb. 25, 2021, 5:08 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

eechidna
Exploring Election and Census Highly Informative Data Nationally for Australia

R/interpolation.R
In eechidna: Exploring Election and Census Highly Informative Data Nationally for Australia

Defines functions weighted_avg_census mapping_fn weighted_avg_census_sa1 allocate_electorate extract_centroids

Documented in allocate_electorate extract_centroids mapping_fn weighted_avg_census weighted_avg_census_sa1

Try the eechidna package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

eechidna Exploring Election and Census Highly Informative Data Nationally for Australia

R/interpolation.R In eechidna: Exploring Election and Census Highly Informative Data Nationally for Australia

Defines functions weighted_avg_census mapping_fn weighted_avg_census_sa1 allocate_electorate extract_centroids

Documented in allocate_electorate extract_centroids mapping_fn weighted_avg_census weighted_avg_census_sa1

Try the eechidna package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

eechidna
Exploring Election and Census Highly Informative Data Nationally for Australia

R/interpolation.R
In eechidna: Exploring Election and Census Highly Informative Data Nationally for Australia