library(testthat)
#' Add postcode variables e.g. Ward, to a data frame with postcodes #' #' @param df data frame with a postcode variable #' @param pcd_name name of the postcode variable in passed the data frame #' @param .admin_district boolean indicating whether to add an admin_district variable #' @param .lat_long boolean indicating whether to add a pair of latitude & longitude variables #' @param other_vars character vector of other variables to add #' #' @return #' data frame with variables added based on postcode #' @export #' #' @examples add_postcode_vars <- function(df, pcd_name = "postcode", .admin_district = TRUE, .lat_long = FALSE, other_vars = character(0)){ ## bulk_postcode_lookup() is currently not ignoring white space ---- # - looks like it might be a postcodes.io issue rather than PostcodesioR # New variable with postcode minus white space df <- dplyr::mutate( df, postcode_query = stringr::str_replace_all(postcode, " " , "") ) ## Build a vector of variables of interest ---- # Create an empty character vector pcd_vars <- character(0) # From the arguments passed in the function call if(.admin_district) pcd_vars <- append(pcd_vars, "admin_district") if(.lat_long) pcd_vars <- append(pcd_vars, c("longitude", "latitude")) if(length(other_vars) > 0) pcd_vars <- append(pcd_vars, other_vars) # Remove any duplicates pcd_vars <- unique(pcd_vars) ## Build a postcode lookup template and a list of columns in the template ---- # Empty postcode lookup data frame template pcd_template <- PostcodesioR::postcode_lookup("S1 2HH") %>% dplyr::filter(postcode == "ZZ ZZZ") # Data frame of postcode lookup column names, types & indices pcd_cols <- tidyr::tibble( name = colnames(pcd_template), type = lapply(pcd_template, class)) %>% #TODO: lose or use & extract from list tibble::rowid_to_column(var = "rowid") %>% dplyr::relocate(rowid, .after = last_col()) # The position of "codes" helps with nested "*_code" values min_codes_var <- pcd_cols %>% dplyr::filter(stringr::str_ends(name, "_code")) %>% dplyr::filter(rowid == min(rowid)) codes_position <- min_codes_var$rowid # Add index used to extract var from results pcd_cols <- dplyr::mutate( pcd_cols, index = purrr::map( rowid, ~ ifelse(.x < codes_position, list(c(2, .x)), list(c( 2, codes_position, .x + 1 - codes_position )) ) )) %>% dplyr::select(-rowid) # Filter list of postcode columns to vars of interest pcd_cols <- dplyr::filter(pcd_cols, name %in% pcd_vars) # Filter template columns to vars of interest pcd_details <- dplyr::select(pcd_template, dplyr::all_of(pcd_vars)) %>% dplyr::mutate(postcode_query = as.character()) # Need to distinguish between postcode passed & matched e.g. s1 2hh & S1 2HH pcd_match <- "postcode" %in% colnames(pcd_details) # flag is used more than once if(pcd_match) pcd_details <- pcd_details %>% dplyr::mutate(pcd_match = character(0)) ## Do some checks ----- # Check we have some postcode variables in the function arguments stopifnot(length(pcd_vars) > 0) # Check we have some VALID postcode variables in the function arguments stopifnot(nrow(dplyr::filter(pcd_cols, name %in% pcd_vars)) > 0) # Warn if there's an unused postcode variable from the function arguments if(length(pcd_vars)!=length(pcd_cols)) { warnings(paste0( "One or more of the variables requested is not available. ", "Check spelling and docs.ropensci.org/PostcodesioR/") ) } ## Handle empty & duplicate postcodes ----- # Snapshot to refer and join to later df_orig <- df # PostcodesioR expects postcodes to be labelled "postcode" df <- dplyr::rename(df, postcode = {{pcd_name}}) # we rename it back in the returned df # Don't process empty or duplicate postcodes df <- df %>% tidyr::drop_na(postcode) %>% dplyr::distinct(postcode, .keep_all = TRUE) ## Batch-by-batch postcode lookup ---- # PostcodesioR puts 100 row limit on bulk_postcode_lookup # - so we're going to have to process the request in batches batch_size_max <- 100 ### Batch loop prep ---- # Numbers involved in breaking postcode lookup into batches total_rows <- nrow(df) whole_batches <- total_rows %/% batch_size_max remainder_batch_size <- total_rows %% batch_size_max n_batches <- ifelse(remainder_batch_size == 0, whole_batches, whole_batches + 1) ### Batch loop ---- for (i in 1:n_batches) { #### Slice a batch ---- # Determine the size of the batch batch_size <- ifelse(i != n_batches, batch_size_max, remainder_batch_size) # Small probability (1 in 100) but need to check ... batch_size <- ifelse(batch_size == 0, batch_size_max, batch_size) # Determine the start and end records of the batch batch_start <- ifelse(i == 1, 1, ((i-1) * batch_size_max) + 1) batch_end <- ifelse( i == n_batches, (ifelse(i == 1, i, (i - 1)) * batch_size_max) + remainder_batch_size, i * batch_size_max ) # Get a batch of petition records batch <- dplyr::slice(df, batch_start:batch_end) #### Bulk lookup request to postcodes.io ---- bulk_lookup_rslt <- PostcodesioR::bulk_postcode_lookup( list(postcodes = batch$postcode_query) ) #### Extract the vars from the postcodes.io results in to a data frame ---- df_batch_rslt <- purrr::map2_dfc( pcd_cols$name, pcd_cols$index, ~ dplyr::tibble(!!.x := purrr::map( bulk_lookup_rslt, unlist(.y), .default = NA) ) ) %>% tidyr::unnest(cols = dplyr::all_of(pcd_vars)) # Need to distinguish between postcode passed & matched e.g. s1 2hh & S1 2HH if(pcd_match) df_batch_rslt <- df_batch_result %>% dplyr::rename(pcd_match = postcode) # Add postcode passed i.e. our primary key df_batch_rslt <- tibble::add_column( df_batch_rslt, postcode_query = purrr::map_chr(bulk_lookup_rslt, c(1, 1), .default = ""), .before = 1 ) # Cumulative results from batches pcd_details <- dplyr::bind_rows(pcd_details, df_batch_rslt) } # Add postcode variables of interest to the original data df_new <- df_orig %>% dplyr::left_join(pcd_details, by = c("postcode_query")) %>% dplyr::select(-postcode_query) }
# Create a data frame with some example records df <- tibble::tribble( ~name, ~postcode, "SCC", "S1 2HH", "Blades", "S2 4SU", "Owls", "S6 1SW" ) add_postcode_vars( df, pcd_name = "postcode", .admin_district = FALSE, .lat_long = TRUE, other_vars = c("admin_ward", "msoa_code") )
# test_that("add_postcode_vars works properly and show error if needed", { # expect_true(add_postcode_vars(1:12) == 6.5) # expect_error(add_postcode_vars("text")) # })
# Keep eval=FALSE to avoid infinite loop in case you hit the knit button # Execute in the console directly fusen::inflate(flat_file = "dev/flat_full.Rmd", vignette_name = "Get started")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.