library(sf) library(tidyverse) library(tmap) library(tmaptools) library(tigris) library(tidycensus) library(rmapshaper) library(matrixStats) library(SpatialAcc) library(reticulate) library(dplyr) library(tidygeocoder) library(readxl) library(DBI)
con <- get_db_conn(db_pass = "rsu8zvrsu8zv") md_locs <- st_read(con, query = " SELECT a.* FROM corelogic_usda.current_tax_200627_latest_all_add_vars_add_progs_geom_blk as a WHERE a.geoid_cnty LIKE '24%'") # CHANGE 24 to 11, 51, etc. dbDisconnect(con) data <- md_locs data$property_indicator <- as.factor(data$property_indicator) # Residential: 10 (Single Family Residence), 11 (Condominium), 21 (Duplex), 22 (Apartment) # Considered 24 (Commercial Condominium) but cross-checking with land use code, county use code, and googling property addresses # showed these were in fact all medical offices, business offices, etc. Excluding here. data <- data %>% filter(property_indicator == 10 | # single family residence property_indicator == 11 | # condo property_indicator == 21 | # duplex property_indicator == 22) # apartment data$geoid_bg <- substr(data$geoid_blk, 1, 12) data$geoid_tr <- substr(data$geoid_blk, 1, 11) md.data <- data[grepl(" MD ", data$mail_address),] # change to DC, VA, etc.
md.bg <- get_acs(geography = "block group", year = 2019, variables = c(median_household_income = "B19013_001"), state = "MD", survey = "acs5", output = "wide", geometry = T) # need geography for st_centroid md.data.2 <- md.data %>% distinct(mail_address, .keep_all = TRUE) # randomly sample 5 addresses "within" each block group (within = based on the label for that address) all.md.data <- md.data.2 %>% group_by(geoid_bg) %>% sample_n(5, replace = T) %>% select(mail_address, property_centroid_latitude, property_centroid_longitude, geoid_bg) x <- st_distance(all.md.data, st_centroid(md.bg) %>% st_transform(st_crs(all.md.data))) # get distance matrix # empty final sample matrix all.md.data.2 <- as.data.frame(matrix(, nrow = 0, ncol = 3)) # loop over block groups for (i in 1:(dim(x)[2])) { # get closest 3 addresses to block group centroid within our random sample all.md.data.2 <- rbind(all.md.data.2, st_drop_geometry(all.md.data[which(x[, i] %in% sort(x[, i])[1:3]),]) %>% select(mail_address, property_centroid_latitude, property_centroid_longitude) %>% mutate(geoid_bg = md.bg[i,]$GEOID) %>% sample_n(3)) # Not sure about - Adding missing grouping variables: `geoid_bg` } # APPROACH - GET 5 ADDRESSES LISTED WITHIN EACH BLOCK GROUP, THEN find closest addresses to bg centroids within this group # there are some repeats (again this is the closest address to that block group, so it will be roughly accurate - given that there isn't a lot of variability I genuinely don't think it will matter) write.csv(all.md.data.2, "md_three_address_in_block_group.csv") # use this CSV for part 2 !
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.