library(dplyr)
library(igraph)
library(purrr)
library(magrittr)
library(tidyr)
library(parallel)
library(sf)
library(spdep)
library(geosphere)
library(rmapshaper)
#------------------------------- Load raw data
# The cross section dataset contains information for only those municipalities
# with land distribution data
cross_section_raw <- haven::read_dta("data-raw/Cross_Section_final_RAW_1076.dta")
# The panel dataset contains information for municipalities that we cannot
# use in our models because we lack land distribution data for them
panel_raw <- haven::read_dta("data-raw/panel-raw.dta")
survey_raw <- haven::read_dta("data-raw/survey-raw.dta")
municipalities_and_regions_raw <- readr::read_csv("data-raw/DANE_municipality_codes_and_regions.csv")
#-------------------------------------------------------------------------------
# Shapefile for Colombian municipalities
#    source:   <https://data.humdata.org/dataset/cod-ab-col>
#    filename: "Col Administrative Divisions Shapefiles.zip"
#-------------------------------------------------------------------------------
# Read in shapefile.
CO_shape <- st_read('data-raw/CO-shapefiles/col_admbnda_adm2_mgn_20200416.shp')
#------------------- Merge department/province names
municipalities_and_regions <- municipalities_and_regions_raw %>%
  rename(department = number_dept, municipality = Muni_code) %>%
  mutate(municipality = as.numeric(municipality))
cross_section <- left_join(cross_section_raw, municipalities_and_regions)
panel <- left_join(panel_raw, municipalities_and_regions)
rm(municipalities_and_regions_raw, cross_section_raw, panel_raw,
   municipalities_and_regions)
#------------------------------- Remove two islands: 88001 and 88564
CO_islands <- c('88001', '88564')
survey_raw %>%
  filter(municipality_origin %in% CO_islands) #None in the survey
cross_section %<>% # Assignment pipe!
  filter(!(municipality %in% CO_islands))
panel <- panel %>%
  filter(!(municipality %in% CO_islands))
CO_shape <- CO_shape %>%
  mutate(municipality = as.numeric(stringr::str_remove(ADM2_PCODE, 'CO'))) %>%
  filter(!(municipality %in% CO_islands))
rm(CO_islands)
#-------------------------------- Construct List of Neighbors
# construct list of neighbors: neighbors gives *indices* corresponding to
# rows of CO_shape while neighbor_codes gives *municipality codes*, i.e. the
# correponding values of the column municipality from CO_shape
neighbors <- poly2nb(CO_shape)
neighbor_codes <- lapply(neighbors, function(x) CO_shape$municipality[x])
names(neighbor_codes) <- CO_shape$municipality
#-------------------------------------- Select and rename panel variables
panel %<>% # Assignment pipe!
  filter(year %in% 1996:2008) %>% # Only use panel data from 1996-2008
  select(municipality,
         year,
         V_cum = ac_vcivilparas_UR, # Cumulative paramilitary violence
         V_flow = vcivilparas_UR, # Flow of paramilitary violence
         placeboV_cum = ac_vcivilnotparas_UR, # Cumulative non-paramilitary violence
         placeboV_flow = vcivilnotparas_UR, # Flow of non-paramilitary violence
         D_AS = desplazados_paras_AS, # Four measures of displacement
         D_RUV = desplazados_total_RUV,
         D_CEDE = desplazados_CEDE,
         D_JYP = desplazados_JYP,
         pop1993 = Total_Poblacion1993)
#--------------------------- Compare municipalities in neighbors list vs panel
panel_munis <- panel %>%
  pull(municipality) %>%
  unique()
munis <- as.numeric(stringr::str_remove(CO_shape$ADM2_PCODE, 'CO'))
# All municipalities in panel are also in adjacency matrix
identical(sum(panel_munis %in% munis), length(panel_munis))
#---------------------- Construct average violence in neighboring municipalities
get_V_flow_neighbors <- function(muni_code) {
  V_flow_neighbors <- panel %>%
    filter(municipality %in% neighbor_codes[[paste(muni_code)]]) %>%
    select(municipality, year, V_flow) %>%
    pivot_wider(names_from = municipality, values_from = V_flow) %>%
    select(-year) %>%
    apply(1, mean)
  out <- panel %>%
    filter(municipality == muni_code) %>%
    select(municipality, year)
  # A very small number of municipalities have neighbors, but not neighbors for
  # which we observe violence data, in which case V_flow_neighbors equals numeric(0)
  if(length(V_flow_neighbors) > 0){
    out$V_flow_neighbors <- V_flow_neighbors
  } else {
    out$V_flow_neighbors <- NA_real_
  }
  return(out)
}
V_flow_neighbors <- mclapply(panel_munis, get_V_flow_neighbors, mc.cores = 8)
V_flow_neighbors <- do.call(rbind, V_flow_neighbors)
panel %<>% # Assignment pipe!
  left_join(V_flow_neighbors)
rm(V_flow_neighbors, get_V_flow_neighbors, munis, panel_munis)
#-------------------------------- Clean land distribution characteristics
#------------------------------------------------------------------------------
# The column landless_families in from Cross_section.dta was constructed by
# Camilo's original RA. There are some discrepancies in this column. Our
# procedure for constructing a measure of the number of landless families in a
# municipality from the underlying census data is as follows. We assume, absent
#evidence to the contrary, that there is at most one landowner per family.
#------------------------------------------------------------------------------
# 1. Calculate n_landowners by summing counts in each column "landown_*"
# 2. Set n_families = pmax(num_families, n_landowners)
# 3. Set n_landless = n_families - n_landowners
#------------------------------------------------------------------------------
cross_section %<>% # Assignment pipe!
  rowwise(municipality) %>%
  mutate(n_landowners = sum(c_across(starts_with('landown_')))) %>%
  ungroup() %>%
  rename(n_families_census = num_families) %>%
  mutate(n_families = pmax(n_families_census, n_landowners),
         n_landless = n_families - n_landowners,
         omega_n = n_landless / n_families) %>%
  select(-landless_families)
#---------------------------------------------------------------------------------------
# We observe a discretized land distribution: we know the total amount of land and the
# total number of landholders within each of the following "bins" measured in hectares
#---------------------------------------------------------------------------------------
# 0
# (0,1)
# [1,3)
# [3,5)
# [5,10)
# [10,15)
# [15, 20)
# [20, 50)
# [50,100)
# [100, 200)
# [200, 500)
# [500, 1000)
# >=2000
#---------------------------------------------------------------------------------------
landowner_bins <- cross_section %>% # Number of families in each landholding "bin"
  select(n_landless, starts_with('landown_')) %>%
  relocate(n_landless) %>% # Make this the first column
  rename(`0` = n_landless,
         `(0,1)` = landown_less_1,
         `[1,3)` = landown_1_3,
         `[3,5)` = landown_3_5,
         `[5,10)` = landown_5_10,
         `[10,15)` = landown_10_15,
         `[15,20)` = landown_15_20,
         `[20,50)` = landown_20_50,
         `[50,100)` = landown_50_100,
         `[100,200)` = landown_100_200,
         `[200,500)` = landown_200_500,
         `[500,1000)` = landown_500_1000,
         `[1000,2000)` = landown_1000_2000 ,
         `>=2000` = landown_2000_plus)
landtotal_bins <- cross_section %>% # Total land in each landholding "bin"
  mutate(tot_land_0 = 0) %>% # landless families are those with exactly 0 land
  select(starts_with('tot_land_')) %>%
  relocate(tot_land_0) %>% # Make this the first column
  rename(`0` = tot_land_0,
         `(0,1)` = tot_land_1,
         `[1,3)` = tot_land_2,
         `[3,5)` = tot_land_3,
         `[5,10)` = tot_land_4,
         `[10,15)` = tot_land_5,
         `[15,20)` = tot_land_6,
         `[20,50)` = tot_land_7,
         `[50,100)` = tot_land_8,
         `[100,200)` = tot_land_9,
         `[200,500)` = tot_land_10,
         `[500,1000)` = tot_land_11,
         `[1000,2000)` = tot_land_12,
         `>=2000` = tot_land_13)
# Construct a list of land distributions: one for each municipality
make_land_dist <- function(i) {
  total_land <- landtotal_bins[i,]
  n_families <- landowner_bins[i,]
  out <- data.frame(t(rbind(total_land = landtotal_bins[i,], n_families = landowner_bins[i,])))
  out$mean_land <- ifelse(out$n_families > 0, out$total_land / out$n_families, 0)
  out$frac_families <- out$n_families / sum(out$n_families)
  return(out)
}
land_distributions <- lapply(seq_len(nrow(cross_section)), make_land_dist)
names(land_distributions) <- cross_section$municipality
rm(make_land_dist, landowner_bins, landtotal_bins)
# Function that computes summary statistics of the land distribution for use in
# our LINEAR IV models. The non-linear IV models rely on the entire land distribution
# stored in land_distributions
get_land_statistics <- function(x) {
  z <- x[-1,] # summary stats for *landholders* so remove landless
  z$frac_families <- z$frac_families / sum(z$frac_families)
  c('omega' = x$frac_families[1],
    'H1' = sum(asinh(z$mean_land) * z$frac_families),
    'H2' = sum(asinh(z$mean_land)^2 * z$frac_families),
    n_families = sum(x$n_families))
}
land_statistics <- lapply(land_distributions, get_land_statistics)
land_statistics <- as.data.frame(do.call(rbind, land_statistics))
land_statistics$municipality <- as.numeric(rownames(land_statistics))
rownames(land_statistics) <- NULL
rm(get_land_statistics)
cross_section %<>% # Assignment pipe!
  left_join(y = land_statistics) %>%
  mutate(omegaC = 1 - omega) %>% # It's convenient to have (1 - omega) as a separate variable
  select(-omega_n) # This is identical to omega: share of landless -- don't need both
rm(land_statistics)
# Cumulative violence in the final year of the panel, i.e. total violence, can
# be viewed as a cross-sectional characteristic. Merge this with the other
# cross_section data
merge_me <- panel %>%
  filter(year == max(year)) %>%
  select(municipality, V_cum)
cross_section %<>%
  inner_join(merge_me) %>%
  rename(V_total = V_cum) # Better to call it V_total: total violence from 1996:2008
rm(merge_me)
#------------------------ Flag municipalities w/ land data and >=100 landholders
# The municipalities in cross_section are those for which we have land dist data
identical(sum(!is.na(cross_section$H1)), nrow(cross_section))
cross_section %<>%
  mutate(at_least_100_landowners = n_landowners >= 100)
munis_100_plus <- cross_section %>%
  filter(at_least_100_landowners) %>%
  pull(municipality)
panel %<>%
  mutate(has_land_data = municipality %in% cross_section$municipality,
         at_least_100_landowners = municipality %in% munis_100_plus)
rm(munis_100_plus)
#-------------------------------------Clean Displacement Data
#-------------------------------------------------------------------------------
# Some displacement measures are zero for all municipalities during a given year.
# These are not "true" zeros, but rather missing observations: some agencies did
# not choose to report in a given year:
#-------------------------------------------------------------------------------
# AS      did not record in: 1996, 2012
# CEDE    did not record in: 1996, 2010, 2011, 2012
#-------------------------------------------------------------------------------
# Replace these zeros with NA.
#-------------------------------------------------------------------------------
# Technically not necessary to include 2010:2012 since we've already subsetted
# to remove those years.
panel[panel$year %in% c(1996, 2012),]$D_AS <- NA
panel[panel$year %in% c(1996, 2010:2012),]$D_CEDE <- NA
#-------------------------------------------------------------------------------
# For all measures except JYP, there are some municipalities in which a single
# measure reports zero total displacement over the sample period, while all other
# measures report positive displacement. These "zeros" are really NAs.
#-------------------------------------------------------------------------------
disagreement <- panel %>%
  select(municipality, year, starts_with('D_')) %>%
  group_by(municipality) %>%
  summarise(across(.cols = starts_with('D_'),
                   .fns = sum, na.rm = TRUE)) %>%
  rowwise() %>%
  filter(sum(c_across(starts_with('D_')) == 0) == 1) %>%
  ungroup()
AS_replace <- disagreement %>%
  filter(D_AS == 0) %>%
  pull(municipality)
panel[panel$municipality %in% AS_replace,]$D_AS <- NA
RUV_replace <- disagreement %>%
  filter(D_RUV == 0) %>%
  pull(municipality)
panel[panel$municipality %in% RUV_replace,]$D_RUV <- NA
CEDE_replace <- disagreement %>%
  filter(D_CEDE == 0) %>%
  pull(municipality)
panel[panel$municipality %in% CEDE_replace,]$D_CEDE <- NA
rm(disagreement, AS_replace, RUV_replace, CEDE_replace)
#--------------------------------------- Create lagged violence flow
panel <- panel %>%
  group_by(municipality) %>%
  mutate(lag_V_flow = lag(V_flow, order_by = year)) %>%
  ungroup()
#--------------------------- Re-scale displacement measures
# What share of total RUV and JYP displacement was reported in 1996?
# Average these two shares
share_1996 <- panel %>%
  select(year, D_RUV, D_JYP) %>%
  group_by(year) %>%
  summarize(RUV_sum = sum(D_RUV, na.rm = TRUE),
            JYP_sum = sum(D_JYP, na.rm = TRUE)) %>%
  mutate(RUV_share = RUV_sum / sum(RUV_sum),
         JYP_share = JYP_sum / sum(JYP_sum)) %>%
  filter(year == 1996) %>%
  select(RUV_share, JYP_share) %>%
  rowMeans()
panel %<>%
  mutate(across(c(D_RUV, D_JYP), # Normalize total displacement to 6 million
         list(norm = ~ 6e6 * . / sum(., na.rm = TRUE)))) %>%
  # Because AS and CEDE did not report in 1996 we don't normalize them to 6 million.
  # Instead we normalize them to a *slightly smaller value* constructed using
  # share_1996
  mutate(across(c(D_AS, D_CEDE),
                list(norm = ~ (1 - share_1996) * 6e6 * . / sum(., na.rm = TRUE)))) %>%
  rowwise() %>%
  mutate(D_med = median(c_across(ends_with('_norm')), na.rm = FALSE),
         D_avg = mean(c_across(ends_with('_norm')), na.rm = FALSE),
         D_med_any = median(c_across(ends_with('_norm')), na.rm = TRUE),
         D_avg_any = mean(c_across(ends_with('_norm')), na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(D_avg_no_AS = (D_RUV + D_CEDE + D_JYP) / 3,
         D_avg_no_RUV = (D_AS + D_CEDE + D_JYP) / 3,
         D_avg_no_CEDE = (D_AS + D_RUV + D_JYP) / 3,
         D_avg_no_JYP = (D_AS + D_RUV + D_CEDE) / 3) %>%
  # After taking medians of the normalized displacement variables (sum to 6 million) the result
  # will no longer sum to 6 million. Renormalize so it does
  mutate(across(c(starts_with('D_med'), starts_with('D_avg')), ~ 6e6 * . / sum(., na.rm = TRUE)))
rm(share_1996)
# Covariates ------------------------------------------------------------------
cross_section %<>%
  rename(mines = mine_titles_90, oil = oil_prod_98) %>%
  mutate(mines = if_else(mines > 0, 1, 0),
         drug_routes = if_else(drug_routes > 1, 1, 0),
         oil = if_else(oil > 0, 1, 0),
         bureaucracy = local_bureaucracy_95 + state_bureaucracy_95,
         elec_comp = 0.5 * (ratio_votes_dif_alc + ratio_votes_dif_asa),
         offices = rowSums(across(c(notaries_95,
                                    banks_95,
                                    savings_95,
                                    health_centers_95,
                                    health_posts_95,
                                    schools_95,
                                    libraries_95,
                                    fire_95,
                                    jails_95,
                                    culture_95,
                                    instruments_95,
                                    tax_95,
                                    police_95,
                                    police_posts_95,
                                    courts_95,
                                    telecom_95,
                                    post_95,
                                    agrarian_95,
                                    hospitals_95)))) %>%
  select(-starts_with('tot_land'), -starts_with('landown_'), -gini,
         -convexity_L, -starts_with('owners_'), -ends_with('_95'),
         -starts_with('ratio_votes'), -n_families_census,
         -contains('_signal'), -contains('educ_'), -dum_2001_land)
# Spatial stuff ----------------------------------------------------------------
# Read in ARCGIS-generated list of municipalities intersecting road network
roads <- readr::read_csv('data-raw/real_roads.csv') %>%
  mutate(municipality = stringr::str_remove(ADM2_PCODE, 'CO'),
         municipality = as.numeric(municipality))
# Read and clean forest & elevation data --------------------------------------
forests <- readr::read_csv("data-raw/Forest_Master_50.csv") %>%
  mutate(share_forested = extent_2000_ha / area_ha,
         is_forested = share_forested > 0.5) %>%
  select(ADM2_PCODE, is_forested) %>%
  mutate(municipality = stringr::str_remove(ADM2_PCODE, 'CO'),
         municipality = as.numeric(municipality)) %>%
  select(-ADM2_PCODE)
# Municipality 70523 (Palmito, Sucre) appears twice in forests, once with
# is_forested TRUE and again with is_forested FALSE. There are no forests here!
forests %<>%
  filter(!((municipality == 70523) & (is_forested)))
# Elevation *difference*
elevation <- readr::read_csv("data-raw/elevationsFinal.csv") %>%
  select(-`...1`) %>% # first column is a row number starting from zero
  mutate(municipality = stringr::str_remove(ADM2_PCODE, 'CO'),
         municipality = as.numeric(municipality)) %>%
  select(-ADM2_PCODE) %>%
  arrange(municipality)
# Create dataframe of geographic information ----------------------------------
geography <- cross_section %>%
  select(municipality, ruggedness, slope, elevation) %>%
  left_join(elevation) %>%
  left_join(forests) %>%
  mutate(has_road = if_else(municipality %in% roads$municipality, 1, 0),
         is_forested = 1 * is_forested)
rm(elevation, forests, roads)
centroid_coords <- CO_shape %>%
  st_centroid() %>% # compute lat and lon of centroids
  st_coordinates() # extract lat and lon of centroids as *matrix*
get_neighbor_distances <- function(i) {
  if(all(neighbors[[i]] == 0)) {
    return(NA) # No neighbors? No distances!
  } else {
    own_coords <- centroid_coords[i, ]
    neighbor_coords <- centroid_coords[neighbors[[i]], ]
    distGeo(own_coords, neighbor_coords) / 1000 # convert meters to km
  }
}
neighbor_distances <- lapply(1:length(neighbors), get_neighbor_distances)
get_pairwise_distances <- function(i) {
# Only return neighbors whose index is greater than i: eliminate "repeat" pairs
  i_neighbors <- neighbors[[i]]
  i_neighbor_distances <- neighbor_distances[[i]]
  keep <- i < i_neighbors
  if(any(keep)) {
    data.frame(i, j = i_neighbors[keep], dist = i_neighbor_distances[keep])
  } else {
    # Return NULL if no neighbors (entry in neighbors is zero) or all repeats
    NULL
  }
}
pairwise_distances <- lapply(1:length(neighbors), get_pairwise_distances)
pairwise_distances <- do.call(rbind, pairwise_distances)
pairwise_distances <- tibble(pairwise_distances)
rm(get_neighbor_distances, get_pairwise_distances, centroid_coords)
# Impute missing values --------------------------------------------------------
# There are 41 missing values for the variables elevation_difference and
# is_forested from geography. We impute these with an average of the same
# variables for *neighboring* municipalities. If a variable is missing for all
# neighboring municipalities, we fill in the overall mean of the variable for
# geography.
# Create list of neighboring municipalities with municipality *codes* rather
# than row numbers from CO_shape
muni_codes <- as.numeric(stringr::str_remove(CO_shape$ADM2_PCODE, 'CO'))
neighbors_codes <- lapply(neighbors, function(x) muni_codes[x])
names(neighbors_codes) <- muni_codes
# geography only has data for 1049 municipalities while CO_shape has data for
# 1122. Add the missing municipalities to geography
geography %<>%
  full_join(tibble(municipality = muni_codes))
impute_missing_var <- function(var_name) {
  muni_missing_var <- geography[is.na(geography[, var_name]), 'municipality', drop = TRUE]
  municipality_indices <- which(muni_codes %in% muni_missing_var)
  neighbor_list <- neighbors_codes[municipality_indices]
  f <- function(x) {
    neighbor_geography <- pull(geography[geography$municipality %in% x, var_name])
    mean(neighbor_geography, na.rm = TRUE)
  }
  out <- lapply(neighbor_list, f)
  out <- do.call(rbind, out)
  out <- data.frame(row.names(out), out[,1])
  names(out) <- c('municipality', paste0('impute_', var_name))
  row.names(out) <- NULL
  out$municipality <- as.numeric(out$municipality)
  # If no neighbors, then there's an NaN in the second column. Replace with the
  # overall average
  out[is.na(out[, 2]), 2] <- mean(geography[, var_name, drop = TRUE], na.rm = TRUE)
  return(tibble(out))
}
# Impute missing values for all variables *except* municipality (first column)
# and store the result in a list
imputed_geography <- lapply(names(geography)[-1], impute_missing_var)
imputed_geography %<>%
  reduce(full_join, by = 'municipality')
# Last step of the imputation, following the logic of the comment, but
# for all the imputed variables, not just impute_forest
geography %<>%
  full_join(imputed_geography) %>%
  mutate(ruggedness = if_else(!is.na(ruggedness), ruggedness, impute_ruggedness)) %>%
  mutate(slope = if_else(!is.na(slope), slope, impute_slope)) %>%
  mutate(elevation = if_else(!is.na(elevation), elevation, impute_elevation)) %>%
  mutate(elevation_difference = if_else(!is.na(elevation_difference),
                                       elevation_difference,
                                       impute_elevation_difference)) %>%
  mutate(is_forested =  if_else(!is.na(is_forested), is_forested, impute_is_forested)) %>%
  mutate(has_road = if_else(!is.na(has_road), has_road, impute_has_road)) %>%
  select(-starts_with('impute_'))
rm(impute_missing_var, imputed_geography, muni_codes, neighbors_codes)
cross_section <- cross_section %>%
  full_join(geography)
# Construct dataset of all unique *pairs* of neighboring municipalities --------
# Create geography_i by renaming the columns of geography to have an '_i'
# at the end. Do the same for geography_j. Then merge with pairwise distances.
geography_i <- geography %>%
  rename_with(~ stringr::str_c(., '_i'))
geography_j <- geography %>%
  rename_with(~ stringr::str_c(., '_j'))
pairwise_distances %<>% # Assignment pipe!
  mutate(municipality_i = stringr::str_remove(CO_shape$ADM2_PCODE[i], 'CO'),
         municipality_j = stringr::str_remove(CO_shape$ADM2_PCODE[j], 'CO'),
         municipality_i = as.numeric(municipality_i),
         municipality_j = as.numeric(municipality_j)) %>%
  select(-i, -j) %>%
  left_join(geography_i) %>%
  left_join(geography_j)
rm(geography_i, geography_j, geography)
link_stats <- pairwise_distances %>%
  mutate(forest = is_forested_i + is_forested_j,
         elevation = (elevation_difference_i + elevation_difference_j) / 2,
         ruggedness = (ruggedness_i + ruggedness_j) / 2,
         slope = (slope_i + slope_j) / 2,
         elevation_diff = abs(elevation_i - elevation_j)) %>%
  select(municipality_i, municipality_j, dist, forest, elevation, slope,
         ruggedness, elevation_diff)
pca <- prcomp(~ forest + elevation + ruggedness + slope + elevation_diff,
              data = link_stats, scale = TRUE, na.action = na.omit)
# Extract first PC and "rotate" so that higher values indicate less
# favorable terrain (higher "friction"). Note that this is mean zero
print(pca)
link_stats$PC1_geography <- -1 * pca$x[,1]
rm(pca, pairwise_distances)
# "Effective distance" is defined as:
#    dist * friction
# The friction is constructed from PC1 as follows:
#     z = (PC1 - min(PC1)) / (max(PC1) - min(PC1))
#     friction = 1 + (max_friction - 1) * z
#
# This gives a friction of 1 at the minimum of PC1 and a fraction of max_friction
# at the maximum of PC1. We use four values of max_friction: 1, 2, 2.5, and 3.
# Notice that max_friction = 1 means that PC1 plays no role: this is distance
# "as the crow flies" from one municipality centroid to the next.
link_stats %<>%
  mutate(z = (PC1_geography - min(PC1_geography)) / diff(range(PC1_geography)))
# Dijkstra's algorithm ---------------------------------------------------------
# First construct a graph from our dataframe of link_statistics
munigraph <- graph_from_data_frame(link_stats, directed = FALSE)
# The "Epicenter" of paramilitary violence: Valencia, Cordoba
epicenter <- '23855'
# Unweighted graph, no weights -> return "number of hops" between nodes
hops <- distances(munigraph, v = epicenter) # to argument defaults to V(graph)
get_dist_friction <- function(max_friction) {
  dist <- edge_attr(munigraph, 'dist')
  z <- edge_attr(munigraph, 'z')
  friction <- 1 + (max_friction - 1) * z
  my_weights <- dist * friction
  distances(munigraph, v = epicenter, weights = my_weights)
}
max_friction <- c(1, 2, 2.5, 3)
d_geography <- lapply(max_friction, get_dist_friction)
d_geography <- do.call(rbind, d_geography)
d_geography <- as.data.frame(t(d_geography))
names(d_geography) <- paste0('d_geography', max_friction)
d_geography$municipality <- as.numeric(rownames(d_geography))
dist_to_epicenter <- d_geography
dist_to_epicenter$d_hops <- drop(hops)
dist_to_epicenter <- as_tibble(dist_to_epicenter)
dist_to_epicenter <- dist_to_epicenter %>%
  relocate(municipality) %>%
  rename(d_crow = d_geography1)
# Calculate percentiles of distance measures (except hops)
dist_to_epicenter <- dist_to_epicenter %>%
  mutate(across(c('d_crow', starts_with('d_geography')), ~ (rank(.x) / length(.x)),
                .names = 'p_{.col}'))
rm(get_dist_friction, epicenter, max_friction, d_geography, hops,
   link_stats, neighbor_distances, munigraph)
cross_section <- cross_section %>%
  full_join(dist_to_epicenter)
rm(dist_to_epicenter)
# Simplify CO_shape polygons and remove islands --------------------------------
CO_shape <- ms_simplify(CO_shape) # defaults to retaining 5% of points
#-------------------------------------------------------------------------------
# Create abandoned land dataset.
# Note: There was an intermediate hand-cleaning step that was
# omitted between merging raw digitized with AttributeTableFinal.csv and input
# here to remove duplicates; the abandoned_land_handcleaned.csv file is the
# result of that cleaned merge.
#-------------------------------------------------------------------------------
usethis::use_data(CO_shape, overwrite = TRUE)
usethis::use_data(cross_section, overwrite = TRUE)
usethis::use_data(land_distributions, overwrite = TRUE)
usethis::use_data(neighbors, overwrite = TRUE)
usethis::use_data(neighbor_codes, overwrite = TRUE)
usethis::use_data(panel, overwrite = TRUE)
# clean up
rm(list = ls())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.