In rexdouglass/MeasuringLandscape: Replication file for the paper Measuring the Landscape of CivilWar

This file demonstrates that the kinds of locations that are imputed are different from the true locations, in terms of things like population, distance from roads, ruggedness, etc.

rm(list=ls()); gc()
# !diagnostics off
library(MeasuringLandscape)
library(tidyverse)

dir_figures <- glue::glue(getwd(), "/../paper/figures/")

gc()

knitr::opts_knit$set(progress = TRUE, verbose = TRUE)
knitr::opts_chunk$set(fig.width=12, fig.height=8,  warning=FALSE, message=FALSE, cache=TRUE)
options(width = 160)

#Load Events
events_sf <- readRDS(system.file("extdata", "events_sf.Rdata", package = "MeasuringLandscape")) 

events_sf_text_coord_unique <- plyr::ddply(events_sf[,c('location_text','name_clean','name_cleaner','document_district_clean','map_coordinate_clean_latitude','map_coordinate_clean_longitude')],
                                     "location_text", transform,
      map_coordinate_has =sum(!is.na(map_coordinate_clean_latitude))
      )

#Reload from scratch each time in case we subset sometehing weirdly
georef_all_dt <- readRDS(system.file("extdata", "georef_all_dt_recomendations.Rds", package = "MeasuringLandscape")) 

table(events_sf$name_cleaner %in% georef_all_dt$name_cleaner) #All events are in here
table(events_sf$name_cleaner %in% georef_all_dt$name_cleaner[!is.na(georef_all_dt$georef_b)]) #7,742 events with at least one gazeteer suggestion

#Exclude all distance = 0 obs, those are self matches
georef_all_dt <- subset(georef_all_dt, 
                        !is.na(name_cleaner) & # must have a name
                        (is.na(distance_km) | distance_km!=0)  ) #Can be either missing or not zero. Only thing we drop is zero because that's a self match

Create a subset just for those events that have both original coords and a potential match

#Subset to make sure both sides have lat/long
georef_all_dt_covariates <- subset(georef_all_dt, !is.na(X1) & !is.na(X2)) ; dim(georef_all_dt_covariates)

#Create an events dataset for merging on covariates
georef_all_dt_covariates_events <- georef_all_dt_covariates[!duplicated(event_hash)]
georef_all_dt_covariates_events_sf <- georef_all_dt_covariates_events  %>% data.frame() %>% sf::st_as_sf(coords = c("X1","Y1"),  crs = 4326, agr = "constant", remove=F, na.fail =T)
georef_all_dt_covariates_events_sf$imputed <- 0

#Create a gaz dataset for mering on covarates
georef_all_dt_covariates_gaz_sf <- georef_all_dt_covariates[,distance_km_min:=min(distance_km, na.rm=T),
                                                            by=list(event_hash, source_dataset)]
georef_all_dt_covariates_gaz_sf <- georef_all_dt_covariates_gaz_sf[distance_km==distance_km_min]

georef_all_dt_covariates_gaz_sf <- georef_all_dt_covariates_gaz_sf[,head(.SD, 1), by=list(event_hash, source_dataset)] %>% 
                                  data.frame() %>% sf::st_as_sf(coords = c("X2","Y2"),  crs = 4326, agr = "constant", remove=F, na.fail =T)
georef_all_dt_covariates_gaz_sf$imputed <- 1

Load all of the spatial files for the covariates

#covariate_list <- readRDS( '/home/rexdouglass/Dropbox (rex)/Kenya Article Drafts/MeasuringLandscapeCivilWar/inst/extdata/covariate_list.Rds' )
covariate_list <-  MeasuringLandscape:::prep_covariates()

sapply(covariate_list, FUN=function(q) class(q)[1])

Extract the values of covariates found at each location and imputed location

#georef_all_dt_covariates_events_sf$district <- new_over(georef_all_dt_covariates_events_sf , covariate_list[[1]]  , 'name' )
#georef_all_dt_covariates_events_sf$cadastral <- new_over(georef_all_dt_covariates_events_sf , covariate_list[[2]]  , 'name' )
#georef_all_dt_covariates_events_sf$language <- new_over(georef_all_dt_covariates_events_sf , covariate_list[[3]]  , 'LANGUAGE' )
#georef_all_dt_covariates_events_sf$tribe <- new_over(georef_all_dt_covariates_events_sf , covariate_list[[4]]  , 'Tribe' )
georef_all_dt_covariates_events_sf$rain <- MeasuringLandscape:::new_over(georef_all_dt_covariates_events_sf ,
                                                    covariate_list[['raster_rain']]  , 'Tribe' )
georef_all_dt_covariates_events_sf$population <- MeasuringLandscape:::new_over(georef_all_dt_covariates_events_sf ,
                                                          covariate_list[['pop_raster_roi']]  , '' )
georef_all_dt_covariates_events_sf$treecover <- MeasuringLandscape:::new_over(georef_all_dt_covariates_events_sf ,
                                                         covariate_list[['forest_raster_roi']]  , '' )
georef_all_dt_covariates_events_sf$ruggedness <- MeasuringLandscape:::new_over(georef_all_dt_covariates_events_sf ,
                                                          covariate_list[['ruggedness_raster_roi']]  , '' )
georef_all_dt_covariates_events_sf$roads_distance <- MeasuringLandscape:::new_over(georef_all_dt_covariates_events_sf ,
                                                              covariate_list[['roads_distance_to']]  , '' )

#georef_all_dt_covariates_events_sf$landuse <- new_over(georef_all_dt_covariates_events_sf , covariate_list[[9]]  , 'LANDUSE' )
#georef_all_dt_covariates_gaz_sf$district <- new_over(georef_all_dt_covariates_gaz_sf , covariate_list[[1]]  , 'name' )
#georef_all_dt_covariates_gaz_sf$cadastral <- new_over(georef_all_dt_covariates_gaz_sf , covariate_list[[2]]  , 'name' )
#georef_all_dt_covariates_gaz_sf$language <- new_over(georef_all_dt_covariates_gaz_sf , covariate_list[[3]]  , 'LANGUAGE' )
#georef_all_dt_covariates_gaz_sf$tribe <- new_over(georef_all_dt_covariates_gaz_sf , covariate_list[[4]]  , 'Tribe' )
#georef_all_dt_covariates_gaz_sf$rain <- new_over(georef_all_dt_covariates_gaz_sf , covariate_list[[5]]  , '' )
#georef_all_dt_covariates_gaz_sf$population <- new_over(georef_all_dt_covariates_gaz_sf , covariate_list[[6]]  , '' )
#georef_all_dt_covariates_gaz_sf$treecover <- new_over(georef_all_dt_covariates_gaz_sf , covariate_list[[7]]  , '' )
#georef_all_dt_covariates_gaz_sf$ruggedness <- new_over(georef_all_dt_covariates_gaz_sf , covariate_list[[8]]  , '' )
#georef_all_dt_covariates_gaz_sf$roads_distance <- new_over(georef_all_dt_covariates_gaz_sf , covariate_list[[9]]  , '' )
#georef_all_dt_covariates_gaz_sf$landuse <- new_over(georef_all_dt_covariates_gaz_sf , covariate_list[[9]]  , 'LANDUSE' )

Plot the differences of points at real locations versus their imputed counterparts

sentence_case <- function(x) stringr::str_to_sentence(tolower(gsub("_"," ",x)))


#Make that bias in the coef plot
data.table::setkey(georef_all_dt, "rule_ensemble") #sort by ensemble score, pick the best option for each source
georef_all_dt_source <- georef_all_dt[,.SD[1], by=list(event_hash,
                                                       source_dataset) ]

temp <- as.data.frame(georef_all_dt_covariates_events_sf)[,c('source_dataset',
                                                          'population',
                                                          'treecover',
                                                          'ruggedness',
                                                          'roads_distance',
                                                          'rain')
                                                         ] %>% 
                                                           group_by(source_dataset) %>% 
                                                          summarise_all(funs(mean, .args =list(na.rm=T)) )

#I don't think we want to scale this, I think we want to do percentages
#temp[,2:6] <- lapply(temp[,2:6], scale)

#temp[,2:6] <- temp[,2:6] - temp[rep(2,nrow(temp)),2:6] #This demeans each
test <- as.numeric(unlist(as.vector(data.frame(temp[2,]))))
temp <- as.data.frame(temp)
temp[,2] <- temp[,2] / rep(test[2],10) #This demeans each
temp[,3] <- temp[,3] / rep(test[3],10)  #This demeans each
temp[,4] <- temp[,4] / rep(test[4],10)  #This demeans each
temp[,5] <- temp[,5] / rep(test[5],10)  #This demeans each
temp[,6] <- temp[,6] / rep(test[6],10)  #This demeans each

temp <- temp %>% tidyr::gather(variable, value, -source_dataset)

p9 <-  temp %>% 
       mutate(source_dataset=sentence_case(source_dataset),
              variable=sentence_case(variable)
      ) %>%
       ggplot(aes(x = value,
                 y = as.factor(sub("_","",source_dataset)),
                 shape=as.factor(sub("_","",source_dataset)))) + 
                 geom_point() + 
                 facet_grid(.~variable, scales="free") + 
                 geom_vline(xintercept=1) + ylab(sentence_case("Source Dataset")) +
                 xlab(sentence_case("Ratio of Mean Value at Imputed Location Relative to True Location")) +
                 scale_shape_manual(values=1:15) + theme_bw() + theme(legend.position="none") 
p9
ggsave(
  filename = glue::glue(dir_figures, "p_bias_in_covariates_by_source.pdf"),
  plot = p9, width = 16, height = 3
)

rexdouglass/MeasuringLandscape documentation built on May 13, 2019, 6:16 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

rexdouglass/MeasuringLandscape
Replication file for the paper Measuring the Landscape of CivilWar

In rexdouglass/MeasuringLandscape: Replication file for the paper Measuring the Landscape of CivilWar

Load all of the spatial files for the covariates

Extract the values of covariates found at each location and imputed location

Plot the differences of points at real locations versus their imputed counterparts

R Package Documentation

Browse R Packages

We want your feedback!

rexdouglass/MeasuringLandscape Replication file for the paper Measuring the Landscape of CivilWar

In rexdouglass/MeasuringLandscape: Replication file for the paper Measuring the Landscape of CivilWar

Load all of the spatial files for the covariates

Extract the values of covariates found at each location and imputed location

Plot the differences of points at real locations versus their imputed counterparts

R Package Documentation

Browse R Packages

We want your feedback!

rexdouglass/MeasuringLandscape
Replication file for the paper Measuring the Landscape of CivilWar