Demonstrate what kinds of events tend to systematically get excluded. Here, in terms of whether the event would have received an original military coordinate or not.
rm(list=ls()); gc() # !diagnostics off library(MeasuringLandscape) library(tidyverse) dir_figures <- glue::glue(getwd(), "/../paper/figures/") gc() knitr::opts_knit$set(progress = TRUE, verbose = TRUE) knitr::opts_chunk$set(fig.width=12, fig.height=8, warning=FALSE, message=FALSE, cache=TRUE) options(width = 160) sentence_case <- function(x) stringr::str_to_sentence(tolower(gsub("_"," ",x)))
#Load Events events_sf <- readRDS(system.file("extdata", "events_sf.Rdata", package = "MeasuringLandscape")) events_sf_text_coord_unique <- plyr::ddply(events_sf[,c('location_text', 'name_clean','name_cleaner','document_district_clean','map_coordinate_clean_latitude','map_coordinate_clean_longitude')], "location_text", transform, map_coordinate_has =sum(!is.na(map_coordinate_clean_latitude)) )
Plot the predicted effects for a single model, Mil. Coords or no Mil. Coords
pred_cords <- MeasuringLandscape:::predict_missingness_dv(is.na(events_sf$map_coordinate_clean_latitude)) rf <- pred_cords$xb_model train <- pred_cords$x_all_pre_dummy label <- pred_cords$label x_all_pre_dummy <- pred_cords$x_all_pre_dummy x_all <- dummies::dummy.data.frame(pred_cords$x_all_pre_dummy, all=T, dummy.classes=c('character','factor','ordered')) dtrain <- xgboost::xgb.DMatrix(data=as.matrix( x_all ), label = label, missing = NA ) #options(na.action='na.pass') #testdata_dummy <- model.matrix(~ . - 1, pred_cords$x_all_pre_dummy) #options(na.action='na.omit') testdata_dummy <- x_all_pre_dummy %>% as.tibble %>% fastDummies::dummy_columns() %>% dplyr::select_if( (is.numeric) ) #testdata_dummy <- dummies::dummy.data.frame(x_all_pre_dummy, drop=F, # dummy.classes=c('character','factor','ordered')) dtest <- xgboost::xgb.DMatrix(data=as.matrix( testdata_dummy ), missing = NA ) dtest <- xgboost::xgb.DMatrix(data=as.matrix( pred_cords$postdummy ), missing = NA )
Importance scores for each variable, predicting the missingness of exact map coordinates as a function of each event's details.
importance_importance <- xgboost::xgb.importance(feature_names=names(testdata_dummy), model = rf) xgboost::xgb.plot.importance(importance_importance)
histogram=T scale=2 a <- MeasuringLandscape:::plot_partial_effects(rf=pred_cords$xb_model, outcome="mapcoordinate_clean_missing",var="document_date_type",minsize=100, scale=scale,histogram=T) b <- MeasuringLandscape:::plot_partial_effects(rf=pred_cords$xb_model, outcome="mapcoordinate_clean_missing",var="document_date_best_year",minsize=100, scale=scale,histogram=T) c <- MeasuringLandscape:::plot_partial_effects(rf=pred_cords$xb_model, outcome="mapcoordinate_clean_missing",var="initiator_clean_1_aggmed",minsize=100, scale=scale,histogram=T) d <- MeasuringLandscape:::plot_partial_effects(rf=pred_cords$xb_model, outcome="mapcoordinate_clean_missing",var="target_clean_1_aggmed",minsize=100, scale=scale,histogram=T) e <- MeasuringLandscape:::plot_partial_effects(rf=pred_cords$xb_model, outcome="mapcoordinate_clean_missing",var="type_clean_aggmed",minsize=100, scale=scale,histogram=T) f <- MeasuringLandscape:::plot_partial_effects(rf=pred_cords$xb_model, outcome="mapcoordinate_clean_missing",var="document_unit_type",minsize=100, scale=scale,histogram=T) g <- MeasuringLandscape:::plot_partial_effects(rf=pred_cords$xb_model, outcome="mapcoordinate_clean_missing", var="document_district_clean", minsize=100, train=pred_cords$x_all_pre_dummy , scale=scale,histogram=T) h <- MeasuringLandscape:::plot_partial_effects(rf=pred_cords$xb_model, outcome="mapcoordinate_clean_missing",var="event_date_clean_year",minsize=100, scale=scale,histogram=T) #e <- plot_partial_effects(rf=pred_cords$xb_model, # outcome="mapcoordinate_clean_missing",var="locationtext_ruleclean_suffix",minsize=100)
#p_load(cowplot) final_histogram <- cowplot::plot_grid( a+ggtitle(sentence_case('Document Date Type')), b+ggtitle(sentence_case('Document Year')), c+ggtitle(sentence_case('Initiator')), d+ggtitle(sentence_case('Target')), e+ggtitle(sentence_case('Act Type')), f+ggtitle(sentence_case('Document Unit')), g+ggtitle(sentence_case('Document District')), h+ggtitle(sentence_case('Event Year')) + ylab(sentence_case("Probability of Missing Military Coordinates")), #i+ggtitle('Reporting Office'), ncol = 3, align = "hv" ) #,rel_heights=heights) final_histogram ggsave( filename = glue::glue(dir_figures, "rf_mapcoordinate_clean_missing.pdf"), plot = final_histogram, width = 10, height = 8 )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.