#' Merge a dataset with country and date info with the Gleditsch-Ward system of states
#'
#' @param data
#' @param country_col
#' @param date_col
#'
#' @return a dataset merged with the Gleditsch-Ward system of states \link{gw_system}
#' @export
#'
#' @import lubridate, dplyr
#'
#' @examples
#' library(dplyr)
#' data <- bmr %>% select(bmr_country, bmr_ccode, year, democracy) %>% filter(grepl("YUG|SERBIA",bmr_country))
#' to_gw_system(data, "bmr_country", "year")
to_gw_system <-
function(data, country_col = "country", date_col = "year", code_col = NULL, match_on = "overlap", include_extras = TRUE, match_condition = "date_matches == max(date_matches) & !duplicated(date_matches)") {
if (include_extras) {
col_names <-
c(
"country_name","GWn","GWc","region","continent","GW_startdate","GW_enddate", "microstate","lat","lon"
)
} else {
col_names <- c("country_name","GWn")
}
match_on <-
c("12-31", "1-1", "overlaps","within")[pmatch(match_on, c("31-dec", "1-jan", "overlaps", "within"))]
if (is.na(match_on))
stop("No matching method available")
stopifnot(length(date_col) <= 2, length(date_col) >= 1)
dict <- PoliticalDatasets::data
dict$GW_enddate <-
plyr::mapvalues(dict$GW_enddate,from = NA, to = as.character(round_date(now(),unit = "day")))
dict <- arrange(dict,GWn,country_name,desc(GW_startdate))
destination_data <- data.frame()
data <- as.data.frame(data)
if (length(date_col) == 2) {
if ((is.Date(data[, date_col][1]) |
is.POSIXt(data[, date_col[1]])) &
(is.Date(data[, date_col][2]) |
is.POSIXt(data[, date_col[2]]))) {
startdate <- data[,date_col[1]]
enddate <- data[,date_col[2]]
} else {
startdate <- ymd(data[,date_col[1]])
enddate <- ymd(data[,date_col[2]])
}
} else if (is.Date(data[, date_col]) |
is.POSIXt(data[, date_col])) {
startdate <- data[,date_col]
enddate <- data[,date_col]
} else {
if (match_on == "12-31" | match_on == "1-1") {
start_match <- paste0("-",match_on)
end_match <- paste0("-",match_on)
} else if (match_on == "overlaps") {
start_match <- "-1-1"
end_match <- "-12-31"
} else {
start_match <- "-6-15"
end_match <- "-6-15"
}
startdate <- ymd(paste0(data[,date_col],start_match))
enddate <- ymd(paste0(data[,date_col],end_match))
}
for (i in 1:nrow(dict)) {
country_matches <-
grepl(dict$regex[i], data[, country_col], perl = TRUE, ignore.case = TRUE)
if (!is.null(code_col)) {
code_matches <- data[, code_col] %in% dict$GWn[i]
}
if (any(country_matches)) {
if (match_on == "overlaps") {
date_matches <-
int_overlaps(
new_interval(startdate,enddate),
new_interval(dict$GW_startdate[i],dict$GW_enddate[i])
)
} else if (match_on %in% c("within","12-31","1-1")) {
date_matches <- new_interval(startdate,enddate) %within%
new_interval(dict$GW_startdate[i],dict$GW_enddate[i])
} else {
stop("No match method")
}
temp <-
merge(data[country_matches, c(country_col, date_col)], dict[i, c(col_names, "num_periods", "problem_history")], by = NULL)
temp$country_matches <- country_matches[country_matches]
temp$date_matches <- date_matches[country_matches]
if (!is.null(code_col)) {
temp$code_matches <- code_matches[country_matches]
}
destination_data <- rbind(temp, destination_data)
}
}
destination_data <-
destination_data %>% group_by_(country_col, .dots = date_col) %>% filter_(.dots = match_condition)
data <- left_join(data,destination_data)
data <-
data %>% distinct() %>% group_by_(country_col, .dots = date_col) %>% mutate(num_matches = n(), num_matches = as.numeric(ifelse(
is.na(country_name),0,as.numeric(num_matches)
)))
if (any(is.na(data$country_name))) {
print("The following countries were not matched:")
print(data %>% filter(is.na(country_name)) %>% select_(country_col, date_col))
}
if (any(data$num_matches > 1)) {
print("The following countries were matched more than once:")
print(
data %>% filter(num_matches > 1) %>% select_(.dots = c(country_col, date_col, "country_name", "num_matches"))
)
}
return(data)
}
#' Utility function for counting sequence breaks
#'
#' @param seq
#' @param seq_step
#'
#' @return a vector of periods
#' @export
#'
count_sequence_breaks <- function(seq, seq_step = 1) {
first_diff <- c(seq_step, diff(seq)) - seq_step
periods <- cumsum(abs(first_diff))
periods
}
#' Merging datsets by date intervals
#'
#' @param data1
#' @param data2
#' @param key_col
#' @param startdate1
#' @param enddate1
#' @param startdate2
#' @param enddate2
#' @param group_vars
#'
#' @return A dataset merging \code{data1} and \code{data2} by the \code{key_col}
#' with each row of \code{data1} split according to which part of it falls
#' within the intervals in \code{data2}
#' @export
#'
#' @examples
#' library(dplyr)
#' data1 <- archigos2014 %>% select(country_name,obsid,leader,startdate,enddate) %>% filter(country_name == "Cuba")
#' data2 <- polity_cases %>% select(country_name, polity, polity_startdate, polity_enddate) %>% filter(country_name == "Cuba")
#' results <- merge_by_date_interval(data1, data2, "country_name", "startdate", "enddate", "polity_startdate", "polity_enddate", "obsid")
#' results
merge_by_date_interval <-
function(data1, data2, key_col = "country_name", startdate1, enddate1, startdate2, enddate2, group_vars = NULL) {
data1 <-
rename_(data1, startdate1 = startdate1, enddate1 = enddate1, key_col = key_col)
data2 <-
rename_(data2, startdate2 = startdate2, enddate2 = enddate2, key_col = key_col)
has_common_names <-
any(common <- names(data1) %in% names(data2))
print(names(data1)[common])
key_set <- group_by(data1,key_col)
if (!is.null(group_vars)) {
key_set <- group_by_(data1,group_vars, add = TRUE)
}
key_set <-
do(key_set, data.frame(data2[data2$key_col == .$key_col &
int_overlaps(
new_interval(data2$startdate2,data2$enddate2),
new_interval(.$startdate1,.$enddate1)
),]))
merged_data <- left_join(data1, key_set)
return(merged_data)
}
#' Title
#'
#' @param country
#' @param polity
#' @param coups
#' @param economic_data
#' @param economic_variable
#' @param uds_data
#' @param uds_variable
#' @param interruptions
#' @param conflict_data
#' @param leader_names
#' @param independence
#' @param debug
#' @param gwf
#' @param rescale_range
#' @param bottom_label
#' @param fill_label
#'
#' @return
#' @export
#'
#' @examples
deep_history <- function(country, polity = TRUE, coups = TRUE, economic_data = FALSE, economic_variable = "All", uds_data = TRUE, uds_variable = "Extended", interruptions = TRUE, conflict_data = TRUE, leader_names = TRUE, independence = TRUE, debug = TRUE, gwf = FALSE, rescale_range = c(-10,10), bottom_label = "", fill_label = "Type") {
if(debug) {
message(paste("Starting",country))
}
p <- ggplot()
# Basic Polity score
if(polity) {
if(nrow(polity_cases %>% filter(country_name %in% country)) > 1) {
data <- polity_cases %>% filter(country_name %in% country)
data$polity <- ifelse(data$polity > -11, data$polity, NA)
data$polity <- scales::rescale(data$polity, to = rescale_range)
data2 <- data %>% ungroup() %>% mutate(polity_startdate = polity_enddate)
data <- rbind(data,data2) %>% arrange(country_name,polity_startdate)
rm(data2)
p <- p +
geom_path(data=data,aes(y=polity_startdate,x=polity))
if(debug) {
message("Finished polity")
}
}
}
# Coups
if(coups) {
if(nrow(PowellThyne %>% filter(country_name %in% country)) > 0) {
data <- PowellThyne %>% filter(country_name %in% country)
data$x <- rescale_range[1]
data$xend <- rescale_range[2]
p <- p + geom_segment(data = data,aes(y=date,yend=date,x=x,xend=xend,linetype=reorder(attempt_type,-coup)),alpha=0.5, color = "red") # Coup lines
if(debug) {
message("Finished coups")
}
}
}
# Economic data
if(economic_data) {
if(nrow(economic.data %>% filter(country_name %in% country,!is.na(per_capita))) > 0) {
data <- economic.data %>% filter(country_name %in% country)
if(economic_variable != "All") {
data <- data %>% filter(grepl(economic_variable,variable, ignore.case = TRUE))
stopifnot(nrow(data) > 0)
}
data$value_rescaled_log <- scales::rescale(data$per_capita,to= rescale_range)
data$date <- ymd(paste0(data$year,"-12-31"))
p <- p + geom_path(data = data,aes(y=date,x=value_rescaled_log, color = primary_source, group = variable),alpha=0.5)
# Dollar values at start and end of periods of leaders
if(nrow(data %>% filter(year %in% year(archigos2014$enddate[ archigos2014$country_name %in% country ]))) > 0) {
p <- p + geom_text(data = data %>% filter(year %in% year(archigos2014$enddate[ archigos2014$country_name %in% country ])),aes(y=date,x=value_rescaled_log + 2,label=paste0(primary_source,": ", dollar(per_capita))),alpha=0.4, angle = -90, size = 2, position = "jitter")
}
p <- p + geom_text(data = data %>% group_by(country_name) %>% filter(year == max(year) | year == year(gw_system$startdate[ gw_system$country %in% country])),aes(y=date,x=value_rescaled_log + 2,label=paste0(primary_source,": ", dollar(per_capita))),alpha=0.4, angle = -90, size = 2, position = "jitter")
if(debug) {
message("Finished economic data")
}
}
}
# UDS score
if(uds_data) {
if(nrow(extended_uds %>% filter(country_name %in% country)) > 1) {
extended_uds$mean_rescaled <- rescale(extended_uds$mean, to= rescale_range, from=c(min(extended_uds$pct025),max(extended_uds$pct975)))
extended_uds$pct025_rescaled <- rescale(extended_uds$pct025, to= rescale_range, from=c(min(extended_uds$pct025),max(extended_uds$pct975)))
extended_uds$pct975_rescaled <- rescale(extended_uds$pct975, to= rescale_range, from=c(min(extended_uds$pct025),max(extended_uds$pct975)))
extended_uds$date <- ymd(paste0(extended_uds$year,"-12-31"))
if(uds_variable != "All") {
extended_uds <- extended_uds %>% filter(grepl(uds_variable,variable, ignore.case = TRUE))
stopifnot(nrow(extended_uds) > 0)
}
data <- extended_uds %>% filter(country_name %in% country, variable == "Extended UDS")
p <- p + geom_path(data = data,aes(y=date,x=mean_rescaled),alpha=0.3, color = "red")
positions <- data.frame(x = c(data$pct025_rescaled,data$pct975_rescaled[length(data$pct975_rescaled):1]), y = c(data$date,data$date[length(data$date):1]))
p <- p + geom_polygon(data = positions,aes(x=x,y=y),alpha=0.1, fill = "red")
data <- extended_uds %>% filter(country_name %in% country, variable == "Original UDS")
p <- p + geom_path(data = data,aes(y=date,x=mean_rescaled),alpha=0.3, color = "blue")
positions <- data.frame(x = c(data$pct025_rescaled,data$pct975_rescaled[length(data$pct975_rescaled):1]), y = c(data$date,data$date[length(data$date):1]))
p <- p + geom_polygon(data = positions,aes(x=x,y=y),alpha=0.1, fill = "blue")
if(debug) {
message("Finished uds")
}
}
}
# Polity interregnums and interruptions
if(interruptions) {
interruptions <- polity_cases %>% filter(polity < -10,!is.na(country_name))
interruptions$xmin <- rescale_range[1]
interruptions$xmax <- rescale_range[2]
interruptions <- interruptions %>% filter(country_name %in% country)
if(nrow(interruptions) > 0) {
p <- p + geom_rect(data=interruptions,aes(ymin=polity_startdate,ymax=polity_enddate,xmin=xmin,xmax=xmax),alpha=0.2)
}
if(debug) {
message("Finished interruptions")
}
}
# Conflicts
if(conflict_data) {
if(nrow(ucdpConflict %>% filter(country_name %in% country)) > 0) {
ucdpConflict <- ucdpConflict %>% filter(country_name %in% country)
ucdpConflict$xmin <- rescale_range[1]
ucdpConflict$xmax <- rescale_range[2]
p <- p + geom_rect(data=ucdpConflict,aes(ymin=startdate,ymax=enddate,xmin=xmin,xmax=xmax),fill = "lightgrey", alpha=0.2, color = "grey") + # Wars
geom_text(data=ucdpConflict,aes(y=int_start(int_shift(interval(startdate,enddate),by=duration(int_length(interval(startdate,enddate))/2))),x=(xmin+xmax)/2,label=paste("Conflict: ", SideA,"vs",SideB, ", ",TypeOfConflict),size=IntensityLevel)) + scale_size_discrete(range=c(2,3))
}
if(debug) {
message("Finished conflicts")
}
}
# GWF regime type
if(gwf) {
if(nrow(all_gwf_periods %>% filter(country_name %in% country)) > 0) {
all_gwf_periods <- all_gwf_periods %>% filter(country_name %in% country)
all_gwf_periods$xmin <- rescale_range[1]
all_gwf_periods$xmax <- rescale_range[2]
p <- p + geom_rect(data=all_gwf_periods,aes(ymin=gwf_startdate,ymax=gwf_enddate,xmin=xmin,xmax=xmax,fill=gwf_full_regimetype),alpha=0.2) + # Regime type
geom_text(data=all_gwf_periods,aes(y=int_start(int_shift(interval(gwf_startdate,gwf_enddate),by=duration(int_length(interval(gwf_startdate,gwf_enddate))/2))),x=(xmin+xmax)/2,label=gwf_full_regimetype), size = 2.5)
}
if(debug) {
message("Finished gwf")
}
}
# Leader names and exit types
if(leader_names) {
if(nrow(archigos2014 %>% filter(country_name %in% country)) > 0) {
archigos2014 <- archigos2014 %>% filter(country_name %in% country)
archigos2014$x <- rescale_range[1]
archigos2014$xend <- rescale_range[2]
p <- p + geom_segment(data = archigos2014,aes(y=enddate,yend=enddate,x=x,xend=xend),linetype = 3,alpha=0.5, color = "blue") #Exit lines
p <- p + geom_text(data = archigos2014,aes(y=enddate,x=(x+xend)/2,label=paste0(leader," (",exit, ", ",exitcode,")")),position=position_dodge(width=1), size = 2.5)
}
if(debug) {
message("Finished leaders")
}
}
# System of states data: entry and exit from state system
if(independence) {
country1 <- country
data <- gw_system %>% filter(country %in% country1)
data$x <- rescale_range[1]
data$xend <- rescale_range[2]
p <- p + geom_segment(data=data,aes(y=startdate,yend=startdate,x=x,xend=xend),color="green",linetype=4,size=2,alpha=0.2) +
geom_text(data=data,aes(x=(x+xend)/2,y=startdate,label="Entry into state system/Independence"),alpha=0.2,color="lightblue") +
geom_segment(data=data,aes(y=enddate,yend=enddate,x=x,xend=xend),color="red",linetype=4,size=2,alpha=0.2) +
geom_text(data=data,aes(x=(x+xend)/2,y=enddate,label="End of independence/exit from state system"),alpha=0.2,color="lightblue")
if(debug) {
message("Finished independence")
}
}
# final plotting
if(debug) {
message("Beginning final plotting")
}
country1 <- country
data <- gw_system %>% filter(country %in% country1)
p <- p + labs(y="Year",x=bottom_label,alpha="Mode of leader exit",size="Conflict intensity",fill=fill_label,color="Source for GDP per capita",linetype="Coup?") +
facet_wrap(~country_name) +
coord_cartesian(ylim=c(data$startdate - years(5),ymd(20160101))) +
theme_bw() +
guides(fill=guide_legend(title.position="top",ncol=2),color=guide_legend(title.position="top",ncol=2),alpha=guide_legend(title.position="top",ncol=2),size=guide_legend(title.position="top",ncol=2))+
scale_y_datetime(breaks=c(round_date(archigos2014$enddate[ archigos2014$country_name %in% country ],"year"),round_date(polity_cases$enddate[ polity_cases$country_name %in% country ],"year")), labels=date_format("%Y")) +
theme(legend.position="bottom")
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.