# Nothing
## ----setup, include = FALSE---------------------------------------------------
# Chunk defaults for the rendered document: collapse source and output into a
# single block and prefix printed output with "##".
knitr::opts_chunk$set(collapse = TRUE, comment = "##")
## ----include = FALSE----------------------------------------------------------
# Load the package from its source tree.
devtools::load_all()
# Working data for the first examples: the small example report set, ordered
# by event identifier.
df <- dplyr::arrange(small_maverick_event_report, event_id)
## ----message = FALSE, warning = FALSE-----------------------------------------
#install.packages("tidyverse")
#install.packages("tinytable")
library(tidyverse)
library(tinytable)
## ----echo = FALSE-------------------------------------------------------------
# Show the reports recorded under event "CIV-0003", restricted to a handful of
# columns. The result gets a descriptive name instead of `table`, so the name
# of base::table() is not reused for a data object.
civ_0003_reports <- df %>%
  dplyr::select(event_id, city, location, actor1, deaths_best, source) %>%
  dplyr::filter(event_id == "CIV-0003") %>%
  head()
tt(civ_0003_reports)
## -----------------------------------------------------------------------------
#install.packages("eventreport")
## -----------------------------------------------------------------------------
library(eventreport)
## -----------------------------------------------------------------------------
# Call dscore() on the example reports, grouped by event and evaluated over
# three variables, and display the first ten entries of the result.
head(
  dscore(
    df,
    group_var = "event_id",
    variables = c("country", "actor1", "deaths_best")
  ),
  10
)
## -----------------------------------------------------------------------------
# mean_dscore() over four variables, grouped by event.
mean_dscore(
  df,
  group_var = "event_id",
  variables = c("country", "actor1", "deaths_best", "injuries_best")
)
## -----------------------------------------------------------------------------
# The same call with `normalize = TRUE`.
mean_dscore(
  df,
  group_var = "event_id",
  variables = c("country", "actor1", "deaths_best", "injuries_best"),
  normalize = TRUE
)
## ----fig.width=6, fig.height=4, dpi=150, out.width="70%"----------------------
# Three variables, normalized, with `plot = TRUE`; the chunk options above set
# the figure size and resolution.
mean_dscore(
  df,
  group_var = "event_id",
  variables = c("country", "actor1", "deaths_best"),
  normalize = TRUE,
  plot = TRUE
)
## -----------------------------------------------------------------------------
# Run aggregation_diagnostics() for three variables, grouped by event, and
# render the result with tinytable.
diagnostics <- aggregation_diagnostics(
  df,
  group_var = "event_id",
  variables = c("city", "deaths_best", "actor1")
)
tt(diagnostics)
## -----------------------------------------------------------------------------
# "Sweden" occurs three times, "Denmark" once.
calc_mode(c("Sweden", "Sweden", "Denmark", "Sweden"))
## -----------------------------------------------------------------------------
# Two values occur equally often; both tie-break vectors are supplied, and
# only the second one differs between elements.
calc_mode(
  c("Sweden", "Sweden", "Denmark", "Denmark"),
  tie_break = c(1, 1, 1, 1),
  second_tie_break = c(1, 4, 1, 1)
)
## -----------------------------------------------------------------------------
# The same tied input without any tie-break information.
calc_mode(c("Sweden", "Sweden", "Denmark", "Denmark"))
## -----------------------------------------------------------------------------
# Input in which the empty string is the most frequent value.
calc_mode(c("Sweden", "", "", "Denmark"))
## -----------------------------------------------------------------------------
# The same input passed to calc_mode_na_ignore(), with tie-break vectors.
calc_mode_na_ignore(
  c("Sweden", "", "", "Denmark"),
  tie_break = c(1, 1, 1, 1),
  second_tie_break = c(4, 1, 1, 1)
)
## -----------------------------------------------------------------------------
# Binary input with three zeros and three ones.
calc_mode_binary(c(0, 1, 1, 1, 0, 0))
## -----------------------------------------------------------------------------
# Numeric input in which 1 occurs three times.
calc_mode_numeric(c(1, 1, 1, 2, 3, 5))
## -----------------------------------------------------------------------------
# Date strings in which "2024-01-01" occurs twice.
calc_mode_date(c("2024-01-01", "2024-01-01", "2024-01-02"))
## -----------------------------------------------------------------------------
# Three place names, each paired with a precision value (3, 2, 1).
calc_max_precision(
  x = c("Tranas", "Smaland", "Sweden"),
  precision_var = c(3, 2, 1)
)
## -----------------------------------------------------------------------------
# The same inputs passed to calc_min_precision().
calc_min_precision(
  x = c("Tranas", "Smaland", "Sweden"),
  precision_var = c(3, 2, 1)
)
## -----------------------------------------------------------------------------
# Input with a repeated value and an empty string.
aggregate_strings(c("Sweden", "Sweden", "Denmark", "", "Finland"))
## -----------------------------------------------------------------------------
# Replace `df` with the first 100 rows of the full report data, ordered by
# event identifier.
df <- utils::head(
  dplyr::arrange(maverick_event_report, event_id),
  n = 100
)
## -----------------------------------------------------------------------------
# Aggregate by event, requesting the mode of a single column.
df %>%
  aggregateData(
    group_var = "event_id",
    find_mode = "city"
  ) %>%
  utils::head(n = 10)
## -----------------------------------------------------------------------------
# Request the mode of several columns at once.
df %>%
  aggregateData(
    group_var = "event_id",
    find_mode = c("city", "location", "actor1")
  ) %>%
  utils::head(n = 10)
## -----------------------------------------------------------------------------
# Mix several aggregation arguments in one call, then keep a subset of the
# columns and only the row(s) for event "CIV-0002".
df %>%
  aggregateData(
    group_var = "event_id",
    find_mode = c("city", "location"),
    find_mode_na_ignore = "actor1",
    find_max = "deaths_best",
    combine_strings = "source"
  ) %>%
  dplyr::select(event_id:actor1, deaths_best:unit_of_analysis, source) %>%
  dplyr::filter(event_id == "CIV-0002")
## -----------------------------------------------------------------------------
# Same kind of aggregation, now naming the columns passed as the first and
# second tie-breakers.
df %>%
  aggregateData(
    group_var = "event_id",
    find_mode = c("city", "location"),
    find_mode_na_ignore = "actor1",
    find_max = "deaths_best",
    tie_break = "source_classification",
    second_tie_break = "certain"
  ) %>%
  utils::head(n = 10)
## -----------------------------------------------------------------------------
# Aggregate `city` and `location` via `find_most_precise`, each paired with
# `geo_precision` as its precision column; the remaining arguments match the
# tie-break example. The stray trailing comma after `second_tie_break` was
# removed: it passed an extra empty argument to aggregateData().
df %>%
  aggregateData(
    group_var = "event_id",
    find_most_precise = list(
      list(var = "city", precision_var = "geo_precision"),
      list(var = "location", precision_var = "geo_precision")
    ),
    find_mode_na_ignore = "actor1",
    find_max = "deaths_best",
    tie_break = "source_classification",
    second_tie_break = "certain"
  ) %>%
  utils::head(10)
## -----------------------------------------------------------------------------
# First aggregation: modes for the place columns and minima for the casualty
# columns, labelled "Most-conservative". Only the first ten rows are kept.
conservative <- df %>%
  aggregateData(
    group_var = "event_id",
    find_mode = c("city", "location"),
    find_min = c("deaths_best", "injuries_best"),
    tie_break = "source_classification",
    second_tie_break = "certain",
    aggregation_name = "Most-conservative"
  ) %>%
  utils::head(n = 10)
# Second aggregation: `find_mode_na_ignore` for the place columns and maxima
# for the casualty columns, labelled "Most-informative".
maximalist <- df %>%
  aggregateData(
    group_var = "event_id",
    find_mode_na_ignore = c("city", "location"),
    find_max = c("deaths_best", "injuries_best"),
    tie_break = "source_classification",
    second_tie_break = "certain",
    aggregation_name = "Most-informative"
  ) %>%
  utils::head(n = 10)
# Stack both results and order by event so each event's rows sit together.
dplyr::arrange(rbind(conservative, maximalist), event_id)
## -----------------------------------------------------------------------------
# mean_dscore() on the full report data for the start date and death count,
# grouped by event.
mean_dscore(
  maverick_event_report,
  group_var = "event_id",
  variables = c("date_start", "deaths_best")
)
## -----------------------------------------------------------------------------
# "Representative" set: mode-based arguments for country, death count and
# start date.
representative <- maverick_event_report %>%
  aggregateData(
    group_var = "event_id",
    find_mode = "country",
    find_mode_numeric = "deaths_best",
    find_mode_date = "date_start",
    tie_break = "source_classification",
    second_tie_break = "certain",
    aggregation_name = "Representative"
  )
# "Informative" set: mode for country, `find_max` for death count and start
# date.
informative <- maverick_event_report %>%
  aggregateData(
    group_var = "event_id",
    find_mode = "country",
    find_max = c("deaths_best", "date_start"),
    tie_break = "source_classification",
    second_tie_break = "certain",
    aggregation_name = "Informative"
  )
# Stack the two sets into one data frame.
combined <- rbind(representative, informative)
## -----------------------------------------------------------------------------
# Weekly death totals per country and aggregation set.
#
# Steps: keep events with more than one source, parse `date_start` as a Date,
# floor it to the start of its week, expand to a full weekly grid for every
# country/aggregation pair (missing death counts filled with 0), then sum
# deaths within each week/country/aggregation cell.
#
# `ymd()` is called as `lubridate::ymd()` so the chunk does not depend on
# lubridate being attached; `library(tidyverse)` only attaches it from
# tidyverse 2.0.0 onward, and floor_date() is already namespaced here.
#
# NOTE(review): the grid starts on 1995-01-01, a Sunday, which matches
# floor_date()'s default week start; confirm the `lubridate.week.start`
# option is not overridden where this runs.
maverick_time_series_week <- combined %>%
  dplyr::filter(number_of_sources > 1) %>%
  dplyr::mutate(
    date_start = as.Date(as.character(date_start), format = "%Y-%m-%d"),
    week_start = lubridate::floor_date(date_start, unit = "week")
  ) %>%
  tidyr::complete(
    week_start = seq(
      lubridate::ymd("1995-01-01"),
      lubridate::ymd("2023-12-31"),
      by = "1 week"
    ),
    country, aggregation,
    fill = list(deaths_best = 0)
  ) %>%
  dplyr::group_by(week_start, country, aggregation) %>%
  dplyr::summarize(
    deaths_best = sum(deaths_best, na.rm = TRUE),
    .groups = "drop"
  )
## ----fig.width=7, fig.height=4, dpi=150, out.width="70%"----------------------
# Line chart of weekly deaths for "Ivory Coast", for weeks after 2010-09-30
# and before 2011-06-01, with one coloured line per aggregation set.
#
# The date bounds are explicit Date values rather than strings that get
# coerced during comparison, and aes() is namespaced like the other ggplot2
# calls in this chunk.
maverick_time_series_week %>%
  dplyr::filter(
    week_start > as.Date("2010-09-30"),
    week_start < as.Date("2011-06-01"),
    country == "Ivory Coast"
  ) %>%
  ggplot2::ggplot() +
  ggplot2::geom_line(
    ggplot2::aes(x = week_start, y = deaths_best, color = aggregation),
    linewidth = 1
  ) +
  # One axis break per month across the plotted window.
  ggplot2::scale_x_date(
    breaks = seq(as.Date("2010-10-01"), as.Date("2011-06-01"), by = "1 month"),
    date_labels = "%b %Y"
  ) +
  ggplot2::labs(
    x = NULL,
    y = "Best estimated number of weekly deaths"
  ) +
  ggplot2::theme_bw()
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.