The extended_uds dataset can be generated with the following code:

library(dplyr)
library(QuickUDS)

measures <- c("pmm_arat", "blm", 
              "bmr_democracy_femalesuffrage",
              "bnr_extended", "pmm_bollen", "csvmdi",
              "doorenspleet", "eiu",
              "wgi_democracy","fh_total_reversed", 
              "fh_electoral", "gwf_democracy_extended", 
              "pmm_hadenius", "kailitz_tri",
              "lexical_index", "mainwaring", 
              "magaloni_democracy_extended",
              "pmm_munck", "pacl", "PEPS1v",
              "pitf", "polity2", "reign_democracy", 
              "polyarchy_original_contestation",
              "prc", "svolik_democracy", 
              "ulfelder_democracy_extended",
              "utip_dichotomous_strict", "v2x_polyarchy",
              "vanhanen_democratization", "wth_democ1")

# Generate Extended UDS

extended_model <- democracy_model(measures, 
                                  verbose = FALSE, 
                                  technical = list(NCYCLES = 2500))

extended_scores <- democracy_scores(model = extended_model)

The model takes about 1-2 minutes to converge on my machine, and accounts for more than 90 per cent of the variance in the various democracy indexes used:

extended_model@time

# Number of iterations, log likelihood, etc.
extended_model

# Correlations of latent factor with source variables, variance accounted for, etc.
summary(extended_model)
extended_uds <- extended_scores %>%
  select(-matches("uds_"))

devtools::use_data(extended_uds, overwrite = TRUE)

Cutpoints and Discrimination

The cutpoints and discrimination parameters of these scores can be obtained as follows:

library(ggplot2)

dichotomous <- cutpoints(extended_model) %>% 
  group_by(variable) %>% 
  filter(n()==1) %>% 
  ungroup() %>% 
  summarise(avg_estimate = mean(estimate),
            avg_se = sqrt(sum(se^2)/n() ^ 2 )) %>%
  mutate(avg_pct975 = avg_estimate + 1.96*avg_se,
         avg_pct025 = avg_estimate - 1.96*avg_se)

xmax <- cutpoints(extended_model) %>% 
  pull(variable) %>% 
  unique() %>% 
  length()

xmax <- xmax + 1

cutpoints(extended_model) %>%
  ggplot(aes(x=variable, 
             y = estimate, 
             ymin = pct025,
             ymax=pct975))  + 
  theme_bw() + 
  geom_rect(ymin = dichotomous$avg_pct025, ymax = dichotomous$avg_pct975,
            xmin = 0, xmax = xmax, alpha = 0.2, fill = "grey") + 
  geom_hline(yintercept = dichotomous$avg_estimate, 
             color="black", linetype = 2) + 
  labs(x = "",
       y = "Unified democracy level rater cutoffs",
       title = "Rater cutoffs",
       subtitle = "Grey band represents the average cutoff for dichotomous measures of democracy") + 
  geom_point() + 
  geom_errorbar() + 
  geom_hline(yintercept = 0, 
             color="red")+
  coord_flip()

cutpoints(extended_model, type ="discrimination") %>%
  ggplot(aes(x=reorder(variable, estimate),
           y = estimate, ymin = pct025, ymax = pct975))  + 
  theme_bw() + 
  labs(x="",y="Discrimination parameter for each rater
       \n(higher value means fewer idiosyncratic\nerrors relative to latent score)",
       title = "Discrimination parameters",
       subtitle = "Measures with higher discrimination are less likely \nto make idiosyncratic judgments relative to the rest") + 
  geom_point() + 
  geom_errorbar() + 
  coord_flip()

Score types and scores by country

The democracy_scores function produces three types of scores: the original latent variable (z1), an "adjusted" score where 0 equals the cutpoint of the dichotomous democracy indexes (z1_adj) and a probability score (z1_*_as_prob) that is normalizes the other two to a 0-1 index, and can be interepreted as a kind of probability measure: country-years with scores close to 1 are almost certainly democratic, while country-years with scores close to 0 are almost certainly not, while 0.5 represents the cut-off between democracy and non-democracy.

These extended UD scores are available for r nrow(extended_scores) country-years (r length(unique(extended_scores$extended_country_name)) unique countries and non-sovereign territories):

count_sequence_breaks <- function(seq, seq_step = 1) {
  first_diff <- c(seq_step, diff(seq)) - seq_step
  periods <- cumsum(abs(first_diff))
  periods
}

extended_scores <- extended_scores %>% 
  group_by(extended_country_name) %>% 
  mutate(group = count_sequence_breaks(year), 
         group2 = count_sequence_breaks(in_GW_system, seq_step = 0), 
         group3 = paste(group,group2))

data <- extended_scores

countries <- unique(data$extended_country_name)

periods_of_independence <- data %>% 
  group_by(extended_country_name, group3, in_GW_system) %>% 
  summarise(start = min(year), end = max(year)) %>% 
  filter(in_GW_system)

ggplot() + 
  geom_path(data = data %>% 
              filter(extended_country_name %in% countries[1:round(length(countries)/2)]), 
            aes(x = year, y = z1_adj_as_prob, group = group)) + 
  geom_ribbon(data = data %>% 
                filter(extended_country_name %in% countries[1:round(length(countries)/2)]), 
              aes(x = year, ymin = z1_pct025_adj_as_prob, 
                  ymax = z1_pct975_adj_as_prob, group = group), 
              alpha=0.2) + 
  theme_bw() + 
  labs(x = "Year", 
       y = "Extended unified democracy score,
       transformed to 0-1 probability scale, per year",
       color = "",
       title = "Extended Unified Democracy Scores",
       subtitle = "Probability scale (0-1). Scores for the period before 1800 omitted.")  + 
  theme(legend.position = "bottom") + 
  guides(color = guide_legend(ncol=1),fill = guide_legend(nrow=1)) + 
  facet_wrap(~extended_country_name,ncol=4) +
  geom_hline(yintercept = 0.5, color="red") +
  geom_rect(data = periods_of_independence %>% 
              filter(extended_country_name %in% countries[1:round(length(countries)/2)]), 
            aes(xmin = start, xmax = end, ymin = 0, ymax = 1), 
            alpha = 0.2, fill = "blue") +
  coord_cartesian(xlim = c(1800:2017))
ggplot() + 
  geom_path(data = data %>% 
              filter(extended_country_name %in% countries[(round(length(countries)/2) + 1):length(countries)]), 
            aes(x = year, y = z1_adj_as_prob, group = group)) + 
  geom_ribbon(data = data %>% 
                filter(extended_country_name %in% countries[(round(length(countries)/2) + 1):length(countries)]), 
              aes(x = year, ymin = z1_pct025_adj_as_prob, 
                  ymax = z1_pct975_adj_as_prob, group = group), 
              alpha=0.2) + 
  theme_bw() + 
  labs(x = "Year", 
       y = "Extended unified democracy score,
       transformed to 0-1 probability scale, per year",
       color = "",
       title = "Extended Unified Democracy Scores, 1800-2016",
       subtitle = "Probability scale (0-1); values over 0.5 are likely to be classified as democracies by 
       dichotomous measures.")  + 
  theme(legend.position = "bottom") + 
  guides(color = guide_legend(ncol=1),fill = guide_legend(nrow=1)) + 
  facet_wrap(~extended_country_name,ncol=4) +
  geom_hline(yintercept = 0.5, color="red") +
  geom_rect(data = periods_of_independence %>% 
              filter(extended_country_name %in% countries[(round(length(countries)/2) + 1):length(countries)]), 
            aes(xmin = start, xmax = end, ymin = 0, ymax = 1), 
            alpha = 0.2, fill = "blue") +
  coord_cartesian(xlim = c(1800:2017))

(Grey shaded areas represent 95% confidence intervals; blue shaded areas are periods where the country is either deemed to be a member of the system of states in the Gleditsch and Ward list of state system membership since 1816, i.e., independent, or is a microstate in Gleditsch's tentative list).



xmarquez/QuickUDS documentation built on May 4, 2019, 1:24 p.m.