# Chunk options applied to every code chunk of this document: collapse the
# source and its output into one block and prefix output lines with "#>".
knitr::opts_chunk$set(
  comment = "#>",
  collapse = TRUE
)
library(dplyr)
library(rimr)

# Load the four election datasets used below.
data("kraj_2000", "kraj_2004", "kraj_2008", "kraj_2012")

# Vote counts contain whitespace (e.g. as a thousands separator); strip it
# before converting the column to integer.
kraj_2000[["abs_votes"]] <- as.integer(
  gsub("\\s", "", kraj_2000[["abs_votes"]])
)
kraj_2004[["abs_votes"]] <- as.integer(
  gsub("\\s", "", kraj_2004[["abs_votes"]])
)

# Replace the two-character sequence "l'" with the single letter "ĺ" in the
# name columns so that names are spelled the same way across datasets.
kraj_2000[["last_name"]] <- gsub("l'", "ĺ", kraj_2000[["last_name"]])
kraj_2004[["last_name"]] <- gsub("l'", "ĺ", kraj_2004[["last_name"]])
kraj_2004[["first_name"]] <- gsub("l'", "ĺ", kraj_2004[["first_name"]])

# Store the 2012 mandate column as integer.
kraj_2012[["mandate"]] <- as.integer(kraj_2012[["mandate"]])
First, find similar candidates between the elections in 2000 and 2004, between 2004 and 2008, and between 2008 and 2012. Simply joining these datasets would omit those candidates who ran in 2000 and in 2008 (but did not run in 2004).
# Link candidates between each pair of consecutive elections. Every call uses
# the same matching rule: first name, last name and region are passed as `eq`,
# and `approx_birth_year` is passed in `eq_tol` with the value 1
# (presumably an allowed difference of one year -- see the rimr docs).

# 2000 -> 2004
k_00_04 <- find_all_similar(
  source = kraj_2000,
  target = kraj_2004,
  eq = c("first_name", "last_name", "region"),
  eq_tol = list(c("approx_birth_year", 1)),
  id = "row_id",
  keep_most_similar = TRUE,
  verbose = TRUE,
  cores = 4
)

# 2004 -> 2008
k_04_08 <- find_all_similar(
  source = kraj_2004,
  target = kraj_2008,
  eq = c("first_name", "last_name", "region"),
  eq_tol = list(c("approx_birth_year", 1)),
  id = "row_id",
  keep_most_similar = TRUE,
  verbose = TRUE,
  cores = 4
)

# 2008 -> 2012
k_08_12 <- find_all_similar(
  source = kraj_2008,
  target = kraj_2012,
  eq = c("first_name", "last_name", "region"),
  eq_tol = list(c("approx_birth_year", 1)),
  id = "row_id",
  keep_most_similar = TRUE,
  verbose = TRUE,
  cores = 4
)
Thus, we have to find the data for those 2008 candidates who did not run in 2004, using the `return_missing_data` function. Also, to make it faster, we should find the data for persons who ran in 2000 but not in 2004, using `return_nonconsecutive_data`.
# 2008 rows whose ids were not found among the 2004 -> 2008 matches, and the
# corresponding candidate records.
missing_08 <- find_missing(kraj_2008, "row_id", k_04_08[["kraj_2008"]])
missing_data_08 <- return_missing_data(kraj_2008, "row_id", missing_08)

# Records derived from the 2000 -> 2004 matching that are to be compared
# against the still-unmatched 2008 candidates.
noncons_04 <- return_nonconsecutive_data(
  k_00_04,
  kraj_2000,
  kraj_2004,
  "row_id"
)

# missing_12 <- find_missing(kraj_2012, "row_id", k_08_12$kraj_2012)
# missing_data_12 <- return_missing_data(kraj_2012, "row_id", missing_12)
# noncons_08 <- return_nonconsecutive_data(k_00_04, kraj_2000, kraj_2004, "row_id")
Then, we compare these two datasets for matching persons.
# Compare the non-consecutive records with the unmatched 2008 candidates,
# using the same matching rule as for the consecutive elections.
k_00_08 <- find_all_similar(
  noncons_04,
  missing_data_08,
  eq = c("first_name", "last_name", "region"),
  eq_tol = list(c("approx_birth_year", 1)),
  id = "row_id",
  keep_most_similar = TRUE,
  verbose = TRUE,
  cores = 4
)
# Name the two id columns after the elections they refer to.
colnames(k_00_08) <- c("kraj_2000", "kraj_2008")

# panel_00_08 <- create_panel_output(k_00_08)

# 2008 ids found neither via 2004 nor via 2000.
found_ids_08 <- c(k_04_08[["kraj_2008"]], k_00_08[["kraj_2008"]])
missing_final_08 <- find_missing(kraj_2008, "row_id", found_ids_08)
colnames(missing_final_08) <- c("kraj_2004", "kraj_2008")

# k_04_08_2 <- append_missing(target = kraj_2008,
#                             target_ids = "row_id",
#                             out = k_04_08,
#                             found_ids = c(k_04_08$kraj_2008, k_00_08$kraj_2008))
#' Fill in target ids found through a non-consecutive match
#'
#' For every row of `out` whose `source` id also occurs in `noncons_pivot`
#' (restricted to rows where the last column of `noncons_pivot` is not `NA`),
#' the `target` column of `out` is overwritten with the `target` id of the
#' matching `noncons_pivot` row.
#'
#' Rows are aligned by id with `match()`, so the result does not depend on
#' `out` and `noncons_pivot` being sorted the same way, and ids present in
#' `noncons_pivot` but absent from `out` are skipped instead of shifting the
#' remaining values.
#'
#' @param out Data frame with one id column per election.
#' @param noncons_pivot Data frame containing at least the columns `source`
#'   and `target`.
#' @param source Name of the id column used to align rows.
#' @param target Name of the id column to be filled in.
#' @return `out` with the `target` column updated.
insert_nonconsecutive <- function(out, noncons_pivot, source, target) {
  last_column <- ncol(noncons_pivot)
  # `[[` always yields a plain vector, for data frames and tibbles alike.
  running <- noncons_pivot[!is.na(noncons_pivot[[last_column]]), ]

  # Position of each `out` row in `running`; NA ids never count as a match.
  idx <- match(out[[source]], running[[source]])
  hit <- !is.na(idx) & !is.na(out[[source]])

  if (any(hit)) {
    out[hit, target] <- running[[target]][idx[hit]]
  }
  out
}

# ---- Panel 2000-2008 ------------------------------------------------------

# Combine the consecutive matches, then add the 2000 -> 2008 links.
out <- dplyr::full_join(k_00_04, k_04_08, by = c("kraj_2004"))
out_00_08 <- insert_nonconsecutive(out, k_00_08, "kraj_2000", "kraj_2008")

# Append the 2008 candidates that were not matched to any earlier election.
out_00_08_with_missing <- dplyr::bind_rows(out_00_08, missing_final_08)

# NOTE(review): the panel is built from `out_00_08`, i.e. without the rows of
# `missing_final_08` -- confirm that this is intended.
panel_00_08 <- create_panel_output(out_00_08, "person_id")

# ---- Extension to 2012 ----------------------------------------------------

# 2012 candidates not matched to 2008, compared with the 2004 -> 2008 records.
missing_12 <- find_missing(kraj_2012, "row_id", k_08_12$kraj_2012)
missing_data_12 <- return_missing_data(kraj_2012, "row_id", missing_12)
noncons_12 <- return_nonconsecutive_data(
  k_04_08, kraj_2004, kraj_2008, "row_id"
)

k_04_12 <- find_all_similar(
  noncons_12,
  missing_data_12,
  eq = c("first_name", "last_name", "region"),
  eq_tol = list(c("approx_birth_year", 1)),
  id = "row_id",
  keep_most_similar = TRUE,
  verbose = TRUE,
  cores = 4
)
colnames(k_04_12) <- c("kraj_2004", "kraj_2012")

# 2012 candidates still unmatched after 2008 and 2004, compared with the
# 2000 -> 2008 records.
missing_12_2 <- find_missing(
  kraj_2012, "row_id", c(k_08_12$kraj_2012, k_04_12$kraj_2012)
)
missing_data_12_2 <- return_missing_data(kraj_2012, "row_id", missing_12_2)
noncons_12_2 <- return_nonconsecutive_data(
  out_00_08_with_missing, kraj_2000, kraj_2008, "row_id"
)

k_00_12 <- find_all_similar(
  noncons_12_2,
  missing_data_12_2,
  eq = c("first_name", "last_name", "region"),
  eq_tol = list(c("approx_birth_year", 1)),
  id = "row_id",
  keep_most_similar = TRUE,
  verbose = TRUE,
  cores = 4
)
colnames(k_00_12) <- c("kraj_2000", "kraj_2012")

# Join the 2008 -> 2012 matches, then add the 2004 -> 2012 and 2000 -> 2012
# links.
out_00_12 <- dplyr::full_join(
  out_00_08_with_missing, k_08_12, by = "kraj_2008"
)
out_00_12_2 <- insert_nonconsecutive(
  out_00_12, k_04_12, "kraj_2004", "kraj_2012"
)
out_00_12_3 <- insert_nonconsecutive(
  out_00_12_2, k_00_12, "kraj_2000", "kraj_2012"
)

# Append the 2012 candidates that were not matched to any earlier election.
final_missing <- find_missing(kraj_2012, "row_id", out_00_12_3$kraj_2012)
colnames(final_missing) <- c("kraj_2008", "kraj_2012")
out_00_12_final <- dplyr::bind_rows(out_00_12_3, final_missing)

panel_00_12 <- create_panel_output(out_00_12_final, "person_id", "data")

#noncons_08 <- return_nonconsecutive_data(k_00_04, kraj_2000, kraj_2004, "row_id")
#noncons_00 <- return_nonconsecutive_data(k_00_08, kraj_2000, kraj_2008, "row_id")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.