#' @title Nationalization
#'
#' @author Simon Page
#'
#' @description This function creates and populates datasets of party nationalization measures,
#' using CLEA data with output at the national-level, party-level, and the
#' constituency-level.
#'
#' @param dataSource Character string of the location of the CLEA data to be used for nationalization measure generation.
#' @param dataFormat Format of the data file being read. Acceptable formats are the following: ".csv",".xlsx",".rdata", or ".dta"
#' @param outputFolder Character string of the location where the user desires the output files to be written on their local machine. Note that four files will be generated at the
#' end of this script (i.e., party-level, national-level, constituency-level, and Gini inequality measures).
#' @param inequalityType Optional argument, which defaults to "Gini" to compute Gini measures of inequality. Other options are available (i.e., "RS","Atkinson","Theil","Kolm",
#' "var","square.var","entropy"), but "Gini" is highly recommended.
#' @param CandidateOrPartyBased Optional argument, which defaults to "party.based"; this argument accepts only "party.based" or "candidate.based", which allows the user to proxy
#' candidate votes/candidate shares for party votes/party shares. This is indended for the advanced user, and "party.based" is highly recomended for accurate measures.
#' @param filterSmallParties Optional argument, which defaults to `TRUE`. Generally, this package filters out parties that did not achieve at least five percent (`5%`) of the
#' national vote before computing measures of nationalization. However, the user can opt to maintain all parties in the output.
#' @return
#' @export
#'
#' @import data.table
#' @import haven
#' @import readxl
#' @import ineq
#' @import openxlsx
nationalization <- function(dataSource,
dataFormat,
outputFolder,
inequalityType = "Gini",
CandidateOrPartyBased = "party.based",
filterSmallParties = FALSE) {
# Initial message & starting efficiency timer
initial.message <- function() {
print("---------------------------------------------------", quote = FALSE)
print("Initializing... Checking Packages & Data Existence", quote = FALSE)
print("---------------------------------------------------", quote = FALSE)
}
initial.message()
start.timer <- proc.time()
#-------------------------------------------#
# Loading/Installing Packages; Loading Data #
#-------------------------------------------#
# Function to load packages/install if they do not exist on local machine
# ONLY relevant if running the script elsewhere than GitHub package; commented out
# usePackage <- function(required.package) {
# if (!is.element(required.package, installed.packages()[,1]))
# install.packages(required.package, dep = TRUE)
# require(required.package, character.only = TRUE)
# }
#
# usePackage("data.table") # For processing functions (more efficient than base R)
# usePackage("haven") # For reading Stata
# usePackage("readxl") # For reading Excel
# usePackage("ineq") # For Gini computation
# usePackage("openxlsx") # To write to .xlsx (desired output)
# Checking data file format, then loading respective data
ifelse(dataFormat == ".csv", dat <- read.csv(dataSource),
ifelse(dataFormat == ".xlsx", dat <- read_xlsx(dataSource),
ifelse(dataFormat == ".rdata", assign("dat",get(load(dataSource))),
ifelse(dataFormat == ".dta", dat <- read_dta(dataSource),{
print("No appropriate data format selected.", quote = FALSE); stop()
}))))
# Making sure inequality index is "Gini" if not specified; enabling other inequality options as well
if (inequalityType != "Gini" & inequalityType %in% c("Gini", "RS", "Atkinson", "Theil", "Kolm",
"var", "square.var", "entropy") == FALSE) {
# Coerce to Gini if the argument is present, but not in the pre-approved list
inequalityType <- "Gini"
}
#----------------------#
# Subset Data by Needs #
#----------------------#
# Selecting crucial columns as a base
base <- subset(dat, select = c(id,ctr_n,ctr,yr,mn,cst,cst_n,pty,pty_n,vv1,pv1,pvs1,cv1,cvs1,tier,seat))
# Coercing argument to "party.based" if the selection is not within pre-approved possibilities
if ((CandidateOrPartyBased != "party.based" |
CandidateOrPartyBased != "candidate.based" |
CandidateOrPartyBased != "shares.based")) {
# Coerce to "party.based" if the argument is present, but not in the pre-approved options
CandidateOrPartyBased <- "party.based"
}
# Creating the base for gini and party-level computations called data.a,
#... based on user preference of party, candidate, or shares based measures
if (CandidateOrPartyBased == "party.based") {
# Removing -990s for NAs, indicating absent candidates, etc...
data.a <- subset(base,select = -c(cv1,cvs1))
data.a$can <- 0
data.a$vv1 <- ifelse(data.a$vv1 == -990 | data.a$vv1 == -992 | data.a$vv1 == -994, NA, data.a$vv1)
data.a$pv1 <- ifelse(data.a$pv1 == -990 | data.a$pv1 == -992 | data.a$pv1 == -994, NA, data.a$pv1)
data.a$pvs1 <- ifelse(data.a$pvs1 == -990 | data.a$pvs1 == -992 | data.a$pvs1 == -994, NA, data.a$pvs1)
data.a$na_candidate <- ifelse(is.na(data.a$vv1) & is.na(data.a$pv1),1,0)
data.a <- unique(data.a)
# Computing shares on our own, once, to avoid broken shares in data
data.a$pvs1 <- data.a$pv1 / data.a$vv1
data.a <- data.a[order(data.a$cst), , drop = FALSE]
data.a <- data.a[order(data.a$ctr_n,data.a$yr),]
# Renaming to generic totals/shares, then informing user of success/moving along with party-based
names(data.a)[names(data.a) == "pv1"] <- "vote.totals"
names(data.a)[names(data.a) == "pvs1"] <- "vote.shares"
print("Sucessfully created subset for Gini & party-level measures...", quote = FALSE)
} else (
if (CandidateOrPartyBased == "candidate.based") {
# Removing -990s for NAs, indicating absent candidates, etc...
data.a <- subset(base,select = -c(pv1,pvs1))
data.a$vv1 <- ifelse(data.a$vv1 == -990 | data.a$vv1 == -992 | data.a$vv1 == -994, NA, data.a$vv1)
data.a$cv1 <- ifelse(data.a$cv1 == -990 | data.a$cv1 == -992 | data.a$cv1 == -994, NA, data.a$cv1)
data.a$cvs1 <- ifelse(data.a$cvs1 == -990 |data.a$cvs1 == -992 | data.a$cvs1 == -994, NA, data.a$cvs1)
data.a$na_candidate <- ifelse(is.na(data.a$vv1) & is.na(data.a$cv1),1,0)
data.a <- unique(data.a)
# Renaming to generic totals/shares, then informing user of success/moving along with candidate-based
names(data.a)[names(data.a) == "cv1"] <- "vote.totals"
names(data.a)[names(data.a) == "cvs1"] <- "vote.shares"
print("Sucessfully created subset for Gini & party-level measures...", quote = FALSE)
} else (
if (CandidateOrPartyBased == "shares.based") {
print("IN DEVELOPMENT"); stop()
}
)
)
# Creating the base for national-level ENP AND constituency-level computations called data.b,
#... based on user preference of party, candidate, or shares based measures
if (CandidateOrPartyBased == "party.based") {
# Removing -990s for NAs, indicating absent candidates, etc...
data.b <- subset(base, select = -c(cv1,cvs1))
data.b$can <- 0
data.b$vv1 <- ifelse(data.b$vv1 == -990 | data.b$vv1 == -992 | data.b$vv1 == -994, NA, data.b$vv1)
data.b$pv1 <- ifelse(data.b$pv1 == -990 | data.b$pv1 == -992 | data.b$pv1 == -994 | data.b$pv1 == 0, NA, data.b$pv1) # Note removal of zero here
data.b$pvs1 <- ifelse(data.b$pvs1 == -990 | data.b$pvs1 == -992 | data.b$pvs1 == -994, NA, data.b$pvs1)
data.b$na_candidate <- ifelse(is.na(data.b$vv1) | is.na(data.b$pv1),1,0)
# Computing unique constituency totals BEFORE filtering out NA candidates
data.b <- as.data.frame(data.b)
data.b <- data.table(data.b)[,cst_tot := length(unique(cst)), by=c("ctr_n","ctr","yr","mn")]
# Grabbing unique cases across entire frame, then filtering non-existent candidates
data.b <- unique(data.b)
data.b <- subset(data.b, na_candidate == 0)
# Renaming to generic totals/shares, then informing the user of success/moving along with party-based
names(data.b)[names(data.b) == "pv1"] <- "vote.totals"
names(data.b)[names(data.b) == "pvs1"] <- "vote.shares"
print("Sucessfully created subset for national-level ENP measures...", quote = FALSE)
print("Sucessfully created subset for constituency-level measures...", quote = FALSE)
} else (
if (CandidateOrPartyBased == "candidate.based") {
# Removing -990s for NAs, indicating absent candidates, etc...
data.b <- subset(base, select = -c(pv1,pvs1))
data.b$can <- 0
data.b$vv1 <- ifelse(data.b$vv1 == -990 | data.b$vv1 == -992 | data.b$vv1 == -994, NA, data.b$vv1)
data.b$pv1 <- ifelse(data.b$cv1 == -990 | data.b$cv1 == -992 | data.b$cv1 == -994 | data.b$cv1 == 0, NA, data.b$pv1) # Note removal of zero here
data.b$pvs1 <- ifelse(data.b$cvs1 == -990 | data.b$cvs1 == -992 | data.b$cvs1 == -994 | data.b$cvs1 == 0, NA, data.b$pvs1)
data.b$na_candidate <- ifelse(is.na(data.b$vv1) | is.na(data.b$cv1),1,0) # Note the OR here
# Comptuing unique constituency totals BEFORE filtering out NA candidates
data.b <- as.data.frame(data.b)
data.b <- data.table(data.b)[,cst_tot := length(unique(cst)), by=c("ctr_n","ctr","yr","mn")]
# Grabbing unique cases across entire frame, then filtering non-existent candidates
data.b <- unique(data.b)
data.b <- subset(data.b, na_candidate == 0)
# Renaming to generic totals/shares, then informing the user of success/moving along with party-based
names(data.b)[names(data.b) == "cv1"] <- "vote.totals"
names(data.b)[names(data.b) == "cvs1"] <- "vote.shares"
print("Sucessfully created subset for national-level ENP measures...", quote = FALSE)
print("Sucessfully created subset for constituency-level measures...", quote = FALSE)
} else (
if (CandidateOrPartyBased == "shares.based") {
print("IN DEVELOPMENT");stop()
}
)
)
# Creating the base for national-level PSNS computations called data.c,
#... based on user preference of party, candidate, or shares based measures
if (CandidateOrPartyBased == "party.based") {
# Removing -990s for NAs, indicating absent candidates, etc...
data.c <- subset(base, select = -c(cv1,cvs1))
data.c$can <- 0
data.c$vv1 <- ifelse(data.c$vv1 == -990 | data.c$vv1 == -992 | data.c$vv1 == -994, NA, data.c$vv1)
data.c$pv1 <- ifelse(data.c$pv1 == -990 | data.c$pv1 == -992 | data.c$pv1 == -994 | data.c$pv1 == 0, NA, data.c$pv1)
data.c$pvs1 <- ifelse(data.c$pvs1 == -990 | data.c$pvs1 == -992 | data.c$pvs1 == -994 | data.c$pvs1 == 0, NA, data.c$pvs1)
data.c$na_candidate <- ifelse(is.na(data.c$vv1) & is.na(data.c$pv1),1,0)
# Computing unique constituency totals BEFORE filtering out NA candidates
data.c <- as.data.frame(data.c)
data.c <- data.table(data.c)[,cst_tot := length(unique(cst)), by=c("ctr_n","ctr","yr","mn")]
# Grabbing unique cases across entire frame, then filtering non-existent candidates
data.c <- unique(data.c)
data.c <- subset(data.c, na_candidate == 0)
# Renaming to generic totals/shares, then informing the user of success/moving along with party-based
names(data.c)[names(data.c) == "pv1"] <- "vote.totals"
names(data.c)[names(data.c) == "pvs1"] <- "vote.shares"
print("Sucessfully created subset for national-level inflation/PSNS measures...", quote = FALSE)
} else (
if (CandidateOrPartyBased == "candidate.based") {
# Removing -990s for NAs, indicating absent candidates, etc...
data.c <- subset(base, select = -c(pv1,pvs1))
data.c$can <- 0
data.c$vv1 <- ifelse(data.c$vv1 == -990 | data.c$vv1 == -992 | data.c$vv1 == -994, NA, data.c$vv1)
data.c$pv1 <- ifelse(data.c$cv1 == -990 | data.c$cv1 == -992 | data.c$cv1 == -994 |data.c$cv1 == 0, NA, data.c$cv1)
data.c$pvs1 <- ifelse(data.c$cvs1 == -990 | data.c$cvs1 == -992 | data.c$cvs1 == -994 |data.c$cvs1 == 0, NA, data.c$cvs1)
data.c$na_candidate <- ifelse(is.na(data.c$vv1) & is.na(data.c$cv1),1,0)
# Computing unique constituency totals BEFORE filtering out NA candidates
data.c <- as.data.frame(data.c)
data.c <- data.table(data.c)[,cst_tot := length(unique(cst)), by=c("ctr_n","ctr","yr","mn")]
# Grabbing unique cases across entire frame, then filtering non-existent candidates
data.c <- unique(data.c)
data.c <- subset(data.c, na_candidate == 0)
# Renaming to generic totals/shares, then informing the user of success/moving along with party-based
names(data.c)[names(data.c) == "cv1"] <- "vote.totals"
names(data.c)[names(data.c) == "cvs1"] <- "vote.shares"
print("Sucessfully created subset for national-level inflation/PSNS measures...", quote = FALSE)
} else (
if (CandidateOrPartyBased == "shares.based") {
print("IN DEVELOPMENT");stop()
}
)
)
# Creating a base for national-level local_E computations called data.d,
#... based on user preference of party, candidate, or shares based measures
if (CandidateOrPartyBased == "party.based") {
# Removing -990s for NAs, indicating absent candidates, etc...
data.d <- subset(base, select = -c(cv1,cvs1))
data.d$can <- 0
data.d$vv1 <- ifelse(data.d$vv1 == -990 | data.d$vv1 == -992 | data.d$vv1 == -994, NA, data.d$vv1)
data.d$pv1 <- ifelse(data.d$pv1 == -990 | data.d$pv1 == -992 | data.d$pv1 == -994 | data.d$pv1 == 0, NA, data.d$pv1)
data.d$pvs1 <- ifelse(data.d$pvs1 == -990 | data.d$pvs1 == -992 | data.d$pvs1 == -994 | data.d$pvs1 == 0, NA, data.d$pvs1)
data.d$seat <- ifelse(data.d$seat == -990 | data.d$seat == -992 | data.d$seat == -994 | data.d$seat == 0, NA, data.d$seat)
data.d$na_candidate <- ifelse(is.na(data.d$vv1) & is.na(data.d$pv1) & is.na(data.d$seat),1,0)
# Computing unique constituency totals BEFORE filtering out NA candidates
data.d <- as.data.frame(data.d)
data.d <- data.table(data.d)[,cst_tot := length(unique(cst)), by=c("ctr_n","ctr","yr","mn")]
# Grabbing unique cases across entire frame, then filtering non-existent candidates
data.d <- unique(data.d)
data.d <- subset(data.d, na_candidate == 0)
# Renaming to generic totals/shares, then informing the user of success/moving along with party-based
names(data.d)[names(data.d) == "pv1"] <- "vote.totals"
names(data.d)[names(data.d) == "pvs1"] <- "vote.shares"
print("Sucessfully created subset for national-level local_E measures...", quote = FALSE)
} else (
if (CandidateOrPartyBased == "candidate.based") {
# Removing -990s for NAs, indicating absent candidates, etc...
data.d <- subset(base, select = -c(pv1,pvs1))
data.d$can <- 0
data.d$vv1 <- ifelse(data.d$vv1 == -990 | data.d$vv1 == -992 | data.d$vv1 == -994, NA, data.d$vv1)
data.d$pv1 <- ifelse(data.d$cv1 == -990 | data.d$cv1 == -992 | data.d$cv1 == -994 | data.d$cv1 == 0, NA, data.d$cv1)
data.d$pvs1 <- ifelse(data.d$cvs1 == -990 | data.d$cvs1 == -992 | data.d$cvs1 == -994 | data.d$cvs1 == 0, NA, data.d$cvs1)
data.d$seat <- ifelse(data.d$seat == -990 | data.d$seat == -992 | data.d$seat == -994 | data.d$seat == 0, NA, data.d$seat)
data.d$na_candidate <- ifelse(is.na(data.d$vv1) & is.na(data.d$cv1) & is.na(data.d$seat),1,0)
# Computing unique constituency totals BEFORE filtering out NA candidates
data.d <- as.data.frame(data.d)
data.d <- data.table(data.d)[,cst_tot := length(unique(cst)), by=c("ctr_n","ctr","yr","mn")]
# Grabbing unique cases across entire frame, then filtering non-existent candidates
data.d <- unique(data.d)
data.d <- subset(data.d, na_candidate == 0)
# Renaming generic totals/shares, then informing the user of success/moving along with party-based
names(data.d)[names(data.d) == "cv1"] <- "vote.totals"
names(data.d)[names(data.d) == "cvs1"] <- "vote.shares"
print("Sucessfully created subset for national-level local_E measures...", quote = FALSE)
} else (
if (CandidateOrPartyBased == "shares.based") {
print("IN DEVELOPMENT");stop()
}
)
)
#--------------------------#
# Gini Inequality Measures #
#--------------------------#
ineq.message <- function() {
print("--------------------------------", quote=FALSE)
print("Computing inequality measures...", quote=FALSE)
print("--------------------------------", quote=FALSE)
}
ineq.message()
# Grabbing distinct cases, then renaming to "gini." plus the variable/data being created
gini.unique_rows <- !duplicated(data.a[,c("id","ctr_n","ctr","yr", "mn","cst","pty","tier")])
gini.base <- data.a[gini.unique_rows,]
# Grabbing valid votes at the national-level to filter small parties (less than 5% of the national vote)
gini.totals <- data.table(gini.base)[,vote.totals := sum(vote.totals, na.rm = TRUE), by=c("id","ctr_n","ctr","yr","mn","cst","pty","tier")]
gini.vv1 <- data.table(gini.totals)[,vv1 := sum(vv1, na.rm = TRUE),by=c("id","ctr_n","ctr","yr","mn","cst","pty","tier")]
gini.nat_vv1 <- data.table(gini.vv1)[,nat_vv1 := sum(vote.totals, na.rm = TRUE), by=c("ctr_n","ctr","yr","mn","tier")]
gini.pty_nat_vv1 <- data.table(gini.nat_vv1)[,pty_nat_vv1 := sum(vote.totals),by=c("ctr_n","ctr","yr","mn","pty","tier")]
gini.pty_nat_vv1$nat_pvs <- gini.pty_nat_vv1$pty_nat_vv1 / gini.pty_nat_vv1$nat_vv1
# Filtering small parties that do not meet the aforementioned threshold, based on user preference
if (filterSmallParties == TRUE) {
# Variant 1: Argument is specified and filter out small
gini.small_removed <- subset(gini.pty_nat_vv1,nat_pvs > 0.05)
} else (
if (filterSmallParties == FALSE) {
# Variant 2: Argument is specified and NO filtering small parties
gini.small_removed <- gini.pty_nat_vv1
}
)
# Recomputing vote shares ONCE to avoid possible broken shares; replacing NaN/NA/infinite with zero
gini.small_removed$vote.shares <- gini.small_removed$vote.totals / gini.small_removed$vv1
gini.small_removed$vote.shares <- ifelse(is.infinite(gini.small_removed$vote.shares), NA, gini.small_removed$vote.shares)
gini.small_removed$vote.shares <- ifelse(is.na(gini.small_removed$vote.shares), 0, gini.small_removed$vote.shares)
# Computing Gini inequality (note, computed by tier in multi-PR systems), then removing NA/NaN
gini.ineq <- setDT(gini.small_removed)[,giniI := ineq(vote.shares, NULL,type = inequalityType, na.rm = TRUE),by=c("ctr_n","ctr","yr","mn","pty","tier")]
gini.ineq$giniI <- replace(gini.ineq$giniI, gini.ineq$giniI < 0, 0)
gini.ineq$giniI <- replace(gini.ineq$giniI, gini.ineq$giniI == "NaN", 0)
# Cleaning the measures up, ordering, then shipping!
# Note that "full." plus anything will be the name of final products being written to .xlsx
full.gini <- subset(gini.ineq, select = c(id,ctr_n,ctr,yr,mn,tier,cst,pty,giniI,vote.totals,vv1))
names(full.gini)[names(full.gini) == "vote.totals"] <- ifelse(CandidateOrPartyBased == "party.based","pv1",
ifelse(CandidateOrPartyBased == "candidate.based","cv1",
ifelse(CandidateOrPartyBased == "shares.based","shares",NA)))
full.gini <- full.gini[order(full.gini$cst), , drop = FALSE]
full.gini <- full.gini[order(full.gini$ctr_n,full.gini$yr,full.gini$tier),]
# Writing Gini to .xlsx; informing of success
write.xlsx(full.gini, paste0(outputFolder, "gini.values.xlsx"),
keepNA = TRUE) # `keepNA` = TRUE maintains NAs in output file
print("Gini inequality measures successfully computed! Moving to party-level...", quote = FALSE)
# Removing anything starting with "gini." from the local environment for memory efficiency
rm(list = ls()[grep("^gini", ls())])
#--------------------------------------#
# Party-Level Nationalization Measures #
#--------------------------------------#
party.message <- function() {
print("---------------------------------", quote=FALSE)
print("Computing Party Level Dataset...", quote=FALSE)
print("---------------------------------", quote=FALSE)
}
party.message()
# Grabbing distinct cases, using same data.a (computed above in subset section); then,
# renaming to "pty." plus the variable/data being created
pty.unique_rows <- !duplicated(data.a[,c("id","ctr_n","ctr","yr", "mn","cst","pty","tier")])
pty.base <- data.a[pty.unique_rows,]
# Merging in gini, then using it to compute vanilla PNS
pty.gini <- merge(pty.base, full.gini, by = c("id","ctr_n","ctr","yr","mn","cst","pty","vv1","tier"),all.x = TRUE)
pty.gini$PNS <- 1 - pty.gini$giniI
# Computing unique constituency length AND standardized PNS (PNS_s)
pty.cst_tot <- data.table(pty.gini)[,cst_tot := length(unique(cst)), by=c("ctr_n","ctr","yr","mn")]
pty.cst_tot$PNS_s <- pty.cst_tot$PNS^(1/(log(pty.cst_tot$cst_tot)))
# Replacing PNS_s with NA if it equals zero
pty.cst_tot$PNS_s <- ifelse(pty.cst_tot$PNS_s == 0, NA, pty.cst_tot$PNS_s)
# Computing prerequisites for weighted PNS (PNS_w)
# Grabbing distinct cases
pty.cst_tot <- as.data.frame(pty.cst_tot) # Need to reassign as data.frame type for !duplicated() to work
pty.cst_tot_unique_rows <- !duplicated(pty.cst_tot[,c("id","ctr_n","ctr","yr", "mn","cst","pty","tier")])
pty.unique_cst_tot <- pty.cst_tot[pty.cst_tot_unique_rows,]
# Grabbing nationl-level vv1 and party vv1 (nat_vv1, pty_vv1)
pty.nat_vv1 <- data.table(pty.unique_cst_tot)[,nat_vv1 := sum(vote.totals, na.rm = TRUE), by=c("ctr_n","ctr","yr","mn","tier")]
pty.pty_vv1 <- data.table(pty.nat_vv1)[,pty_vv1 := sum(pv1, na.rm = TRUE), by=c("ctr_n","ctr","yr","mn","pty","tier")]
pty.national <- pty.pty_vv1
# PNS_w denominator
pty.national$denominator <- pty.national$nat_vv1 * pty.national$pty_vv1
# Ordering by party and vote shares (descending) to compute p_j's cumulative sum
pty.national$cst_vv1 <- pty.national$vv1
pty.ordered <- pty.national[order(pty.national$id,pty.national$ctr_n,pty.national$ctr,pty.national$yr,
pty.national$mn,pty.national$pty,pty.national$vote.shares),,drop = FALSE]
pty.p_j <- data.table(pty.ordered)[,p_j := cumsum(vote.totals), by=c("ctr_n","ctr","yr","mn","pty","tier")]
# Computing the "inside" of the numerator's sum, then computing PNS_w numerator
pty.inside <- data.table(pty.p_j)[,inside := cst_vv1 * (p_j - (vote.totals / 2)), by=c("ctr_n","ctr","yr","mn","pty","tier")]
pty.numerator <- data.table(pty.inside)[,numerator := sum(inside, na.rm=TRUE),by=c("ctr_n","ctr","yr","mn","pty","tier")]
# Computing PNS_w at long last! Replacing infinite/NaN with NA
pty.pns_w <- data.table(pty.numerator)[,PNS_w := ((2 * numerator)/ denominator),by=c("ctr_n","ctr","yr","mn","pty","tier")]
pty.pns_w$PNS_w[is.infinite(pty.pns_w$PNS_w)] <- NA
pty.pns_w$PNS_w[is.nan(pty.pns_w$PNS_w)] <- NA
# Computing PNS_w AGAIN, but using vote.totals as a proxy for replacement
pty.pns_w$cst_vv1_new <- pty.pns_w$vote.totals
pty.pj_new <- data.table(pty.pns_w)[,p_j_new := cumsum(vote.totals), by=c("ctr_n","ctr","yr","mn","pty","tier")]
pty.inside_new <- data.table(pty.pj_new)[,inside_new := cst_vv1_new * (p_j_new - (vote.totals / 2)), by=c("ctr_n","ctr","yr","mn","pty","tier")]
pty.numerator_new <- data.table(pty.inside_new)[,numerator_new := sum(inside_new, na.rm=TRUE),by=c("ctr_n","ctr","yr","mn","pty","tier")]
pty.pns_w_new <- data.table(pty.numerator_new)[,PNS_w_new := ((2 * numerator_new) / denominator), by=c("ctr_n","ctr","yr","mn","pty","tier")]
pty.pns_w_new$PNS_w[is.infinite(pty.pns_w_new$PNS_w)] <- NA
pty.pns_w_new$PNS_w[is.nan(pty.pns_w_new$PNS_w)] <- NA
# Replacing inappropriate PNS_w under given conditions (see below...)
pty.pns_w_new$alt_vv1 <- 0
pty.pns_w_new$alt_vv1 <- replace(pty.pns_w_new$alt_vv1, pty.pns_w_new$PNS_w >= 1, 1)
pty.pns_w_new$PNS_w <- ifelse(pty.pns_w_new$PNS_w >= 1, pty.pns_w_new$PNS_w_new, pty.pns_w_new$PNS_w)
pty.pns_w_new$PNS_w <- ifelse(pty.pns_w_new$PNS_w == 0, NA, pty.pns_w_new$PNS_w)
pty.pns_w_new$PNS_w <- ifelse(pty.pns_w_new$PNS_w >= 1, pty.pns_w_new$PNS_w_new, pty.pns_w_new$PNS_w)
pty.pns_w_new$alt_vv1 = replace(pty.pns_w_new$alt_vv1, is.na(pty.pns_w_new$PNS_w), 0)
# Select down to the relevant variables, and continue on to compute standardized-weighted PNS (PNS_sw)
pty.slim <- subset(pty.pns_w_new, select = c(id,ctr_n,ctr,yr,mn,cst,pty_n,pty,cst_vv1,
nat_vv1,PNS,PNS_s,PNS_w,alt_vv1,cst_tot,tier))
# Computing parts of PNS_sw computation
pty.slim$top <- (pty.slim$nat_vv1)^2
pty.slim$square <- (pty.slim$cst_vv1)^2
pty.pid <- data.table(pty.slim)[,pid := 1:length(nat_vv1),by=c("ctr_n","ctr","yr","mn","cst","tier")]
pty.pid$helper <- rep(NA,length(pty.pid$nat_vv1))
pty.pid$helper <- ifelse(pty.pid$pid == 1, pty.pid$square, pty.pid$helper)
pty.bottom <- data.table(pty.pid)[,bottom := sum(helper, na.rm=TRUE), by=c("ctr_n","ctr","yr","tier")]
pty.bottom$power_E <- pty.bottom$top / pty.bottom$bottom
# Computing PNS_sw
pty.pns_sw <- data.table(pty.bottom)[,PNS_sw := (PNS_w)^(1/(log10(power_E))), by=c("ctr_n","ctr","yr","tier")]
pty.pns_sw$PNS_sw <- replace(pty.pns_sw$PNS_sw, is.na(pty.pns_sw$PNS_sw), NA)
pty.pns_sw$PNS_sw <- replace(pty.pns_sw$PNS_sw, pty.pns_sw$PNS_sw > 1, NA)
# Final clean-up time! Creating "pty.to_clean" for this
# Replacing PNS if it equals 1 (meaning gini equals zero, thus NA); replacing PNS & friends with non-valuable pty codes
pty.to_clean <- pty.pns_sw
pty.to_clean$PNS <- replace(pty.to_clean$PNS, pty.to_clean$PNS == 1, NA)
pty.to_clean$PNS_s <- replace(pty.to_clean$PNS_s, pty.to_clean$pty >= 3996 & pty.to_clean$pty < 5000, NA)
pty.to_clean$PNS_w <- replace(pty.to_clean$PNS_w, pty.to_clean$pty >= 3996 & pty.to_clean$pty < 5000, NA)
pty.to_clean$PNS_sw <- replace(pty.to_clean$PNS_sw, pty.to_clean$pty >= 3996 & pty.to_clean$pty < 5000, NA)
# Selecting down to relevant output columns; arranging for viewing
pty.slimmer <- subset(pty.to_clean, select = c(id, ctr_n, ctr, yr, mn, pty_n, pty, tier, PNS, PNS_s, PNS_w, PNS_sw, cst_tot))
full.pty <- pty.slimmer
full.pty <- as.data.frame(full.pty[order(full.pty$ctr_n,full.pty$yr,full.pty$pty,full.pty$tier),])
pty.full_rows <- !duplicated(full.pty[,c("id","ctr_n","ctr","yr", "mn","pty","tier")])
full.pty <- full.pty[pty.full_rows,]
# Writing party-level to .xlsx; informing of success
write.xlsx(full.pty, paste0(outputFolder, "party.level.xlsx"),
keepNA = TRUE) # `keepNA` = TRUE maintains NAs in output file
print("Party-level measures successfully computed! Moving to national-level...", quote = FALSE)
# Removing anything starting with "pty." from the local environment for memory efficiency
rm(list = ls()[grep("^pty", ls())])
#-----------------------------------------#
# National-Level Nationalization Measures #
#-----------------------------------------#
national.message <- function() {
print("------------------------------------", quote=FALSE)
print("Computing National Level Dataset...", quote=FALSE)
print("------------------------------------", quote=FALSE)
}
national.message()
##*************************##
## Computing ENP & Friends ##
##*************************##
# Grabbing distinct cases, using data.b computed in subset section
data.b <- as.data.frame(data.b)
nat.unique.rows <- !duplicated(data.b[,c("id","ctr_n","ctr","yr", "mn","cst","pty","tier")])
enp.distinct <- data.b[nat.unique.rows,]
# Renaming to "enp." plus the variable/data being created for ENP section
enp.totals <- data.table(enp.distinct)[,vote.totals := sum(vote.totals, na.rm = TRUE), by=c("ctr_n","ctr","yr","mn","pty")]
# Grabbing unique again to get the data to the national level
enp.totals <- as.data.frame(enp.totals)
enp.unique.rows <- !duplicated(enp.totals[c("id","ctr_n","ctr","yr", "mn","pty","tier")])
enp.unique <- enp.totals[enp.unique.rows,]
# Computing ENP_nat, then selecting down to relevant columns
enp.nat_vv1 <- data.table(enp.unique)[,nat_vv1 := sum(vote.totals, na.rm = TRUE), by=c("ctr_n","ctr","yr","mn","tier")]
enp.shares_sq <- data.table(enp.nat_vv1)[,party_prop_nat2 := (vote.totals/nat_vv1)^2, by=c("ctr_n","ctr","yr","mn","tier")]
enp.denom <- data.table(enp.shares_sq)[,denominator := sum(party_prop_nat2, na.rm = TRUE), by=c("ctr_n","ctr","yr","mn","tier")]
enp.enep_nat <- data.table(enp.denom)[,ENEP_nat := 1/denominator, by=c("ctr_n","ctr","yr","mn","tier")]
enp.enep_nat <- subset(enp.enep_nat, select = c(id,ctr_n,ctr,yr,mn,tier,ENEP_nat,cst))
# Computing ENP_cst (enp.distinct computed above)
enp.new_vv1 <- data.table(enp.distinct)[,new_vv1 := sum(vote.totals),by=c("ctr_n","ctr","yr","mn","cst","tier")]
enp.new_shares_sq <- data.table(enp.new_vv1)[,share_sq := (vote.totals/new_vv1)^2,by=c("ctr_n","ctr","yr","mn","cst","tier")]
e.new_denom <- data.table(enp.new_shares_sq)[,denominator := sum(share_sq),by=c("ctr_n","ctr","yr","mn","cst","tier")]
enp.enep_cst <- data.table(e.new_denom)[,ENEP_cst := 1/denominator,by=c("ctr_n","ctr","yr","mn","cst","tier")]
# Adding an indicator if new vv1 (above) does not equal true vv1
enp.enep_cst$indicator <- 0
enp.enep_cst$indicator <- ifelse(enp.enep_cst$new_vv1 != enp.enep_cst$vv1,1,0)
# Grabbing unique ENP_cst
enp.enep_cst <- as.data.frame(enp.enep_cst)
enp.unique.rows_enp <- !duplicated(enp.enep_cst[,c("id","ctr_n","ctr","yr", "mn","cst","pty","tier")])
enp.unique_nat <- enp.enep_cst[enp.unique.rows_enp,]
# Summing the indicator (computed above)
enp.new_indicator <- data.table(enp.unique_nat)[,indicator := ifelse(sum(indicator, na.rm=TRUE) > 0, 1, indicator),
by=c("ctr_n","ctr","yr","mn","tier")]
# Computing weights, then weighted ENP (ENP_wght)
enp.new_nat_vv1 <- data.table(enp.new_indicator)[,nat_vv1 := sum(new_vv1, na.rm=TRUE),by=c("ctr_n","ctr","yr","mn","tier")]
enp.cst_wght <- data.table(enp.new_nat_vv1)[,cst_wght := (new_vv1/nat_vv1),by=c("ctr_n","ctr","yr","mn","tier")]
enp.weighted <- data.table(enp.cst_wght)[,weighted := (cst_wght * ENEP_cst),by=c("ctr_n","ctr","yr","mn","tier")]
enp.enep_wght <- data.table(enp.weighted)[,ENEP_wght := sum(weighted),by=c("ctr_n","ctr","yr","mn","tier")]
# Now, we compute average ENP (ENP_avg) using ENP_cst
enp.enep_avg <- data.table(enp.enep_wght)[,ENEP_avg := mean(ENEP_cst),by=c("ctr_n","ctr","yr","mn","tier")]
enp.enp_avg_wght <- as.data.frame(enp.enep_avg)
# Merging both ENP-related sets, then cleaning them up
enp.merge <- merge.data.frame(enp.enep_nat,enp.enp_avg_wght,by = c("id","ctr_n","ctr","yr","mn","cst","tier"))
enp.slim <- subset(enp.merge, select = c(id,ctr_n,ctr,yr,mn,tier,ENEP_nat,ENEP_avg,ENEP_wght,indicator))
enp.slim_rows <- !duplicated(enp.slim[c("id","ctr_n","ctr","yr", "mn","tier")])
enp.full <- enp.slim[enp.slim_rows,]
full.enp <- enp.full[order(enp.full$ctr_n,enp.full$yr,enp.full$mn,enp.full$tier),]
# Creating Cox, MK_I, & MK_I_w
full.enp$cox <- (full.enp$ENEP_nat - full.enp$ENEP_avg)/ full.enp$ENEP_nat
full.enp$MK_I <- (full.enp$ENEP_nat - full.enp$ENEP_avg)/ full.enp$ENEP_avg
full.enp$MK_I_w <- (full.enp$ENEP_nat - full.enp$ENEP_wght)/ full.enp$ENEP_wght
# Removing previous computations for memory efficiency (beginning with "enp.")
rm(list = ls()[grep("^enp", ls())])
##**************************##
## Computing PSNS & friends ##
##**************************##
# Grabbing distinct cases, using data.c computed in subset section
data.c <- as.data.frame(data.c)
nat.unique.rows <- !duplicated(data.c[,c("id","ctr_n","ctr","yr", "mn","cst","pty","tier")])
psns.distinct <- data.c[nat.unique.rows,]
# Computing ENP_cst (using unique data above)
psns.new_vv1 <- data.table(psns.distinct)[,new_vv1 := sum(vote.totals),by=c("ctr_n","ctr","yr","mn","cst","tier")]
psns.new_shares_sq <- data.table(psns.new_vv1)[,share_sq := (vote.totals/new_vv1)^2,by=c("ctr_n","ctr","yr","mn","cst","tier")]
psns.new_denom <- data.table(psns.new_shares_sq)[,denominator := sum(share_sq, na.rm = TRUE),by=c("ctr_n","ctr","yr","mn","cst","tier")]
psns.enep_cst <- data.table(psns.new_denom)[,ENEP_cst := 1/denominator,by=c("ctr_n","ctr","yr","mn","cst","tier")]
# Merging this ENP_cst with the full ENP above at the national-level
psns.full_enp <- merge(psns.enep_cst,full.enp, by = c("id","ctr_n","ctr","yr","mn","tier"))
# Renaming to just "psns" for easier typing, then computing prerequisites for inflation values (there are many)
psns <- psns.full_enp
psns$I_i <- ((psns$ENEP_nat-psns$ENEP_cst)/ psns$ENEP_cst) * 100
psns$alpha <- 0.5
psns$beta <- 0.25
psns$gamma <- 0.5
psns.nat_vote <- data.table(psns)[, nat_vote := sum(new_vv1, na.rm = TRUE), by = c("ctr_n","ctr","yr","mn","tier")]
psns.cst_vote_prop <- data.table(psns.nat_vote)[,cst_vote_prop := new_vv1/nat_vote, by = c("ctr_n","ctr","yr","mn","tier")]
psns.product <- data.table(psns.cst_vote_prop)[,product := ENEP_cst * cst_vote_prop, by = c("ctr_n","ctr","yr","mn","tier")]
psns.sum_cst <- data.table(psns.product)[,sum_cst := sum(product, na.rm = TRUE), by = c("ctr_n","ctr","yr","mn","tier")]
psns.denom <- data.table(psns.sum_cst)[,denominator := cst_tot * sum_cst, by = c("ctr_n","ctr","yr","mn","tier")]
psns.W_tilde <- data.table(psns.denom)[,W_tilde := ENEP_cst/ denominator, by = c("ctr_n","ctr","yr","mn","tier")]
psns.I_w <- data.table(psns.W_tilde)[,I_w := ((ENEP_nat - sum_cst)/sum_cst)*100, by = c("ctr_n","ctr","yr","mn","tier")]
psns.numerator <- data.table(psns.I_w)[,numerator := (I_i - I_w)^2 * W_tilde, by = c("ctr_n","ctr","yr","mn","tier")]
psns.sum_numerator <- data.table(psns.numerator)[,sum_numerator := sum(numerator, na.rm = TRUE), by = c("ctr_n","ctr","yr","mn","tier")]
psns.coeff_var_I_i <- data.table(psns.sum_numerator)[,coeff_var_I_i := sqrt(sum_numerator)/I_w, by = c("ctr_n","ctr","yr","mn","tier")]
psns.numerator_r2 <- data.table(psns.coeff_var_I_i)[,numerator_r2 := ((I_i - I_w)^4) * W_tilde, by = c("ctr_n","ctr","yr","mn","tier")]
psns.sum_numerator_r2 <- data.table(psns.numerator_r2)[,sum_numerator_r2 := sum(numerator_r2, na.rm = TRUE), by = c("ctr_n","ctr","yr","mn","tier")]
psns.denominator_r2 <- data.table(psns.sum_numerator_r2)[,denominator2 := ((I_i - I_w)^2) * W_tilde, by = c("ctr_n","ctr","yr","mn","tier")]
psns.sum_denominator_r2 <- data.table(psns.denominator_r2)[,sum_denominator_r2 := sum(denominator2, na.rm=TRUE), by = c("ctr_n","ctr","yr","mn","tier")]
psns.sum_denominator_r2$sq_sum_denominator_r2 <- (psns.sum_denominator_r2$sum_denominator_r2)^2
psns.kurtosis_I_i <- data.table(psns.sum_denominator_r2)[,kurtosis_I_i := sum_numerator_r2/sq_sum_denominator_r2, by = c("ctr_n","ctr","yr","mn","tier")]
psns.D <- data.table(psns.kurtosis_I_i)[, D := abs(coeff_var_I_i)^gamma * abs(kurtosis_I_i)^(1-gamma) * sign(coeff_var_I_i*kurtosis_I_i),
by = c("ctr_n","ctr","yr","mn","tier")]
psns.MK_N <- data.table(psns.D)[, MK_N := abs(I_w)^alpha * abs(D)^(1-alpha) * sign(I_w*D), by = c("ctr_n","ctr","yr","mn","tier")]
# NOT naming with "psns." to maintain the final object with all columns
MK_N_two <- data.table(psns.MK_N)[, MK_N_two := abs(I_w)^alpha * abs(coeff_var_I_i)^beta * abs(kurtosis_I_i)^(1 - alpha - beta) * sign(I_w*coeff_var_I_i*kurtosis_I_i),
by = c("ctr_n","ctr","yr","mn","tier")]
# Removing previously-computed objects to maintain memory efficiency
rm(list = ls()[grep("^psns", ls())])
# A few more prerequisites, grabbing distinct cases again, then merging in party.level measures
psns.nat_vv1 <- data.table(MK_N_two)[, nat_vv1 := sum(vote.totals, na.rm = TRUE), by = c("ctr_n","ctr","yr","mn","tier")]
psns.pty_vv1 <- data.table(psns.nat_vv1)[, pty_vv1 := sum(vote.totals, na.rm = TRUE), by = c("ctr_n","ctr","yr","mn","pty","tier")]
psns.pty_vv1 <- as.data.frame(psns.pty_vv1)
psns.unique.rows <- !duplicated(psns.pty_vv1[,c("id","ctr_n","ctr","yr", "mn","pty","tier")])
psns.unique <- psns.pty_vv1[psns.unique.rows,]
psns.pty_merge <- merge(psns.unique,full.pty,by = c("id", "ctr_n", "ctr", "yr", "mn", "pty", "pty_n", "cst_tot","tier"),all.x = TRUE)
# Ordering by party, computing weights, then computing PSNS & friends
psns.pty_merge <- psns.pty_merge[order(psns.pty_merge$ctr_n,psns.pty_merge$ctr,psns.pty_merge$yr,
psns.pty_merge$mn,psns.pty_merge$pty,psns.pty_merge$pty_n),]
psns.pty_merge$weight <- psns.pty_merge$pty_vv1/psns.pty_merge$nat_vv1
psns.psns <- data.table(psns.pty_merge)[, PSNS := sum(PNS * weight, na.rm = TRUE), by = c("ctr_n","ctr","yr","mn","tier")]
psns.psns_s <- data.table(psns.psns)[, PSNS_s := sum(PNS_s * weight, na.rm=TRUE), by = c("ctr_n","ctr","yr","mn","tier")]
psns.psns_w <- data.table(psns.psns_s)[, PSNS_w := sum(PNS_w * weight, na.rm=TRUE), by = c("ctr_n","ctr","yr","mn","tier")]
psns.psns_sw <- data.table(psns.psns_w)[, PSNS_sw := sum(PNS_sw *weight, na.rm=TRUE), by = c("ctr_n","ctr","yr","mn","tier")]
# Replacing PSNS & friends with NA if they equal zero or are greater than 1
psns.to_edit <- psns.psns_sw
psns.to_edit$PSNS <- ifelse(psns.to_edit$PSNS == 0 | psns.to_edit$PSNS > 1, NA, psns.to_edit$PSNS)
psns.to_edit$PSNS_s <- ifelse(psns.to_edit$PSNS_s == 0 | psns.to_edit$PSNS_s > 1, NA, psns.to_edit$PSNS_s)
psns.to_edit$PSNS_w <- ifelse(psns.to_edit$PSNS_w == 0 | psns.to_edit$PSNS_w > 1, NA, psns.to_edit$PSNS_w)
psns.to_edit$PSNS_sw <- ifelse(psns.to_edit$PSNS_sw == 0 | psns.to_edit$PSNS_sw > 1, NA, psns.to_edit$PSNS_sw)
# Selecting relevant columns, then grabbing unique again
psns.slim <- subset(psns.to_edit, select = c(id,ctr_n,ctr,yr,mn,tier,PSNS,PSNS_s,PSNS_w,PSNS_sw,MK_N,MK_N_two,cst_tot))
psns.slim <- as.data.frame(psns.slim)
psns.unique_rows <- !duplicated(psns.slim)
full.psns <- psns.slim[psns.unique_rows,]
# Removing previously-computed psns objects AGAIN to maintain memory efficiency
rm(list = ls()[grep("^psns", ls())])
##*******************##
## Computing local_E ##
##*******************##
# Grabbing distinct cases
data.d <- as.data.frame(data.d)
local.unique_rows <- !duplicated(data.d[,c("id","ctr_n","ctr","yr", "mn","cst","pty","tier")])
local.unique <- data.d[local.unique_rows,]
# Renaming to "local." plus the variable/data being created
local.nat_vote <- data.table(local.unique)[, nat_vote := sum(vote.totals, na.rm=TRUE), by = c("ctr_n","ctr","yr", "mn","tier")]
local.seat_cst <- data.table(local.nat_vote)[, seat_cst := sum(seat, na.rm=TRUE), by = c("ctr_n","ctr","yr", "mn","cst","tier")]
local.party_vote_prop <- data.table(local.seat_cst)[, party_vote_prop := vote.totals/nat_vote, by = c("ctr_n","ctr","yr", "mn","cst","tier")]
# Arranging, then computing seat proportion & party vote proportion
local.party_vote_prop <- local.party_vote_prop[order(local.party_vote_prop$ctr_n,local.party_vote_prop$ctr,
local.party_vote_prop$yr,local.party_vote_prop$mn,
local.party_vote_prop$cst,local.party_vote_prop$pty)]
local.seat_total <- data.table(local.party_vote_prop)[, seat_total := sum(seat, na.rm=TRUE), by = c("ctr_n","ctr","yr", "mn","tier")]
local.seat_contest <- data.table(local.seat_total)[, seat_contest := sum(seat_cst, na.rm=TRUE), by = c("ctr_n","ctr","yr", "mn","pty","tier")]
local.seat_prop <- data.table(local.seat_contest)[, seat_prop := seat_contest/seat_total, by = c("ctr_n","ctr","yr", "mn","pty","tier")]
# Computing local_E by party & tier, then getting national sum
local.local_E <- data.table(local.seat_prop)[, local_E := party_vote_prop * seat_prop, by = c("ctr_n","ctr","yr", "mn","pty","tier")]
local.local_E_r2 <- data.table(local.local_E)[, local_E := sum(local_E, na.rm=TRUE), by = c("ctr_n","ctr","yr", "mn","tier")]
# Selecting down to relevant columns, then grabbing distinct
local.local_E_r2 <- subset(local.local_E_r2, select = c(id,ctr_n,ctr,yr,mn,tier,local_E))
local.local_E_r2 <- as.data.frame(local.local_E_r2)
local_E_rows <- !duplicated(local.local_E_r2[,c("id","ctr_n","ctr","yr", "mn","tier")])
full.local_E <- local.local_E_r2[local_E_rows,]
# Removing previous computations for more efficient memory usage
rm(list = ls()[grep("^local", ls())])
##************************************##
## Merging/Cleaning up National-Level ##
##************************************##
# Merging all of the national level ("full")!
nat.enp_psns_merge <- merge(full.enp,full.psns,by = c("id","ctr_n","ctr","yr","mn","tier"),
all = TRUE, sort = TRUE)
nat.full <- merge(nat.enp_psns_merge,full.local_E, by = c("id","ctr_n","ctr","yr","mn","tier"), all = TRUE)
# Renaming columns to inflation[1-4] & indicator to nvvi, then cleaning up inflation/nvvi values (zero are NA)
names(nat.full)[names(nat.full) == "cox"] <- "inflation1"
names(nat.full)[names(nat.full) == "MK_I"] <- "inflation2"
names(nat.full)[names(nat.full) == "MK_I_w"] <- "inflation3"
names(nat.full)[names(nat.full) == "MK_N"] <- "inflation4"
names(nat.full)[names(nat.full) == "indicator"] <- "nvvi"
nat.full$inflation1 <- ifelse(nat.full$inflation1 == 0, NA, nat.full$inflation1)
nat.full$inflation2 <- ifelse(nat.full$inflation2 == 0, NA, nat.full$inflation2)
nat.full$inflation3 <- ifelse(nat.full$inflation3 == 0, NA, nat.full$inflation3)
nat.full$inflation4 <- ifelse(nat.full$inflation4 == 0, NA, nat.full$inflation4)
nat.full$nvvi <- ifelse(is.na(nat.full$nvvi), 0, nat.full$nvvi)
# Replacing local_E to be within the appropriate range
nat.full$local_E <- ifelse(nat.full$local_E ==0 | nat.full$local_E > 1, NA, nat.full$local_E)
# Joining in data grid (with parties, etc.)
grid <- as.data.frame(subset(base, select = c(id,ctr_n,ctr,yr,mn,tier,cst,cst_n,pty,pty_n)))
full.nat <- merge(nat.full,grid, by = c("id","ctr_n","ctr","yr","mn","tier"), all = TRUE)
# Grabbing unique, ordering, renaming ENP, then shipping!
full.nat_rows <- !duplicated(full.nat[,c("id","ctr_n","ctr","yr", "mn","tier")])
full.nat <- full.nat[full.nat_rows,]
full.nat <- full.nat[order(full.nat$ctr_n,full.nat$yr,full.nat$tier),]
names(full.nat)[names(full.nat) == "ENEP_nat"] <- "ENP_nat"
names(full.nat)[names(full.nat) == "ENEP_avg"] <- "ENP_avg"
names(full.nat)[names(full.nat) == "ENEP_wght"] <- "ENP_wght"
full.nat <- subset(full.nat, select = c(id, ctr_n, ctr, yr, mn, tier, nvvi, ENP_nat, ENP_avg, ENP_wght, inflation1, inflation2,
inflation3,inflation4, PSNS, PSNS_s, PSNS_w, PSNS_sw, local_E, cst_tot))
# Removing previous computations for more efficient memory usage
rm(list = ls()[grep("^nat", ls())])
# Writing national-level to .xlsx; informing of success
write.xlsx(full.nat, paste0(outputFolder, "national.level.xlsx"),
keepNA = TRUE) # `keepNA` = TRUE maintains NAs in output file
print("National-level measures successfully computed! Moving to constituency-level...", quote = FALSE)
#---------------------------------------------#
# Constituency-Level Nationalization Measures #
#---------------------------------------------#
cst.message <- function() {
print("---------------------------------------", quote=FALSE)
print("Computing Constituency Level Dataset...", quote=FALSE)
print("---------------------------------------", quote=FALSE)
}
cst.message()
# Grabbing distinct cases, then renaming to "cst." plus the variable/data being created
cst.unique_rows <- !duplicated(data.b[,c("id","ctr_n","ctr","yr", "mn","cst","pty","tier")])
cst.base <- data.b[cst.unique_rows,]
# Computing prerequisites for ENP_cst
cst.cst_tot <- data.table(cst.base)[, cst_tot := length(unique(cst)), by = c("ctr_n","ctr","yr", "mn")]
cst.new_vv1 <- data.table(cst.cst_tot)[, new_vv1 := sum(vote.totals, na.rm = TRUE), by = c("ctr_n","ctr","yr", "mn","cst","tier")]
cst.share_sq <- data.table(cst.new_vv1)[, share_sq := (vote.totals/new_vv1)^2, by = c("ctr_n","ctr","yr", "mn","cst","tier")]
cst.denom <- data.table(cst.share_sq)[, denom := sum(share_sq), by = c("ctr_n","ctr","yr", "mn","cst","tier")]
# Computing ENP_cst
cst.enp_cst <- data.table(cst.denom)[, ENP_cst := 1/denom, by = c("ctr_n","ctr","yr", "mn","cst","tier")]
# Computing the vv1 indicator, cvvi, selecting down to relevant columns, then grabbing distinct cases again
cst.enp_cst$cvvi <- 0
cst.enp_cst$cvvi <- ifelse(cst.enp_cst$vv1 != cst.enp_cst$new_vv1,1,cst.enp_cst$cvvi)
cst.enp_cst <- subset(cst.enp_cst, select = c(id,ctr_n,ctr,yr,mn,tier,cst,cst_n,ENP_cst,cvvi))
cst.enp_cst <- as.data.frame(cst.enp_cst)
cst.unique_rows_enp <- !duplicated(cst.enp_cst[,c("id","ctr_n","ctr","yr", "mn","cst","tier")])
cst.enp <- cst.enp_cst[cst.unique_rows_enp,]
# Merging in national-level to compute inflation5
cst.nat <- merge(cst.enp,full.enp, by = c("id","ctr_n","ctr","yr", "mn","tier"), all = TRUE)
cst.nat <- as.data.frame(cst.nat)
cst.nat$inflation5 <- (cst.nat$ENEP_nat - cst.nat$ENP_cst)/cst.nat$ENP_cst
# Merging in data grid to get party info
cst.nat.pty <- merge(cst.nat,grid,by = c("id","ctr_n","ctr","yr", "mn","cst","cst_n","tier"), all = TRUE)
# Selecting down to relevant columns, arranging, grabbing distinct, then shipping!
cst.slim <- subset(cst.nat.pty, select = c(id,ctr_n,ctr,yr,mn,tier,cst,cst_n,cvvi,ENP_cst))
cst.slim <- cst.slim[order(cst.slim$ctr_n,cst.slim$yr,cst.slim$mn,cst.slim$cst,cst.slim$tier),]
cst.slim_unique_rows <- !duplicated(cst.slim[,c("id","ctr_n","ctr","yr", "mn","cst","tier")])
full.cst <- cst.slim[cst.slim_unique_rows,]
# Writing constituency-level to .xlsx; informing of success
write.xlsx(full.cst, paste0(outputFolder, "constituency.level.xlsx"),
keepNA = TRUE) # `keepNA` = TRUE maintains NAs in output file
print("Constituency-level measures successfully computed!", quote = FALSE)
# Removing anything starting with "cst." from the local environment for memory efficiency
rm(list = ls()[grep("^cst", ls())])
# Printing timer to display efficiency
cat("The entire computation took ", (proc.time()[1]-start.timer[1])/60, "mins \n")
print("Done!", quote = FALSE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.