# data-raw/code/01_decisions.R

################################################################################
# Joshua C. Fjelstul, Ph.D.
# euip R package
################################################################################

# define pipe function
`%>%` <- magrittr::`%>%`

##################################################
# read in data
##################################################

# load the raw infringement decisions, keeping text columns as character
decisions <- utils::read.csv(
  file = "data-raw/infringements_raw.csv",
  stringsAsFactors = FALSE
)

##################################################
# clean data
##################################################

# assign standardized names to the ten raw columns (positional, so the order
# here must follow the column order of the raw file)
names(decisions) <- c(
  "case_id",
  "decision_date",
  "decision_stage_raw",
  "press_release",
  "memo",
  "member_state",
  "department_raw",
  "case_description",
  "active",
  "noncommunication"
)

# derive the case identifiers and the calendar components in one pass;
# each expression sees the columns as modified by the expressions above it
decisions <- dplyr::mutate(
  decisions,
  # case year: leading four digits of the raw case ID
  case_year = as.numeric(stringr::str_extract(case_id, "^[0-9]{4}")),
  # case number: trailing four digits of the raw case ID
  case_number = as.numeric(stringr::str_extract(case_id, "[0-9]{4}$")),
  # date: discard everything from the first space onward
  decision_date = stringr::str_replace(decision_date, " .*", ""),
  # year: first run of four digits in the date
  decision_year = as.numeric(stringr::str_extract(decision_date, "[0-9]{4}")),
  # month: the two digits enclosed by slashes
  decision_month = as.numeric(
    stringr::str_extract(
      stringr::str_extract(decision_date, "/[0-9]{2}/"),
      "[0-9]{2}"
    )
  ),
  # day: the two digits at the end of the date
  decision_day = as.numeric(stringr::str_extract(decision_date, "[0-9]{2}$"))
)

# classify each case as non-communication or non-conformity
decisions <- dplyr::mutate(
  decisions,
  # non-communication: 1 when the raw field is "Yes", 0 otherwise
  noncommunication = as.numeric(noncommunication == "Yes"),
  # non-conformity: complement of non-communication
  nonconformity = 1 - noncommunication,
  # case type: "Noncommunication" unless the row is flagged as non-conformity
  # (%in% keeps rows with a missing flag on the default label)
  case_type = ifelse(
    nonconformity %in% 1,
    "Nonconformity",
    "Noncommunication"
  ),
  # case type code: 1 = non-communication, 2 = non-conformity, 0 = neither flag
  case_type_id = dplyr::case_when(
    nonconformity %in% 1 ~ 2,
    noncommunication %in% 1 ~ 1,
    TRUE ~ 0
  )
)

# The stage indicators below are 0/1 flags (NA when the raw text is missing)
# built from pattern matches on the raw stage text in decision_stage_raw.

# additional decision
decisions$stage_additional <- as.numeric(
  stringr::str_detect(decisions$decision_stage_raw, "[Aa]dditional")
)

# closing of the case
decisions$stage_closing <- as.numeric(
  stringr::str_detect(decisions$decision_stage_raw, "Closing")
)

# withdrawal of the case
# (reads decision_stage_raw by its full name; referring to it as
# `decision_stage` only resolved through partial matching of `$` on a plain
# data frame, which is fragile and does not work on tibbles)
decisions$stage_withdrawal <- as.numeric(
  stringr::str_detect(decisions$decision_stage_raw, "Withdrawal")
)

# formal notice (article 258)
decisions$stage_lfn_258 <- as.numeric(
  stringr::str_detect(decisions$decision_stage_raw, "[Ff]ormal notice") &
    stringr::str_detect(decisions$decision_stage_raw, "258")
)

# formal notice (article 260)
decisions$stage_lfn_260 <- as.numeric(
  stringr::str_detect(decisions$decision_stage_raw, "[Ff]ormal notice") &
    stringr::str_detect(decisions$decision_stage_raw, "260")
)

# reasoned opinion (article 258; the text may also cite 226)
decisions$stage_ro_258 <- as.numeric(
  stringr::str_detect(decisions$decision_stage_raw, "[Rr]easoned opinion") &
    stringr::str_detect(decisions$decision_stage_raw, "258|226")
)

# reasoned opinion (article 260; the text may also cite 228)
decisions$stage_ro_260 <- as.numeric(
  stringr::str_detect(decisions$decision_stage_raw, "[Rr]easoned opinion") &
    stringr::str_detect(decisions$decision_stage_raw, "260|228")
)

# referral (article 258)
decisions$stage_rf_258 <- as.numeric(
  stringr::str_detect(decisions$decision_stage_raw, "[Rr]eferral") &
    stringr::str_detect(decisions$decision_stage_raw, "258")
)

# referral (article 260)
decisions$stage_rf_260 <- as.numeric(
  stringr::str_detect(decisions$decision_stage_raw, "[Rr]eferral") &
    stringr::str_detect(decisions$decision_stage_raw, "260")
)

# decisions citing both article 258 and article 260: flagged here so that
# they can be dropped further down as Article 260(3) decisions
decisions$stage_rf_260_3 <- as.numeric(
  stringr::str_detect(decisions$decision_stage_raw, "258") &
    stringr::str_detect(decisions$decision_stage_raw, "260")
)

# press release link: pull the "IP-<digits>-<digits>" reference out of the raw
# field and prefix it with the press corner URL (NA when no reference is found)
decisions$press_release_link <- stringr::str_c(
  "https://ec.europa.eu/commission/presscorner/detail/EN/",
  stringr::str_extract(decisions$press_release, "IP-[0-9]+-[0-9]+")
)

# press release: 1 when a link could be built, 0 otherwise
decisions$press_release <- as.numeric(!is.na(decisions$press_release_link))

# drop the memo and active columns, then remove the rows flagged as
# Article 260(3) decisions
decisions <- decisions %>%
  dplyr::select(-memo, -active) %>%
  dplyr::filter(stage_rf_260_3 == 0)

##################################################
# clean member state names
##################################################

# correct the spelling of Luxembourg
is_luxemburg <- decisions$member_state %in% "Luxemburg"
decisions$member_state[is_luxemburg] <- "Luxembourg"
rm(is_luxemburg)

##################################################
# clean departments names
##################################################

# department variable: prefix every raw name with "Directorate-General for "
decisions$department <- stringr::str_c(
  "Directorate-General for ",
  decisions$department_raw
)

# services that are not Directorates-General: any name containing the pattern
# (left) is replaced by the plain service name (right), in this order
service_names <- c(
  "Secretariat General" = "Secretariat-General",
  "Eurostat" = "Eurostat",
  "Legal Service" = "Legal Service"
)
for (service_pattern in names(service_names)) {
  is_service <- stringr::str_detect(decisions$department, service_pattern)
  decisions$department[is_service] <- service_names[[service_pattern]]
}

# recode to match current names: exact old name (left) to new name (right)
current_names <- c(
  "Directorate-General for Justice, Fundamental Rights and Citizenship" =
    "Directorate-General for Justice and Consumers",
  "Directorate-General for Communication Networks, Content and Technology" =
    "Directorate-General for Communications Networks, Content and Technology",
  "Directorate-General for Defence Industry and Space" =
    "Directorate-General for Defense Industry and Space",
  "Directorate-General for Internal Market and services" =
    "Directorate-General for Internal Market and Services",
  "Directorate-General for External relations" =
    "Directorate-General for External Relations"
)
for (old_name in names(current_names)) {
  is_old_name <- decisions$department == old_name
  decisions$department[is_old_name] <- current_names[[old_name]]
}

# clean workspace
rm(
  service_names, service_pattern, is_service,
  current_names, old_name, is_old_name
)

##################################################
# clean date
##################################################

# swap the slashes for dashes and parse the result as a year-month-day date
decisions$decision_date <- lubridate::ymd(
  stringr::str_replace_all(decisions$decision_date, "/", "-")
)

##################################################
# clean description
##################################################

# clean description
decisions$case_description <- stringr::str_to_lower(decisions$case_description)

# ensure case descriptions are consistent: every decision in a case takes the
# description found on the first row of that case.
# A case is identified by its year AND its number. case_number holds only the
# trailing four digits of the raw case ID, so grouping by it alone would pool
# rows from different years that share those digits and copy one case's
# description onto the others.
decisions <- decisions %>%
  dplyr::group_by(case_year, case_number) %>%
  dplyr::mutate(case_description = case_description[1]) %>%
  dplyr::ungroup()

##################################################
# decision stage
##################################################

# Collapse the stage flags into a label, a short code and a numeric ID.
# When several flags are set on one row the precedence is: withdrawal,
# closing, then the Article 260 stages (referral, reasoned opinion, formal
# notice), then the Article 258 stages in the same order. case_when() returns
# the first matching branch, so the branches are listed from highest to
# lowest precedence.
decisions <- dplyr::mutate(
  decisions,
  # decision stage (label); "Other" when no flag is set
  decision_stage = dplyr::case_when(
    stage_withdrawal == 1 ~ "Withdrawal",
    stage_closing == 1 ~ "Closing",
    stage_rf_260 == 1 ~ "Referral to the Court (Article 260)",
    stage_ro_260 == 1 ~ "Reasoned opinion (Article 260)",
    stage_lfn_260 == 1 ~ "Letter of formal notice (Article 260)",
    stage_rf_258 == 1 ~ "Referral to the Court (Article 258)",
    stage_ro_258 == 1 ~ "Reasoned opinion (Article 258)",
    stage_lfn_258 == 1 ~ "Letter of formal notice (Article 258)",
    TRUE ~ "Other"
  ),
  # decision stage (short code); "None" when no flag is set
  decision_stage_code = dplyr::case_when(
    stage_withdrawal == 1 ~ "W",
    stage_closing == 1 ~ "C",
    stage_rf_260 == 1 ~ "RF260",
    stage_ro_260 == 1 ~ "RO260",
    stage_lfn_260 == 1 ~ "LFN260",
    stage_rf_258 == 1 ~ "RF258",
    stage_ro_258 == 1 ~ "RO258",
    stage_lfn_258 == 1 ~ "LFN258",
    TRUE ~ "None"
  ),
  # decision stage ID (1 to 8); NA when no flag is set
  decision_stage_id = dplyr::case_when(
    stage_withdrawal == 1 ~ 8,
    stage_closing == 1 ~ 7,
    stage_rf_260 == 1 ~ 6,
    stage_ro_260 == 1 ~ 5,
    stage_lfn_260 == 1 ~ 4,
    stage_rf_258 == 1 ~ 3,
    stage_ro_258 == 1 ~ 2,
    stage_lfn_258 == 1 ~ 1,
    TRUE ~ NA_real_
  )
)

##################################################
# ID variables
##################################################

# load the member state lookup table (the .RData file is expected to provide
# an object named member_states) and keep the name, code and ID columns
load("data-raw/member_states.RData")
member_states <- member_states %>%
  dplyr::select(member_state, member_state_code, member_state_id)

# load the department lookup table (expected to provide an object named
# departments); keep the ID, name and code, renaming the name column to
# "department" so that it lines up with the join key in decisions
load("data-raw/departments.RData")
departments <- departments %>%
  dplyr::select(
    department_id,
    department = department_name,
    department_code
  )

# attach the member state and department identifiers to each decision
decisions <- decisions %>%
  dplyr::left_join(member_states, by = "member_state") %>%
  dplyr::left_join(departments, by = "department")

# clean workspace
rm(member_states, departments)

##################################################
# directive
##################################################

# directive number: locate a "directive", "dir." or "dir" mention followed by
# "<year>/<number>" (two- or four-digit year), then keep only the numeric part
decisions$directive_number <- stringr::str_extract(
  decisions$case_description,
  "(directive|dir\\.|dir) ([0-9]{2}|[0-9]{4})/[0-9]+"
)
decisions$directive_number <- stringr::str_extract(
  decisions$directive_number,
  "([0-9]{2}|[0-9]{4})/[0-9]+"
)

# expand two-digit years: 60-99 become 19xx and 00-09 become 20xx
decisions$directive_number <- stringr::str_replace(
  decisions$directive_number, "^([6789][0-9])/", "19\\1/"
)
decisions$directive_number <- stringr::str_replace(
  decisions$directive_number, "^(0[0-9])/", "20\\1/"
)

# CELEX number: "3" + four-digit year + "L" + directive number left-padded
# with zeros to four digits.
# Computed on whole columns instead of row by row: the stringr functions are
# vectorized and propagate NA, so rows without a directive number (or without
# a four-digit year) get an NA CELEX number, and an empty table is handled
# without a special case.
directive_year <- stringr::str_extract(
  decisions$directive_number, "^[0-9]{4}"
)
directive_serial <- stringr::str_pad(
  stringr::str_extract(decisions$directive_number, "[0-9]+$"),
  width = 4, side = "left", pad = "0"
)
decisions$celex <- stringr::str_c("3", directive_year, "L", directive_serial)

# clean workspace
rm(directive_year, directive_serial)

# has directive
decisions$directive <- as.numeric(!is.na(decisions$directive_number))

##################################################
# filter observations
##################################################

# drop minor procedures: keep only the rows whose stage label is not "Other"
decisions <- decisions %>%
  dplyr::filter(decision_stage != "Other")

##################################################
# organize data
##################################################

# order rows by decision date, breaking ties by case number and stage label
decisions <- decisions %>%
  dplyr::arrange(decision_date, case_number, decision_stage)

# case ID:
# "IP:<year>:<number padded to 4 digits>:<department code>:<member state code>"
decisions$case_id <- stringr::str_c(
  "IP",
  decisions$case_year,
  stringr::str_pad(decisions$case_number, width = 4, side = "left", pad = "0"),
  decisions$department_code,
  decisions$member_state_code,
  sep = ":"
)

# decision ID: the case ID followed by the stage code. When a case has
# several decisions at the same stage they are numbered in row order, and the
# number is appended as ":<n>" from the second one onward (the ":1" of the
# first one is stripped again).
decisions <- decisions %>%
  dplyr::mutate(
    decision_id = stringr::str_c(case_id, decision_stage_code, sep = ":")
  ) %>%
  dplyr::group_by(decision_id) %>%
  dplyr::mutate(counter = dplyr::row_number()) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(
    decision_id = stringr::str_remove(
      stringr::str_c(decision_id, ":", counter),
      ":1$"
    )
  )

# standardize data: make the case-level variables constant within each case.
# Every variable takes the value from the first row of the case, except the
# directive indicator, which is 1 when any row of the case has it set.
decisions <- decisions %>%
  dplyr::group_by(case_id) %>%
  dplyr::mutate(
    directive_number = dplyr::first(directive_number),
    celex = dplyr::first(celex),
    directive = as.numeric(sum(directive) > 0),
    case_type_id = dplyr::first(case_type_id),
    case_type = dplyr::first(case_type),
    noncommunication = dplyr::first(noncommunication),
    nonconformity = dplyr::first(nonconformity)
  ) %>%
  dplyr::ungroup()

# key ID: running row number in the current sort order.
# seq_len() is used rather than 1:nrow() because 1:0 evaluates to c(1, 0),
# which cannot be assigned to a table with zero rows.
decisions$key_id <- seq_len(nrow(decisions))

# sort variables: keep only the columns listed here, in this order
# (helper columns that are not listed are dropped)
decisions <- dplyr::select(
  decisions,
  key_id, decision_id, case_id, case_number, case_year,
  decision_date, decision_year, decision_month, decision_day,
  member_state_id, member_state, member_state_code,
  department_id, department, department_code,
  case_type_id, case_type, noncommunication, nonconformity,
  directive, directive_number, celex,
  decision_stage_id, decision_stage,
  stage_lfn_258, stage_ro_258, stage_rf_258,
  stage_lfn_260, stage_ro_260, stage_rf_260,
  stage_closing, stage_withdrawal, stage_additional,
  press_release
)

##################################################
# export data
##################################################

# store the cleaned decisions table as an .RData file
save(list = "decisions", file = "data/decisions.RData")

################################################################################
# end R script
################################################################################
# jfjelstul/euip documentation built on Dec. 20, 2021, 11:07 p.m.