# vignettes/pooled_services.R

## ----setup, include = FALSE----------------------------------------------
# Chunk defaults for the whole vignette: merge source and output into one
# block, prefix printed output with "#>", and never cache chunk results.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", cache = FALSE)

## ----data_ipsa, echo=FALSE-----------------------------------------------
# Attach the ipsar package and load its `ipsa` dataset into the workspace.
library(ipsar)
data(list = "ipsa")

## ----research, message=FALSE---------------------------------------------
library(tidyverse)

# Flag every claim whose short description mentions " Research" (the leading
# space is kept from the original pattern). The pattern used to be
# " Research*": in a regular expression the star quantifies only the final
# "h", so it matched " Researc" followed by any number of "h" rather than
# acting as a wildcard.
ipsa$research <- str_detect(ipsa$short_description, " Research")

# `research` is a logical column, so filter on it directly instead of
# comparing it with the string "TRUE". Rows where it is NA are dropped, as
# before.
research <- ipsa %>%
  dplyr::filter(research) %>%
  select(expense_type, short_description, details)

head(research)

## ----clean_description, message=FALSE------------------------------------
library(tidyverse)

# Collapse the known spellings of the European Research Group in
# `short_description` to the single label "ERG", stored in
# `clean_description`. The steps run in sequence, each on the output of the
# previous one, so order matters.
ipsa$clean_description <- ipsa$short_description %>%
  # The longer "... staff" phrase must be handled before the generic phrase:
  # once "European Research Group" has been replaced, "European Research
  # Group staff" can no longer match and would be left behind as "ERG staff".
  str_replace_all("European Research Group staff", "ERG") %>%
  str_replace_all("European Research Group", "ERG") %>%
  str_replace_all("EUROPEAN RESEARCH GROUP", "ERG") %>%
  # Variants that already use the abbreviation.
  str_replace_all("110516 - ERG Membership", "ERG") %>%
  str_replace_all("ERG Fees", "ERG") %>%
  str_replace_all("ERG Payment", "ERG") %>%
  str_replace_all("ERG Researcher", "ERG") %>%
  str_replace_all("ERG Sub 2011-12", "ERG") %>%
  str_replace_all("ERG subscription", "ERG") %>%
  # Was str_replace(); use str_replace_all() like every other step.
  str_replace_all("ERG 2010/2011", "ERG")

## ----view_ipsa, eval=FALSE-----------------------------------------------
#  ipsa %>% count(details) %>% View()

## ----clean_details-------------------------------------------------------
# Build `ipsa$clean_details` from `ipsa$details`: strip bracketed text and
# parentheses, then rewrite the listed phrases to the label "ERG".
# The steps run strictly in sequence, each one on the output of the previous
# step, so their order determines the result. The create_erg chunk later
# keeps only rows where `clean_details` is exactly "ERG".
ipsa$clean_details <- ipsa$details %>%
  str_replace_all("\\[([^]]*)]", "") %>% # drop any [...] span, brackets included
  str_replace_all("[(]|[)]", "") %>% # drop "(" and ")" but keep the text between them
  # Generic spellings of the group name.
  str_replace_all("European Research Group", "ERG") %>%
  str_replace_all("EUROPEAN RESEARCH GROUP", "ERG") %>%
  # "RESERCH" presumably mirrors a misspelling in the data -- TODO confirm.
  str_replace_all("2010 EUROPEAN RESERCH GROUP", "ERG") %>%
  # NOTE(review): every remaining pattern that contains the literal
  # "European Research Group" can no longer match, because the generic
  # replacement above has already rewritten each occurrence to "ERG".
  # Several of the resulting "ERG ..." forms are handled further down
  # ("ERG for 2010/11", "ERG Membership", "ERG Researcher",
  # "ERG Subscription", "Annual subs ERG"); others, such as
  # "ERG pooled staff", do not appear to be -- verify against the data.
  str_replace_all("Annual subs European Research Group", "ERG") %>%
  str_replace_all("European Research Group for 2010/11", "ERG") %>% 
  str_replace_all("European Research Group Membership", "ERG") %>%
  str_replace_all("European Research Group pooled staff", "ERG") %>%
  str_replace_all("European Research Group Researcher", "ERG") %>%
  str_replace_all("European Research Group Subscription", "ERG") %>% 
  str_replace_all("Membership for the European Research Group", "ERG") %>% 
  str_replace_all("Research Services - European Research Group ", "ERG") %>% 
  str_replace_all("Membership for European Research Group for Committee Purposes", "ERG") %>% 
  # Same pattern as two steps above (repeated in the original).
  str_replace_all("Research Services - European Research Group ", "ERG") %>% 
  # A description that does not name the group is also mapped to "ERG".
  str_replace_all("Research services on European issues, in support of Parliamentary functions", "ERG") %>% 
  # Variants that already use the abbreviation.
  str_replace_all("ERG Pooled staffing", "ERG") %>% 
  str_replace_all("ERG services from pooled staff member as previous year", "ERG") %>% 
  # The lower-case "ERG subscription" step runs first, so the three longer
  # lower-case variants below ("... ", "...-", "...IP") only see text where
  # that phrase is already "ERG"; the leftovers "ERG ", "ERG-" and "ERGIP"
  # are cleaned up by the final three steps.
  str_replace_all("ERG subscription", "ERG") %>% 
  str_replace_all("ERG Subscription", "ERG") %>% 
  str_replace_all("ERG subscription ", "ERG") %>% 
  str_replace_all("ERG subscription-", "ERG") %>% 
  str_replace_all("ERG subscriptionIP", "ERG") %>% 
  str_replace_all("ERG for 2010/11", "ERG") %>% 
  str_replace_all("ERG Researcher", "ERG") %>% 
  str_replace_all("Membership for ERG for Committee purposes", "ERG") %>%
  str_replace_all("ERG Membership", "ERG") %>% 
  str_replace_all("ERGservices from pooled staff member as previous year", "ERG") %>% 
  str_replace_all("Annual subs ERG", "ERG") %>% 
  # NOTE(review): this phrase does not mention the group at all; presumably
  # it is known to be an ERG subscription in this dataset -- confirm.
  str_replace_all("Annual Sub 2013-14", "ERG") %>% 
  # Second pass of "ERG Membership" (repeated in the original).
  str_replace_all("ERG Membership", "ERG") %>% 
  str_replace_all("PART STAFFING COSTS FOR ERGBACS payment received", "ERG") %>% 
  str_replace_all("PART STAFFING COSTS FOR ERG ", "ERG") %>% 
  # Final tidy-up: remove an "IP" suffix, a space or a hyphen that directly
  # follows "ERG" anywhere in the string.
  str_replace_all("ERGIP", "ERG") %>% 
  str_replace_all("ERG ", "ERG") %>% 
  str_replace_all("ERG-", "ERG") 

## ----create_erg----------------------------------------------------------
# Give every claim a row id so that a claim matched through both the cleaned
# description and the cleaned details can be recognised after binding.
# seq_len() is used instead of 1:nrow() so an empty table yields no ids
# rather than c(1, 0).
ipsa$id <- seq_len(nrow(ipsa))
erg1 <- ipsa %>% dplyr::filter(clean_description == "ERG")
erg2 <- ipsa %>% dplyr::filter(clean_details == "ERG")

# Bind both selections, flag repeated ids and keep the first occurrence of
# each claim. The logical `duplicated` column is kept in the result (all
# FALSE), as in the original, and is tested directly rather than compared
# with the string "FALSE".
erg <- bind_rows(erg1, erg2) %>%
  mutate(duplicated = duplicated(id)) %>%
  dplyr::filter(!duplicated)

rm(erg1, erg2)

## ----erg_count-----------------------------------------------------------
# Check the total amount claimed per member and year. tally() sums
# `amount_claimed` within each group (passed as the weight) and sorts the
# totals in descending order. `sort` takes a logical; the original passed
# the string "TRUE" and relied on implicit coercion.
erg_count <- erg %>%
  group_by(year, member_id, mps_name, dp_name, current_role) %>%
  tally(wt = amount_claimed, sort = TRUE)
head(erg_count)

## ----erg_adjust----------------------------------------------------------
# Drop two individual records (one by claim number, one by row id) and give
# every remaining claim a subscription count of 1 so counts can be summed.
# NOTE(review): the reason these two records are excluded is not stated here.
# `id` is an integer row index, so it is compared with an integer rather
# than a string; `claim_no` keeps its original string comparison because its
# type is not visible in this script. Rows where either comparison is NA are
# dropped, exactly as with the two separate filter() calls used before.
erg <- erg %>%
  dplyr::filter(claim_no != "365591", id != 1323443L) %>%
  mutate(subscription_count = 1)

## ----erg_save, echo=FALSE------------------------------------------------
# Write the `erg` table to erg.rda using xz compression.
save(list = "erg", file = "erg.rda", compress = "xz")

## ----erg_subscriptions---------------------------------------------------
# Per member: sum the subscription counts and the amounts claimed, then list
# the members with the most subscriptions first.
erg_subscriptions <- erg %>%
  group_by(member_id, ipsa_name, dp_name, current_role, mps_constituency) %>%
  summarise_at(
    .vars = c("subscription_count", "amount_claimed"),
    .funs = sum
  ) %>%
  arrange(desc(subscription_count))

# Print the summary without the constituency column.
select(
  erg_subscriptions,
  member_id, ipsa_name, dp_name, current_role,
  subscription_count, amount_claimed
)

## ----erg_subscriptions1--------------------------------------------------
# Attach member details from `ipsa_commons` to the subscription summary.
# The right join keeps every row of `erg_subscriptions`; the final select()
# fixes the column order of the result.
erg_subscriptions <- ipsa_commons %>%
  select(
    member_id, member_from, gender, date_of_birth,
    laying_minister_name, house_start_date
  ) %>%
  right_join(erg_subscriptions, by = "member_id") %>%
  select(
    member_id, dp_name, member_from, gender, date_of_birth,
    laying_minister_name, house_start_date, current_role,
    amount_claimed, subscription_count
  )
head(erg_subscriptions)

## ----echo=FALSE----------------------------------------------------------
# Write the enriched subscription summary to erg_subscriptions.rda (xz).
save(list = "erg_subscriptions", file = "erg_subscriptions.rda", compress = "xz")
# poldham/ipsar documentation built on May 29, 2019, 11:03 p.m.