# Nothing
#' @importFrom magrittr %>%
#' @importFrom lubridate %within% ymd
#' @importFrom pdftools pdf_text
#' @importFrom dplyr summarise ungroup group_by n select
# Names that the package refers to as bare symbols (column names used inside
# dplyr verbs, plus the magrittr placeholder "."). They are registered with
# utils::globalVariables() / utils::suppressForeignCheck() below.
v <- c(
  "rollcall", "vote", "Legislators", "Affirmative", "prop_AF",
  "sex", "argument", "Date", "Negative", "prop_NG",
  "prop_women", "prop_arg", "rc", "Chamber", "chamber"
)

# Only register when the running R version is at least 2.15.1.
if (getRversion() >= "2.15.1") {
  utils::globalVariables(c(".", v))
  utils::suppressForeignCheck(c(".", v))
}
# Map each date in a data frame column to the row index of the matching
# interval in the `legislaturas` table (fecha_inicio .. fecha_fin).
#
# df:            a data frame (or anything as.data.frame() accepts).
# name_var_date: name of the column of `df` holding the dates.
#
# Returns an integer vector with one element per row of `df`: the index of the
# first interval of `legislaturas` that contains the date, or NA when no
# interval contains it. A 0-row input yields integer(0).
legislature <- function(df, name_var_date){
  df <- as.data.frame(df)
  if(!name_var_date %in% names(df)){
    stop("Column '", name_var_date, "' not found in 'df'.", call. = FALSE)
  }
  # Build the intervals locally instead of adding a column to `legislaturas`.
  intervals <- lubridate::interval(legislaturas$fecha_inicio, legislaturas$fecha_fin)
  dates <- df[[name_var_date]]
  # seq_along() keeps the empty case empty (1:length() would iterate over 1, 0).
  vapply(seq_along(dates), function(i){
    hit <- which(dates[i] %within% intervals)
    # A date on a shared boundary can match two intervals; keep the first.
    if(length(hit) > 0L) hit[1L] else NA_integer_
  }, integer(1))
}
# Lookup of the constants used by the text parser.
#
# object: a single string key.
#   "esir"  -> one regex: all known OCR misreadings of "SEÑOR" joined by "|"
#              (the order of the alternatives is kept as is).
#   "meses" -> one regex: Spanish month names (and two truncated forms) joined
#              by "|".
#   "eleg"  -> opening part of a regex; note the group is left unclosed here,
#              the caller appends the closing ")".
#   "chamb" -> character vector: four full chamber names followed by their
#              four short forms.
# Any other value is returned unchanged.
aux <- function(object){
  if (object == "esir") {
    sir_variants <- c(
      "SEJ\u00d1OR", "8E\u00d1OR", "~E~WR", "SE~OR",
      "S~OR", "SEROR", "SMOR", "SE:ROR",
      "SEJ.'ilOR", "SElilOR", "SEJ'ilOR", "SEl'ilOR",
      "SE:\u00d1OR", "SEJ\u00d1OR", "S\u00d1~OR", "S@OR",
      "SE:l'tOR", "SEJS'OR", "SUOB", "SENOR",
      "SE:f.tOR", "SEI'\u00ed\"OR", "SEJ.~OR", "SEI'lOR",
      "SJ.~OR", "SEl~OR", "SE'\u00d1OR", "SE~JOR",
      "SEf~OR", "SEtlOR", "SEf.JOR", "SEfjO\u00edl",
      "SEfJOR", "SEl\\IOR", "SEtilOR", "SEl\\\\IOR",
      "SEl\\\\JOR", "SEl'JOR", "DiSE\u00d1OR", "SEfilOR",
      "Sel'ior", "SEi\\\\.IOR", "SEi\\\\JOR", "SEl\"JOR",
      "SEJli.lOR", "SE\"'OR", "SEJ\\\\IOR", "SEt\\\\i\u00f3R",
      "SEl\'.IOR", "SEIQOR", "SEl\'IOR", "SEt\\IOR",
      "SEl'i.IOR", "SEl'JOR", "SEI'JOR", "SEJ\\)j'OR",
      "SEf'IOR", "SEJ\\\\.IOR", "SEt:JOR", "SEl\"ilOR",
      "SEl\"IOR", "SE\\!\\\\JOR", "SE!ilCF", "SEf;IOR",
      "SEISIOR", "Sl;\u00d1OR", "SEAOR", "SEf.ilOR",
      "SE1'10R", "SEFIDR", "SEt\\\\IOR", "SEt\\\\10",
      "SEf.IOR", "SEl'iJOR", "SEf:.IOR", "SEJ\\\\JOR",
      "SEf110R", "SEl\\\\IQR", "SEl\\\\.IOR", "SErilOR",
      "SE\u00d1.OR", "SE!'JOR", "SEl'\u00ed\\!OA", "SEici\\!OR",
      "SEl\"lOR", "SEi\\\\lOR", "SEl\\\\jOR", "SERcR",
      "SEF40R", "seiQOR", "SEtl:IOR", "SEl\\\\iOR",
      "SEf'IOR", "SEli\u00edOR", "SEKIOR", "SEf;IQR",
      "SE;tilOR", "SEl'\\\\JOR", "SEl'iilOR", "SE\"10R",
      "SEJli.IOR", "SEf;iOR", "SEOOR", "sElilOR",
      "SEF:IOR", "~OR", "SE~CR", "S~OO",
      "SEfJOO", "SEFJOR", "5ERCR", "Se:F:lOR",
      "SERCR", "SEfilCR", "SERDR", "SEACR",
      "SElilCR", "SERQR", "sEROR", "SEFIOR",
      "soberSa:lCR", "SEfi\\!CR", "SER00", "SE\u00d1OO",
      "SERcF", "SERoR", "SEfilOO", "SEFICR",
      "srnCR", "SEFICR", "SEFICR", "SEfiIDR",
      "SE\u00d1CR", "srnOR", "SEFlOR", "SEf;\u00edOR",
      "SEFlOR", "SEf:loR", "SEru:R", "SERt:R",
      "SEl'\\!CR", "SEf:\u00edCR", "SEFKR", "sERCR",
      "Srnffi", "SEF\\!ffi", "SElllOR", "SERclR",
      "SElllCR", "SEl'lOR", "SEfilffi", "SER~",
      "SEF'.lffi", "SENDR", "~ENOR", "SEfKJR",
      "SHIOR", "SEf.\u00edOR", "SEf:ICJf", "SEflOR",
      "SEr:.no", "SEFillR", "SEf:ICR", "Sf\u00d1OR",
      "s~rn", "SEfiicF", "SE\u00d1ffi", "SEli\u00edrn",
      "SEfiiOR", "SEf:lrn", "SEli\u00edCR", "SEli\u00edCR",
      "SEf;lffi", "sEFJOR", "SEFlCR", "SEli\u00edCF",
      "SEf:lOR", "SER0R", "SE\u00d1CJ", "SEF30R",
      "SEfllOR", "SEFJCIR", "SEf'lff", "SElilc:R",
      "SEf.lCR", "SEf.1CR", "SEf'.lOR", "SE\u00edlOR",
      "srnoR", "SEf.ioR", "SE\\!ilOR", "SEF4QR",
      "SERffi", "~ FREY", "SEb", "SEfIDR",
      "SeF\u00edor", "SEl\\\\lOR", "SE\u00d1\u00faR", "SEf\u00edOR",
      "SE\u00d1LA", "SEROO", "SEFJDR", "SEFat",
      "SEfiKJR", "SE\\(i:l\\(\\]R", "SEf;\\(\\)R", "SE\\!irJR",
      "SEliiOR", "SEf.lOR", "StFlOR", "St~",
      "SEIQOA", "SEFloR", "SEROFI", "SEFaelR",
      "S\\[SE\u00d1OR", "~Ef'.JOR", "SrnDR", "SEJ!itOR",
      "SE:&OR", "SI!;\u00d1OR", "SE:\u00d1'OR", "SE\u00edi\u00ed\"OR",
      "SE1il'OR", "SEiil'OR", "SEl'll\"OR", "SDOR",
      "SElil'OR", "SE:f\u00edrOR", "SEilOR", "SEJI:lOR",
      "SE:f.iOR", "SEI'l'OR", "SEI\\\\fOR", "SimOR",
      "SE:filOR", "SEl\\\\!OR", "SE'lllOR", "SEJ:ItOR",
      "SEl'tOR", "SElliOR", "8!:-..\u00d1OR", "SE:l.ll'OR",
      "SEfiOR", "SE'fi'tOR", "SE&OR", "SEl'i:lOR",
      "SE:r::tOR", "SEl'\u00ed'OR", "SEiitOR", "SE:AOR",
      "SEftOR", "SE:FJOR", "SE1i\u00edOR", "SE:fil\"OR",
      "SE:fil'OR", "SmOR", "SF\u00d1OR", "S:EaOR",
      "tSE\u00d1OR", "SEl'\u00edOR", "SE1iiOR", "SFmOR",
      "SE\u00ediJOR", "SE1tOR", "SE:\u00d1OR", "SERo:a",
      "SE:flrOR", "SE:IilOR", "SEA'OR", "SE~",
      ",SE:&OR", "SE\"&OR", "SE:fiOR", "SEI'tOR",
      "SE'&OR", "SElitOR", "s~oR", "SE.fjOR",
      "SE::lQ'OR", "SdOR", "S~", "SE:ftOR",
      "SEI'!OR", "SEI'IOR", "smoR", "SE&QR",
      "SE\u00d1QR", "SE:\u00edi\u00ed'OR", "SEt\\\u00edOR", "SI \u00d1OR",
      # The final alternative uses the escaped form of the letter only.
      "SE \u00d1OR", "\u00d1OR"
    )
    return(paste(sir_variants, collapse = "|"))
  }

  if (object == "meses") {
    month_names <- c(
      "ENERO", "FEBRERO", "MARZO", "ABRIL", "MAYO", "JUNIO",
      "JULIO", "AGOSTO", "SETIEMBRE", "OCTUBRE", "NOVIEMBRE", "DICIEMBRE",
      "SEPTIEMBRE", "ENER", "FEBR"
    )
    return(paste(month_names, collapse = "|"))
  }

  if (object == "eleg") {
    # Deliberately unterminated: the caller closes the group.
    return("^(PRES|PESI|ESIDEN|SIDENTE|RESIDENT|SENAD|REPRE|SECRE|MINIS|LEGIS|PROSE|SUBSEC|DIPUTAD")
  }

  if (object == "chamb") {
    return(c(
      "ASAMBLEA GENERAL", "CAMARA DE SENADORES",
      "CAMARA DE REPRESENTANTES", "COMISION PERMANENTE",
      "GENERAL", "SENADORES", "REPRESENTANTES", "PERMANENTE"
    ))
  }

  # Unknown key: hand the input back untouched.
  object
}
# Read a PDF and return its pre-processed token vector together with the
# first element produced by pdfSIR().
#
# file:          path of the PDF, handed to pdfSIR().
# add.error.sir: optional character vector of extra alternatives appended to
#                the aux("esir") pattern.
#
# Returns an unnamed list: [[1]] the output of preComp(), [[2]] the first
# element of the pdfSIR() result.
prospow <- function(file, add.error.sir = NULL){
  sir_pattern <- aux("esir")
  if (!is.null(add.error.sir)) {
    extra <- paste0(add.error.sir, collapse = "|")
    sir_pattern <- paste0(sir_pattern, "|", extra, collapse = "")
  }
  parsed <- pdfSIR(file)
  tokens <- preComp(parsed[["text"]], esir = sir_pattern)
  list(tokens, parsed[[1]])
}
# Extract one row per intervention from a parliamentary-diary PDF.
#
# file:          path of the PDF.
# add.error.sir: optional extra alternatives appended to the aux("esir")
#                pattern before pre-processing.
# rm.error.leg:  optional extra alternatives appended to the aux("eleg")
#                pattern; speakers whose name matches it are dropped unless
#                `nominate` is TRUE.
# compiler:      not used inside this function.
# quality:       when TRUE, add columns index_1 and index_2 (see below).
# param:         list with `char` and `drop.page`, only used for index_1.
# nominate:      when TRUE, keep the rows matched by the "eleg" pattern.
#
# Returns a tibble (class "puy" appended) with columns legislator, speech,
# chamber, date, legislature, id and, if requested, index_1/index_2.
# When fewer than two speech markers are found a warning is raised and the
# value of warning() is returned instead of a tibble.
speech.pow <- function(file, add.error.sir = NULL, rm.error.leg = NULL, compiler = FALSE,
                       quality = FALSE, param = list(char = 6500, drop.page = 2), nominate = FALSE){
  esir <- aux("esir")
  if(!is.null(add.error.sir)){
    esir <- paste0(esir, "|", paste0(add.error.sir, collapse = "|"), collapse = "")
  }
  text0 <- pdfSIR(file)
  text <- preComp(text0[["text"]], esir = esir)
  text1 <- text0[[1]]
  # Positions of the tokens that open an intervention; markers within the
  # first 300 tokens are ignored.
  clave <- stringr::str_which(text, "^SE\u00d1OR")
  clave <- clave[clave > 300]
  # File name without directory and extension.
  ident <- sub("([^.]+)\\.[[:alnum:]]+$", "\\1", basename(file))
  if(length(clave) <= 1){
    warning(paste("The document", ident,"does not contain interventions by legislators or it is not possible to recognize them in the text. \n"), call. = FALSE)
  }else{
    # Sentinel one past the end, so the final speech keeps its last token.
    clave <- c(clave, length(text) + 1L)
    vec_speech <- vapply(seq_len(length(clave) - 1L), function(i){
      paste(text[clave[i]:(clave[i + 1L] - 1L)], collapse = " ")
    }, character(1))
    ## legis name --------------------------------------------------
    vec_speech2 <- chartr('\u00e1\u00e9\u00ed\u00f3\u00fa','aeiou', vec_speech)
    # The name is taken as the text before the first lower-case letter.
    legis <- substring(vec_speech2, 1, regexpr("[a-z]|[A-Z]+\\s+[a-z]", vec_speech2) -2) %>%
      stringr::str_remove_all(pattern = "(SE\u00d1ORES|SE\u00d1ORA|SE\u00d1OR)")
    # Names of two characters or fewer fall back to the first 20 characters.
    empt <- which(nchar(legis) <= 2)
    legis[empt] <- substring(vec_speech[empt], 1, 20)
    legis <- legis %>%
      stringr::str_remove_all(pattern = "(SE\u00d1ORES|SE\u00d1ORA|SE\u00d1OR)") %>%
      stringr::str_remove_all(pattern = "[^[:alnum:][:blank:]?/\\:-]") %>%
      stringr::str_remove_all(pattern = "[^A-z\u00d1\u00F1. ]") %>% ### check A-Z or A-z
      stringr::str_squish()
    ## date --------------------------------------------------------
    meses <- aux("meses")
    fe <- stringr::str_which(toupper(text1), pattern = meses)
    if(length(fe) != 0){
      # Token two before the month, the month, and the first four characters
      # of the token two after it, parsed as day-month-year.
      fdiario <- suppressWarnings(
        c(text1[fe[1]-2], text1[fe[1]], substring(text1[fe[1]+2], 1, 4)) %>%
          paste(., collapse = " ") %>%
          stringr::str_squish() %>%
          lubridate::parse_date_time(orders = "dmy")
      )
      fdiario <- as.Date(fdiario)
    }else{
      fdiario <- NA
    }
    ## chamber -----------------------------------------------------
    chamb <- aux("chamb")
    # NOTE(review): as written the gsub() below replaces a single space with a
    # single space (a no-op); presumably it was meant to collapse repeated
    # spaces -- confirm against the upstream source.
    cha <- text1 %>% chartr('\u00c1\u00c9\u00cd\u00d3\u00da','AEIOU',.) %>% paste(collapse = " ") %>%
      gsub(pattern = " ", replacement = " ", .)
    cha <- chamb[stringr::str_which(cha, chamb)[1]] %>% chamber_fit()
    ## legislature -------------------------------------------------
    text2 <- tibble::tibble(
      legislator = chartr('\u00c1\u00c9\u00cd\u00d3\u00da','AEIOU', legis),
      speech = vec_speech,
      chamber = cha,
      date = fdiario
    )
    if(!is.na(fdiario)){
      text2$legislature <- legislature(text2, name_var_date = "date")
    }else{
      text2$legislature <- NA_integer_
    }
    text2$id <- ident
    eleg <- aux("eleg")
    if(!is.null(rm.error.leg)){
      eleg <- paste0(eleg, "|", paste0(rm.error.leg, collapse = "|"), ")")
    }else{
      eleg <- paste0(eleg, ")")
    }
    ## rollcall ----------------------------------------------------
    if(!nominate){
      # Logical mask instead of negative indices: x[-integer(0), ] would drop
      # every row when no name matches the pattern.
      is_officer <- stringr::str_detect(toupper(text2$legislator), toupper(eleg))
      text2 <- text2[!(is_officer %in% TRUE), ]
    }
    text2 <- text2[nchar(text2$legislator) < 25 ,] # very long names
    text2 <- text2[stringr::str_detect(text2$speech, "[a-z]"),]
    if(nrow(text2)==0L){
      warning(paste("The document", ident,"only contains interventions by the president. \n"), call. = FALSE)
    }
    ## class for compiler ---------------------------------
    rmhead <- header(file = file)
    text2$speech <- stringr::str_replace_all(text2$speech, "\\s{2,}", " ")
    for(i in seq_along(rmhead)){
      text2$speech <- gsub(x = text2$speech, pattern = rmhead[i], replacement = "", fixed = TRUE)
    }
    if(quality){
      if(nrow(text2) != 0L){
        n1 <- sum(nchar(text)) + length(text)
        index_1 <- (param$char * (length(rmhead) - param$drop.page))/n1
        index_2 <- sum(nchar(text2$speech))/n1
        text2$index_1 <- round(index_1, 2)
        text2$index_2 <- round(index_2, 2)
      }
    }
    # Drop rows whose legislator name ended up empty (the previous filter
    # compared the literal string 'legislator' with "" and removed nothing).
    text2 <- text2[nzchar(text2$legislator), ]
    # Only the first period of each speech gets a trailing space.
    text2$speech <- stringr::str_replace(text2$speech, "\\.", "\\. ")
    class(text2) <- c(attributes(text2)$class, "puy")
    return(text2)
  }
}
# Collapse the speeches of `tidy_speech` into one row per combination of the
# grouping variables.
#
# tidy_speech: data frame / tibble with at least a `speech` column.
# compiler_by: character vector of column names to group by.
#
# Grouping variables that are not columns of `tidy_speech` are dropped with a
# warning; if none remains an error is raised. Grouping variables containing
# NA are not used for grouping and come back as all-NA columns (with a
# warning).
#
# Returns (invisibly) the compiled table after clean_t(), add_sex() and
# SError(), with class "puy" appended.
compiler <- function(tidy_speech, compiler_by = character()){
  # match() returns one value per requested name, so its length can never
  # reveal a missing column; test membership instead.
  present <- compiler_by[compiler_by %in% names(tidy_speech)]
  if(length(present) == 0L){
    stop("The variables of 'tidy speech' do not match those of 'compiler_by'", call. = FALSE)
  }
  if(length(present) != length(compiler_by)){
    warning("Not all the variables of 'compiler_by' are in 'tidy speech'", call. = FALSE)
    compiler_by <- present
  }
  has_na <- vapply(compiler_by, function(nm) anyNA(tidy_speech[[nm]]), logical(1))
  war <- compiler_by[has_na]
  if(length(war) > 0){
    warning(paste("Variables that are in 'compiler_by' contain NA values:", paste(war, collapse = ", ")), call. = FALSE)
  }
  vars <- compiler_by[!compiler_by %in% war]
  if(length(vars) == 0L){
    stop("All the variables of 'compiler_by' contain NA values; nothing to compile by.", call. = FALSE)
  }
  # Row-wise key; `[vars]` keeps a data frame even for a single column.
  tidy_speech$rec <- apply(tidy_speech[vars], 1, paste, collapse = "__")
  ts_out <- tidy_speech %>% base::split(.$rec)
  cby <- tibble::tibble('varid' = names(ts_out))
  out <- ts_out %>%
    purrr::map(., function(x){
      tibble::tibble(speech = paste(x$speech, collapse = " "))
    }) %>%
    dplyr::bind_rows() %>%
    dplyr::bind_cols(cby, .) %>%
    tidyr::separate('varid', into = vars, sep = "__")
  # The key round-trips through character, so restore the column types.
  if("legislature" %in% vars){
    out$legislature <- as.integer(out$legislature)
  }
  if("date" %in% vars){
    if(is.character(tidy_speech$date)){out$date <- NA}
    out$date <- as.Date(out$date)
  }
  if(length(war) > 0){
    for(nm in war){
      out[[nm]] <- NA
    }
    out <- out[, c(compiler_by, "speech")]
  }
  if("index_1" %in% names(tidy_speech)){
    out$index_1 <- unique(tidy_speech$index_1)
    out$index_2 <- unique(tidy_speech$index_2)
  }
  class(out) <- c(attributes(out)$class, "puy")
  invisible(
    SError(
      add_sex(
        clean_t(
          out))))
}
# Replace a short chamber label (elements 5-8 of aux("chamb")) with the full
# name at the same position (elements 1-4). Other values pass through; a
# logical result (e.g. NA) is returned as NA.
chamber_fit <- function(chamber){
  labels <- aux("chamb")
  full_names <- labels[1:4]
  short_names <- labels[5:8]
  hit <- which(short_names %in% chamber)
  # ifelse() with a scalar test keeps only the first element of its result.
  chamber <- ifelse(length(hit) > 0, full_names[hit], chamber)
  if (is.logical(chamber)) {
    chamber <- NA
  }
  chamber
}
# Collect the leading fragment of each content element of a PDF, cleaned up.
# speech.pow() removes these strings from the speeches.
#
# file: path of the PDF, read through tm::readPDF("pdftools").
#
# Returns (invisibly) a character vector: for every element of the document
# content (presumably one per page -- confirm), the text before the first
# carriage return, with whitespace squeezed, hyphens and angle brackets
# removed and upper-case accents stripped; NA entries and entries of 10
# characters or fewer are dropped.
header <- function(file){
  reader <- tm::readPDF("pdftools")
  content <- reader(elem = list(uri = file), language = "spanish")$content
  first_piece <- lapply(strsplit(content, split = "\\r"), "[", 1)
  cleaned <- stringr::str_replace_all(first_piece, pattern = "\\s{2,}", replacement = " ")
  cleaned <- stringr::str_replace_all(cleaned, pattern = "- ", replacement = "")
  cleaned <- stringr::str_replace_all(cleaned, pattern = "-|--|<|>", replacement = "")
  cleaned <- chartr('\u00c1\u00c9\u00cd\u00d3\u00da', 'AEIOU', cleaned)
  cleaned <- stringr::str_squish(cleaned)
  cleaned <- cleaned[stats::complete.cases(cleaned)]
  invisible(cleaned[nchar(cleaned) > 10])
}
# Insert a space before "SEÑOR" wherever it is glued to preceding
# alphanumeric/punctuation characters (e.g. "x.SEÑOR" -> "x. SEÑOR").
#
# vec: character vector. Returned unchanged when no glued occurrence exists.
separate_sir <- function(vec){
  glued <- unlist(stringr::str_extract_all(string = vec, pattern = "[[:alnum:][:punct:]]{1,}SE\u00d1OR"))
  if (length(glued) == 0) {
    return(vec)
  }
  # Position of the character right before the first "SEÑOR" of each match.
  cut_at <- regexpr("[[:alnum:][:punct:]]SE\u00d1OR", glued)
  for (k in seq_along(glued)) {
    prefix <- substr(x = glued[k], start = 1, stop = cut_at[k])
    vec <- gsub(
      pattern = glued[k],
      replacement = paste(prefix, "SE\u00d1OR"),
      x = vec,
      fixed = TRUE
    )
  }
  vec
}
# Add a `sex` column derived from how each speech starts: 0 when it starts
# with "SEÑORA", 1 otherwise. Rows whose speech contains "SEÑORALE" are then
# forced to 1.
add_sex <- function(data){
  starts_with_senora <- stringr::str_detect(data$speech, pattern = "^SE\u00d1ORA")
  data$sex <- ifelse(starts_with_senora, 0, 1)
  ale_rows <- which(stringr::str_detect(data$speech, pattern = "SE\u00d1ORALE"))
  data[ale_rows, "sex"] <- 1
  data
}
# Normalise column types and squish whitespace in `speech`.
#
# For each of legislature, chamber, date and id that exists in `x` and whose
# first value is NA, the column is reset to a typed NA (integer / character)
# or, for date, coerced with as.Date(). `speech` is always passed through
# stringr::str_squish().
clean_t <- function(x){
  first_is_na <- function(col) {
    col %in% names(x) && is.na(x[[col]][1])
  }
  if (first_is_na("legislature")) {
    x$legislature <- NA_integer_
  }
  if (first_is_na("chamber")) {
    x$chamber <- NA_character_
  }
  if (first_is_na("date")) {
    x$date <- as.Date(x$date)
  }
  if (first_is_na("id")) {
    x$id <- NA_character_
  }
  x$speech <- stringr::str_squish(x$speech)
  x
}
# Check whether two dates both fall inside a given legislature.
#
# from, to:    dates as day-month-year strings (parsed with lubridate::dmy).
# legislature: row index into `legislaturas`.
#
# Returns FALSE when both dates lie within the interval of `legislature`,
# TRUE otherwise (including when either date matches no interval).
test_date <- function(from, to, legislature){
  intervals <- lubridate::interval(legislaturas$fecha_inicio, legislaturas$fecha_fin)
  desde <- which(lubridate::dmy(from) %within% intervals)
  hasta <- which(lubridate::dmy(to) %within% intervals)
  # Test membership for each date separately; averaging the two indices would
  # also accept dates lying in legislatures L-1 and L+1.
  !(legislature %in% desde && legislature %in% hasta)
}
# Return one fragment of the diary-search URL.
#
# step: position (1-5) or name ("step1".."step5") of the fragment.
urlp <- function(step){
  fragments <- list(
    "https://parlamento.gub.uy/documentosyleyes/documentos/diarios-de-sesion?Cpo_Codigo_2=",
    "&Lgl_Nro=",
    "&DS_Fecha%5Bmin%5D%5Bdate%5D=",
    "&DS_Fecha%5Bmax%5D%5Bdate%5D=",
    "&Ssn_Nro=&TS_Diario=&tipoBusqueda=T&Texto=&page="
  )
  names(fragments) <- paste0("step", seq_along(fragments))
  fragments[[step]]
}
# Build the search URLs for result pages 0 to 20, download each and collect
# the document links found in them.
#
# chamber, legislature, from, to: values pasted into the query string between
# the urlp() fragments.
#
# Returns a character vector of absolute links (NULL when none is found).
proto_url <- function(chamber, legislature, from, to){
  pages <- as.character(c(0:20))
  search_urls <- unlist(lapply(pages, function(page){
    paste0(urlp(1), chamber,
           urlp(2), legislature,
           urlp(3), from,
           urlp(4), to,
           urlp(5), page)
  }))
  links <- lapply(search_urls, function(search_url){
    anchors <- rvest::html_nodes(rvest::read_html(search_url), ".views-field-DS-File-IMG a")
    hrefs <- rvest::html_attr(anchors, "href")
    # Element-wise, so a page without links contributes nothing.
    lapply(hrefs, function(href) paste0("https://parlamento.gub.uy", href))
  })
  unlist(links)
}
# Rearrange "YYYY-MM-DD" text into "DD-MM-YYYY" by character position.
parseo <- function(x){
  day <- substring(x, 9, 10)
  month <- substring(x, 6, 7)
  year <- substring(x, 1, 4)
  paste(day, month, year, sep = "-")
}
# Split the period [from, to] by legislature.
#
# from, to: dates as day-month-year strings (parsed with lubridate::dmy).
#
# Returns a list named by the row index of `legislaturas`; each element is
# the range (first and last date) of the days of that legislature that fall
# inside the period. Legislatures with no day in the period are absent.
fechas_legis <- function(from, to){
  periodo <- lubridate::as_date(lubridate::dmy(from):lubridate::dmy(to))
  idx <- seq_len(nrow(legislaturas))
  # Every day of every legislature, built in one pass.
  lista <- lapply(idx, function(i){
    lubridate::as_date(
      lubridate::ymd(legislaturas$fecha_inicio[i]):lubridate::ymd(legislaturas$fecha_fin[i])
    )
  })
  dat <- data.frame(
    legis = rep(idx, lengths(lista)),
    fechas = lubridate::as_date(unlist(lista))
  )
  inside <- dat[which(dat$fechas %in% periodo), ]
  lapply(split(inside, f = inside$legis), function(x) range(x$fechas))
}
# Collect the document links for a chamber over a period, querying each
# legislature that overlaps the period separately.
#
# chamber:  value passed on to proto_url().
# from, to: dates as day-month-year strings.
#
# Returns the unlisted results of proto_url() (NULL when the period overlaps
# no legislature or no link is found).
urls.out <- function(chamber, from, to){
  param <- fechas_legis(from, to)
  # seq_along() so an empty `param` gives an empty result instead of a
  # subscript error.
  out <- lapply(seq_along(param), function(i){
    proto_url(chamber = chamber,
              legislature = names(param)[i],
              from = parseo(param[[i]][1]),
              to = parseo(param[[i]][2]))
  })
  unlist(out)
}
# Split compiled speeches back into individual interventions.
#
# data: table with columns id, speech, legislator, legislature, chamber,
#       date and sex.
#
# Returns an unnamed list with one tibble per distinct `id`; each speech is
# split on "SEÑOR"/"SEÑORA" and pieces shorter than 2 characters are dropped.
uncompiler <- function(data){
  comp <- base::split(data, data$id)
  unc <- vector("list", length(comp))
  for(i in seq_along(comp)){
    ud <- comp[[i]]
    l <- strsplit(ud$speech, "(SE\u00d1OR|SE\u00d1ORA)")
    largo <- lengths(l)
    out <- tibble::tibble(
      legislator = rep(ud$legislator, largo),
      legislature = unique(ud$legislature)[1],
      chamber = unique(ud$chamber)[1],
      date = unique(ud$date)[1],
      id = unique(ud$id)[1],
      speech = unlist(l),
      sex = rep(ud$sex, largo)
    )
    unc[[i]] <- out[nchar(out$speech) >= 2, ]
  }
  # Return after the loop: returning inside it stopped at the first document.
  unc
}
# Read a PDF through tm::readPDF("pdftools") and split its content into lines.
#
# Returns a list with
#   date_cham: the lines of the first content element split on single spaces;
#   text:      for each content element, its lines without the first one
#              (elements left with no lines are dropped).
pdfSIR <- function(file){
  reader <- tm::readPDF("pdftools")
  content <- reader(elem = list(uri = file), language = "spanish")$content
  lines_by_element <- strsplit(content, "\n")
  body_lines <- lapply(lines_by_element, "[", -1)
  body_lines <- body_lines[lengths(body_lines) > 0L]
  first_tokens <- unlist(strsplit(lines_by_element[[1]], " "))
  list(
    date_cham = first_tokens,
    text = body_lines
  )
}
# Flatten the lines of one page into a single string, reading the left part
# of every line first and the right part afterwards (presumably to linearise
# a two-column layout -- confirm).
#
# text:   character vector of lines.
# umbral: column at which a line is cut; defaults to half the longest line.
#
# Lines longer than `umbral` are split at that column. All left parts are
# followed by all right parts, whitespace is squeezed, empty pieces are
# dropped, and "- " and "-" are removed. Returns one string; empty input
# returns "".
extract_page <- function(text, umbral = ceiling(max(nchar(text))/2)){
  # Return before `umbral` is evaluated: max() of nothing is -Inf.
  if(length(text) == 0L){
    return("")
  }
  nch <- nchar(text)
  left <- text
  right <- rep(" ", length(text))
  long <- which(nch > umbral)
  left[long] <- substring(text[long], 1, umbral)
  right[long] <- substring(text[long], umbral + 1, nch[long])
  pieces <- c(left, right)
  pieces <- stringr::str_replace_all(pieces, pattern = "\\s{2,}", " ") %>% stringr::str_squish()
  joined <- paste(pieces[nchar(pieces) > 0L], collapse = " ")
  joined <- stringr::str_replace_all(joined, "- ", "")
  joined <- stringr::str_replace_all(joined, "-", "")
  joined
}
# Turn the per-page lines of a document into one cleaned vector of tokens.
#
# text: list of character vectors; each is flattened with extract_page().
# esir: regex of "SEÑOR" misreadings, replaced by "SEÑOR".
#
# Returns a character vector of non-empty tokens (split on single spaces),
# with upper-case accents stripped.
preComp <- function(text, esir){
  pages <- unlist(lapply(X = text, FUN = extract_page))
  pages <- stringr::str_replace_all(pages, pattern = "\\r\\n|\\r\\t|\\t", replacement = " ")
  # NOTE(review): as written this replaces a single space with a single space
  # (a no-op); presumably it was meant to collapse repeated spaces -- confirm.
  pages <- stringr::str_replace_all(pages, pattern = " ", replacement = " ")
  pages <- stringr::str_replace_all(pages, pattern = "- ", replacement = "")
  pages <- stringr::str_replace_all(pages, pattern = esir, replacement = "SE\u00d1OR")
  pages <- stringr::str_replace_all(pages, pattern = ":rlr|&|:&|!it|:\u00a1q'|iQ'", replacement = "\u00d1")
  pages <- stringr::str_replace_all(pages, pattern = "-|--|<|>", replacement = "")
  pages <- separate_sir(pages)
  tokens <- unlist(strsplit(pages, split = " "))
  tokens <- chartr('\u00c1\u00c9\u00cd\u00d3\u00da', 'AEIOU', tokens)
  tokens <- stringr::str_squish(tokens)
  tokens[nchar(tokens) >= 1L]
}
# Flag the strings that contain the phrase opening a roll-call vote.
nominal <- function(z){
  opening_phrase <- "Se toma en el siguiente orden"
  stringr::str_detect(z, opening_phrase)
}
# Positions of the strings in `sn` that open a roll-call vote (see
# nominal()); raises an error when there is none.
nom_detect <- function(sn){
  is_opening <- nominal(sn)
  if (sum(is_opening) == 0) {
    stop("No roll-call vote detected.", call. = FALSE)
  }
  which(is_opening)
}
# Strip "SE" fragments from the speeches and repair rows where "SEÑORALE"
# appears in the speech.
#
# zj: table with a `speech` column (and `legislator` when a repair applies).
#
# NOTE(review): the pattern "(.SE| SE)" removes ANY character followed by
# "SE", which looks broader than intended -- confirm against upstream.
SError <- function(zj){
  zj$speech <- stringr::str_remove_all(string = zj$speech, pattern = "(.SE| SE)")
  ale_rows <- which(stringr::str_detect(zj$speech, pattern = "SE\u00d1ORALE"))
  if (length(ale_rows) == 0) {
    return(zj)
  }
  zj$legislator[ale_rows] <- "SE\u00d1ORALE"
  zj$speech[ale_rows] <- paste("SE\u00d1OR", zj$speech[ale_rows])
  zj
}
# Extract the roll-call votes from a table of interventions.
#
# textorc: table with columns legislator, speech, chamber, date, legislature
#          and id, in document order.
#
# A roll call starts at a row detected by nom_detect() and ends at the first
# row at or after it containing "Dese cuenta del resultado" or "Han votado".
# Within that span each row is coded vote = 1 ("Afirmativ"/"Por la afirma")
# or 0 ("Negativ"/"Por la negat"); rows with no recognisable vote are
# dropped. `argument` is 1 when the speech contains "funda".
#
# Returns the stacked roll calls (column `rollcall` numbers them) after
# SError(), with class "nominal" prepended.
speech_pow_rc <- function(textorc){
  texto <- textorc
  nd <- nom_detect(texto$speech)
  nf <- stringr::str_which(
    string = texto$speech,
    pattern = "(Dese cuenta del resultado|Han votado)"
  )
  keep_cols <- c("legislator", "vote", "argument", "speech", "chamber",
                 "date", "legislature", "rollcall", "id")
  votaciones <- vector("list", length(nd))
  for(i in seq_along(nd)){
    # Pair each start with the first end marker at or after it; nf[i] goes
    # out of step as soon as one roll call has two end markers.
    end_row <- nf[nf >= nd[i]][1]
    if(is.na(end_row)){
      stop("Roll-call vote ", i, " has no closing marker in the text.", call. = FALSE)
    }
    rc <- texto[nd[i]:end_row, ]
    rc$rollcall <- i
    rc$vote1 <- NA_character_
    rc$vote1[stringr::str_detect(rc$speech, pattern = "(Afirmativ|Por la afirma)")] <- "Afirmativo"
    rc$vote1[stringr::str_detect(rc$speech, pattern = "(Negativ|Por la negat)")] <- "Negativo"
    rc$vote <- ifelse(rc$vote1 == "Afirmativo", 1, 0)
    # Logical mask: `-!is.na(x)` yields 0/-1 indices and only drops row 1.
    rc <- rc[!is.na(rc$vote1), ]
    rc$argument <- ifelse(stringr::str_detect(rc$speech, pattern = "funda"), 1, 0)
    votaciones[[i]] <- rc[, keep_cols]
  }
  vv <- do.call("rbind", votaciones)
  assignC(SError(vv))
}
# Prepend "nominal" to the class vector of `.X` and return the object.
assignC <- function(.X){
  new_classes <- c("nominal", class(.X))
  class(.X) <- new_classes
  .X
}
# Abbreviate full chamber names.
#
# dd: character vector of chamber names.
#
# Returns a character vector of the same length: "AG", "CSS", "CRR" or "CP"
# for the four known chambers, NA for NA or unrecognised values.
cmabSUMM <- function(dd){
  full_names <- c("ASAMBLEA GENERAL",
                  "CAMARA DE SENADORES",
                  "CAMARA DE REPRESENTANTES",
                  "COMISION PERMANENTE")
  codes <- c("AG", "CSS", "CRR", "CP")
  # match() is vectorised and maps NA / unknown names to NA, so no
  # element-wise `if` (which errors on NA) is needed.
  codes[match(dd, full_names)]
}
# Summarise roll-call votes: one row per (rollcall, date) group.
#
# dat: table with columns rollcall, date, vote, sex, argument and chamber.
#
# Returns a tibble with columns Chamber (abbreviated with cmabSUMM()), Date,
# Legislators (rows in the group), Affirmative (sum of vote), Negative,
# prop_AF, prop_NG, prop_women (share of rows with sex == 0, given the 0/1
# coding), prop_arg and rc (the roll-call number). Proportions are
# percentages rounded to 2 decimals.
table_rollcall_vote <- function(dat){
  dat %>%
    group_by(rollcall, date) %>%
    summarise(Legislators = n(),
              Affirmative = sum(vote),
              Negative = Legislators - Affirmative,
              prop_AF = round((Affirmative/Legislators)*100, 2),
              prop_NG = round(100-prop_AF,2),
              # TRUE rather than T: T is an ordinary variable and can be reassigned.
              prop_women = round(((Legislators - sum(sex, na.rm = TRUE))/Legislators)*100,2),
              prop_arg = round((sum(argument)/Legislators)*100,2),
              Date = unique(date),
              rc = unique(rollcall),
              Chamber = cmabSUMM(chamber)[1]) %>%
    ungroup() %>%
    select(Chamber,
           Date,
           Legislators,
           Affirmative,
           Negative,
           prop_AF,
           prop_NG,
           prop_women,
           prop_arg,
           rc)
}
### parse id
# If the identifier (id) has more than 20 characters it will be compressed to 20.
# The same if the id is the same for multiple files. In the latter case a new id will
# be generated in the form 'text_01'...'text_n'.
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.