#' @title Legiscan Bill Text Data
#' @description Parses the JSON output for a single bill from the bills
#' subdirectory of a LegiScan master data download and retrieves the full
#' text of every text version listed for that bill.
#' @param fileobject A JSON file object from the bills subdirectory
#' @return A data frame with one row per entry in the bill's \code{texts}
#' element. It contains the bill ID columns, the columns of the
#' \code{texts} element, and a character column \code{full_bill_text}
#' holding the text of all paragraph nodes found at \code{state_link}
#' (joined by newlines). When a page cannot be retrieved or parsed the
#' value is \code{"Error loading the bill text"}.
#' @examples \donttest{
#' # Create directory object
#' directoryTree <- fileStructure("data/msHistoricalJSON/")
#'
#' # Create file list object
#' files <- fileLists(directoryTree)
#'
#' # Parse/clean the full bill text data from a LegiScan bill file
#' theBillText <- legiBillTxt(files[["bills"]][[10]][[12]])
#' }
#'
#' @importFrom RJSONIO fromJSON
#' @importFrom lubridate ymd
#' @importFrom plyr llply ldply
#' @importFrom dplyr bind_cols bind_rows
#' @importFrom magrittr %>%
#' @importFrom httr http_status GET status_code content
#' @importFrom XML htmlParse xmlValue xpathApply
#' @export legiBillTxt
#' @family Parsing and Cleaning LegiScan Data
#' @name legiBillTxt
legiBillTxt <- function(fileobject) {

  # Parse the JSON object
  billobject <- billdata(eval(fileobject))

  # ID elements for the bill
  IDs <- billids(billobject)

  # Metadata for each text version of the bill
  # NOTE(review): assumes billdata() returns `texts` as a data frame with a
  # `state_link` column -- confirm against billdata()
  texts <- billobject[["texts"]]

  # Repeat the ID record once per text version so the two tables can be
  # bound column-wise into a rectangular data frame
  billTxt <- as.data.frame(
    dplyr::bind_cols(IDs[rep(seq_len(nrow(IDs)), nrow(texts)), ], texts),
    stringsAsFactors = FALSE
  )

  # One URL per text version
  links <- as.character(billTxt$state_link)

  # Placeholder stored when the text of a bill cannot be retrieved
  errorText <- "Error loading the bill text"

  # Download a single page and return the text of its paragraph nodes as
  # one string; any failure (connection error, non-200 status, unparsable
  # body) yields the placeholder instead of aborting the whole bill
  fetchText <- function(link) {
    tryCatch({
      response <- httr::GET(link)
      # Compare the numeric status code rather than the status message,
      # whose capitalisation differs between httr versions
      if (httr::status_code(response) == 200L) {
        # Parse the body that was already downloaded instead of asking
        # libxml2 to fetch the URL a second time
        page <- XML::htmlParse(httr::content(response, as = "text"),
                               asText = TRUE)
        paragraphs <- XML::xpathApply(page, "//p", XML::xmlValue)
        paste(unlist(paragraphs), collapse = "\n")
      } else {
        errorText
      }
    }, error = function(e) errorText)
  }

  # Exactly one string per link, so the result lines up with the rows of
  # billTxt for any number of text versions (including none)
  fullText <- vapply(links, fetchText, character(1), USE.NAMES = FALSE)

  # Add the full text of the bill to the other bill text data; each row
  # keeps its own text
  billTxt[["full_bill_text"]] <- fullText

  # Return the parsed/cleaned object
  billTxt

} # End of Function
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.