#
# getOutcomes.R
#
# This script has functions for extracting outcomes from HTML generated by
# pdf2htmlEX.
getOutcomes =
#
# For one page, grab the outcomes: the lines found strictly between the first
# two lines matching "^Expected Annual" or "Actions/Services".
#
# Arguments:
#   page  the page node. It is only used to compute the default for `text`.
#   text  the text of the page, with one logical line per "\n". By default
#         this is the value of every .//text node under `page`, pasted
#         together (xpathSApply()/xmlValue() - presumably from the XML
#         package; confirm against the rest of the project).
#
# Value:
#   character() if fewer than two marker lines are present on the page;
#   otherwise the result of cleanOutcomes() on the remaining outcome lines.
#
# This won't get all of them if the outcomes span two pages, i.e. start on
# one and end on another. Let's see if this arises. We can see if the
# Actions/Services are on the same page. If not, then we need to go to the
# next page, etc. and find it.
#
#function(page, text = getAllText(xmlChildren(page)))
# Using the xpathSApply( ".//text") we split
function(page, text = paste(xpathSApply(page, ".//text", xmlValue), collapse = "\n"))
{
    # unlist() rather than [[1]] so that a zero-length or multi-element
    # `text` is handled instead of failing or being silently truncated.
    text = unlist(strsplit(text, "\\n"), use.names = FALSE)

    i = grep("^Expected Annual|Actions/Services", text)
    if(length(i) < 2)
        return(character())

    # Lines strictly between the two markers. seq_len() yields nothing when
    # the markers are adjacent, whereas (i[1] + 1):(i[2] - 1) would count
    # backwards and pick up the marker lines themselves.
    ctnt = text[ i[1] + seq_len(i[2] - i[1] - 1) ]

    # In some cases, the newline won't appear after the LCAP..... So remove it.
    # But if we use xpathSApply to define text, it will.
    ctnt = gsub("LCAP Year [0-9]+: [0-9]{4}-[0-9][0-9]", "", ctnt)
    ctnt = ctnt[ ctnt != "" ]

    # Drop header/footer boilerplate. A logical mask is used because
    # ctnt[ -grep(...) ] throws away *everything* when nothing matches.
    ctnt = ctnt[ !grepl("^(Measurable |Outcomes:|LCAP Year |Page [0-9]+ of )", ctnt) ]

    cleanOutcomes(ctnt)
}
cleanOutcomes =
#
# Separate each outcome from its leading label such as "1.2." or "1.2.a.".
#
# Arguments:
#   txt  character vector of outcome lines.
#
# Value:
#   A character vector the same length as txt holding the outcome text with
#   the label (and the space after it) removed. The names are the labels.
#   An element that does not start with a label is returned unchanged and
#   its name is the full element.
#
function(txt)
{
    # The patterns are anchored at the start of the string, so at most one
    # substitution can happen per element.
    labels = sub("^([0-9]+\\.[0-9]+\\.([a-z]+\\.)?) .*", "\\1", txt)
    outcomes = sub("^[0-9]+\\.[0-9]+\\.([a-z]+\\.)? ", "", txt)

    structure(outcomes, names = labels)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.