isScanned | R Documentation |
isScanned(doc, checkFuns = NULL, textNodeThreshold = 10)
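doc | either a character value, which is passed to xmlParse(), or an already-parsed XML document.
checkFuns | NULL (the default), a single function, or a list of functions; each is called with the parsed document, and the first result that evaluates to TRUE is returned.
textNodeThreshold | value passed as the second argument to isScannedPage() for each page (default 10).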
##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
## The function is currently defined as
function (doc, checkFuns = NULL, textNodeThreshold = 10)
{
    # Heuristically decide whether an XML document made of <page> elements
    # (containing text / img / fontspec / rect / line children) is a scan.
    #
    # Arguments:
    #   doc               a character value (parsed with xmlParse()) or an
    #                     already-parsed XML document.
    #   checkFuns         NULL, a function, or a list of functions. Each is
    #                     called as f(doc); the first result that evaluates to
    #                     TRUE is returned as-is.
    #   textNodeThreshold forwarded to isScannedPage() for every page.
    #
    # Value:
    #   FALSE when no heuristic fires; otherwise TRUE, or a length-1 named
    #   logical whose name states which heuristic fired (NoText,
    #   SameTextOnAllPages, SameTextOnAllPagesExceptFirst, ImagesSpanAllPages,
    #   TwoCoverPages, OneCoverPage), or the value produced by checkFuns.
    #
    # NOTE: return(Name = TRUE) silently drops the name because return()
    # ignores argument tags, so every labelled exit below uses c(Name = TRUE).

    if (is.character(doc))
        doc <- xmlParse(doc)

    # Any page content other than images, font specs, rectangles and lines.
    nodes <- getNodeSet(doc, "//page/*[not(local-name(.) = 'img') and not(local-name(.) = 'fontspec') and not(local-name(.) = 'rect') and not(local-name(.) = 'line')]")
    if (length(nodes) == 0)
        return(c(NoText = TRUE))

    # Special case: documents carrying this publisher string and more than 30
    # text nodes are treated as not scanned.
    isTropicalMedHyg <- length(getNodeSet(doc, "//text[ contains(., 'The American Society of Tropical Medicine and Hygiene')]")) > 0
    if (isTropicalMedHyg && sum(sapply(nodes, xmlName) == "text") > 30)
        return(FALSE)

    pg <- getNodeSet(doc, "//page")

    # Per-page verdict from isScannedPage(); its return type is defined
    # elsewhere, so sapply() is kept here rather than a typed vapply().
    textWords <- sapply(pg, isScannedPage, textNodeThreshold)
    if (!any(textWords))
        return(FALSE)

    pageHasImg <- vapply(pg, function(x) "img" %in% names(x), logical(1))
    if (all(textWords) && all(pageHasImg))
        return(TRUE)

    txt <- lapply(pg, function(x) unique(getPageText(x)))
    if (length(unique(unlist(txt))) == 1)
        return(c(SameTextOnAllPages = TRUE))

    if (!is.null(checkFuns)) {
        # Initialise so that an empty list of functions falls through instead
        # of failing with "object 'ans' not found".
        ans <- FALSE
        if (is.function(checkFuns))
            ans <- checkFuns(doc)
        else for (f in checkFuns) {
            ans <- f(doc)
            if (ans)
                break
        }
        if (ans)
            return(ans)
    }

    if (length(pg) > 2 && length(unique(txt[-1])) == 1)
        return(c(SameTextOnAllPagesExceptFirst = TRUE))

    # Label every <img> with the "number" attribute of its parent page.
    img <- getNodeSet(doc, "//page/img")
    names(img) <- xpathSApply(doc, "//page/img",
                              function(x) xmlGetAttr(xmlParent(x), "number"))

    # Only continue when every page, or every page except the first and last,
    # contains an image.
    if (length(pg) > 0 && (all(pageHasImg) || (length(pg) > 2) &&
        all(pageHasImg[-c(1, length(pg))]))) {
        y <- sapply(img, imgSpansPage)
        if (all(y))
            return(c(ImagesSpanAllPages = TRUE))

        # Group by page in document order. Grouping on the raw character
        # labels would sort them lexicographically ("1", "10", "11", "2", ...),
        # so dropping the first/last entries would drop the wrong pages once
        # there are ten or more. Pages without any image have no entry here.
        pageId <- factor(names(img), levels = unique(names(img)))
        byPage <- tapply(y, pageId, any)

        if (length(byPage) > 2 && all(byPage[-c(1, length(byPage))]))
            return(c(TwoCoverPages = TRUE))

        # Two-page document whose first page carries a download/cover notice
        # and whose second page is flagged and has a page-spanning image.
        if (length(pg) == 2 &&
            any(grepl("ResearchGate|Downloaded|JSTOR", txt[[1]])) &&
            textWords[2] && byPage[2])
            return(c(OneCoverPage = TRUE))
    }

    FALSE
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.