#' @param x An R object to coerce to numeric.
#' @value Returns the value of `as.numeric(x)` if the call to as.numeric does not throw an error, otherwise returns `NA`.
.numericOrNA < function(x)
tryCatch(as.numeric(x), error = NA)
#' @param x A single "xml_node" object holding a single HTML <tr> row.
#' @param i The single integer specifying the <td> cell index from argument `x` to extract.
#' @value Character
.textAtIdx <- function(x, i, j)
xml_text(xml_find_one(x, sprintf('//td[%s]', i, j)))
#' @param res A single "xml_node" object holding the scraped Kannapedia HTML for a single RSP identifier (single strain).
#' @details Signature for the `res` argument: `assert(inherits(res, 'html_node') | inherits(res, 'xml_node')))`
.doExtract <- function(res = NULL) {
if (is.null(res))
return(
list(
chemprofile = c(
cannabinoids = NA,
terpenoids = NA
),
pctheterozy = NA,
gendistance = NA
)
)
.getP <- function(block)
.numericOrNA(xml_text(block))
.getG <- function(block)
lapply(
xml_find_all(block, '//tbody/tr[position()>1]'),
function(tr) {
list(
position = .textAtTd(tr, 1) %>% .numericOrNA,
distance = .textAtTd(tr, 3) %>% .numericOrNA,
relative = .textAtTd(tr, 2) %>% .numericOrNA
)
}
)
.getC <- function(block)
list(
'THC/A' = xml_find_one(block, '//p/strong[2]/em') %>% .numericOrNA,
'THCV/A' = xml_find_one(block, '//p/strong[4]/em') %>% .numericOrNA,
'CBD/A' = xml_find_one(block, '//p/strong[3]/em') %>% .numericOrNA,
'CBN/A' = xml_find_one(block, '//p/strong[7]/em') %>% .numericOrNA,
'CBC/A' = xml_find_one(block, '//p/strong[5]/em') %>% .numericOrNA,
'CBG/A' = xml_find_one(block, '//p/strong[6]/em') %>% .numericOrNA
)
.getT <- function(block)
list(
'alpha-Bisabolol' = xml_find_one(block,'//strong[1]') %>% .numericOrNA,
'Borneol' = xml_find_one(block,'//strong[2]') %>% .numericOrNA,
'Camphene' = xml_find_one(block,'//strong[3]') %>% .numericOrNA,
'Carene' = xml_find_one(block,'//strong[4]') %>% .numericOrNA,
'Caryophyllene oxide' = xml_find_one(block,'//strong[5]') %>% .numericOrNA,
'beta-Carophyllene' = xml_find_one(block,'//strong[6]') %>% .numericOrNA,
'Fenchol' = xml_find_one(block,'//strong[7]') %>% .numericOrNA,
'Geraniol' = xml_find_one(block,'//strong[8]') %>% .numericOrNA,
'alpha-Humulene' = xml_find_one(block,'//strong[9]') %>% .numericOrNA,
'Limonene' = xml_find_one(block,'//strong[10]') %>% .numericOrNA,
'Linalool' = xml_find_one(block,'//strong[11]') %>% .numericOrNA,
'Myrcene' = xml_find_one(block,'//strong[1]') %>% .numericOrNA,
'alpha-Phellandrene' = xml_find_one(block,'//strong[2]') %>% .numericOrNA,
'Terpinolene' = xml_find_one(block,'//strong[3]') %>% .numericOrNA,
'alpha-Terpineol' = xml_find_one(block,'//strong[4]') %>% .numericOrNA,
'alpha-Terpinene' = xml_find_one(block,'//strong[5]') %>% .numericOrNA,
'gamma-Terpinene' = xml_find_one(block,'//strong[6]') %>% .numericOrNA,
'Total Nerolidol' = xml_find_one(block,'//strong[7]') %>% .numericOrNA,
'Total Ocimene' = xml_find_one(block,'//strong[8]') %>% .numericOrNA,
'alpha-Pinene' = xml_find_one(block,'//strong[9]') %>% .numericOrNA,
'beta-Pinene' = xml_find_one(block,'//strong[10]') %>% .numericOrNA
)
list(
chemprofile = c(
terpenoids = c(
xml_find_first(content(res, 'parsed'), '//*[@id="x-section-2"]/div[1]/div[1]/div[2]/ul/li[1]') %>% .getT,
xml_find_first(content(res, 'parsed'), '//*[@id="x-section-2"]/div[1]/div[1]/div[2]/ul/li[2]') %>% .getT
) %>% merge,
cannabinoids = xml_find_first(content(res, 'parsed'), '//*[@id="x-section-2"]/div[1]/div[1]/div[1]') %>% .getC,
),
pctheterozy = xml_find_first(content(res, 'parsed'), '//*[@id="x-section-2"]/div[1]/div[2]/div/strong[1]/em') %>% .getP,
gendistance = xml_find_first(content(res, 'parsed'), '//*[@id="x-section-2"]/div[2]/table') %>% .getG
)
}
#' @example `extractMetabolitesFromHTML(read_html(content("<...>", "parsed"), as_html = T))`
#' @export
extractPhenoFromHTML <-
function(html, callback = function(lab, val) c(label=lab, value=val), .doExtract)
{
.check <- function(htm)
list(
isHttr = (isHttr <- inherits(htm, 'response')),
isXml2 = (isXml2 <- inherits(htm, 'html_node') | inherits(htm, 'xml_node')),
isParsed = (isParsed <- any(isHttr, isXml2)),
isCharv = (isCharv <- !isParsed && inherits(htm, 'character')),
isFile = (isFile <- inherits(htm, 'file') | inherits(htm, 'connection')),
isPath = (isPath <- isCharv & !isFile && file.exists(htm)),
isTypeError = (isTypeError <- !any(isParsed, isFile, isCharv))
)
res <- .doExtract(NULL) # Initialize with NA's
if (.check(html)['isTypeError'])
stop('Argument "html" must inherit from one of the following classes: "response", "html_node", "xml_node", "connection", "file", or "character".')
if (.check(html)['isCharv'])
html <- read_html(html, as_html = T)
if (.check(html)['isPath'])
html <- file(html)
if (.check(html)['isFile'])
html <- read_html(html, as_html = T)
if (.check(html)['isParsed'])
if (.check(html)['isHttr'])
res <- content(html, 'parsed')
if (.check(html)['isXml2'])
res <- read_html(html, as_html = T)
.doExtract(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.