## Should be all element names (unless they are also attribute names!), since we only want attribute names
## (alternately we should define to only grab possible attribute names..)
SKIP = c("meta", "children", "member", "row", "cell", "seq", "matrix", "format", "names")
#' get_level
#'
#' get a data.frame of attribute values of a given node
#'
#' @param nex a nexml object
#' @param level a character vector indicating the class of node, see details
#'
#' @return Returns the attributes of specified class of nodes as a data.frame
#'
#' @details level should be a character vector giving the path to the specified node
#' group. For instance, `otus`, `characters`, and `trees` are top-level blocks (e.g.
#' child nodes of the root nexml block), and can be specified directly. To get metadata
#' for all "char" elements from all characters blocks, you must specify that `char` nodes
#' are child nodes to `character` nodes: e.g. `get_level(nex, "characters/char")`,
#' or similarly for states: `get_level(nex, characters/states)`.
#'
#' The return object is a data frame whose columns are the attribute names of the elements
#' specified. The column names match the attribute names except for "id" attribute, for which the column
#' is renamed using the node itself. (Thus `<otus id="os2">` would be rendered in a data.frame with column
#' called "otus" instead of "id"). Additional columns are
#' added for each parent element in the path; e.g. `get_level(nex, "otus/otu")` would include a column
#' named "otus" with the id of each otus block. Even though the method always returns the data frame
#' for all matching nodes in all blocks, these ids let you see which otu values came from which
#' otus block. This is identical to the function call `get_taxa()`.
#' Similarly, `get_level(nex, "otus/otu/meta")` would return additional columns 'otus' and
#' also a column, 'otu', with the otu parent ids of each metadata block. (This is identical to a
#' function call to `get_metadata`). This makes it easier to join data.frames as well, see examples
#'
#' @export
#' @importFrom dplyr select
get_level <- function(nex, level){
lvl <- strsplit(level, "/")[[1]]
out <- recursion(1, lvl)(nex) %>%
dplyr::select(!"nexml")
## drop columns that are all-na?
# all_na <- sapply(out, function(x) all(is.na(x)))
# out <- out[!all_na]
out
}
## Trick to apply nodelist_to_df iteratively
closure <- function(level, fun)
function(node) nodelist_to_df(node, level, fun)
recursion <- function(i, level){
if(i < length(level))
closure(level[i], recursion(i+1, level))
else
closure(level[i], attributes_to_row)
}
## Assumes slot(node, element) is a list
#' @importFrom dplyr bind_rows coalesce mutate %>%
#' @importFrom rlang :=
#' @importFrom uuid UUIDgenerate
nodelist_to_df <- function(node, element, fn, nodeId=NA){
idRefCol <- idRefColName(node)
if (is.na(nodeId))
nodeId <- node_id(node)
nodelist <- slot(node, element)
if(is.list(nodelist)){ ## node has a list of elements
out <- suppressWarnings(lapply(nodelist, fn)) %>% dplyr::bind_rows()
if (length(nodelist) > 0 && all(sapply(nodelist, is, "meta"))) {
# meta elements may have nested meta elements, retrieve these here too
ids <- sapply(nodelist,
function(n) if (length(n@children) > 0) uuid::UUIDgenerate() else NA)
if (length(ids) > 0 && any(!is.na(ids))) {
mout <- dplyr::mutate(out, "Meta" = coalesce_(out$LiteralMeta,
out$ResourceMeta,
ids))
nested <- mapply(function(n, id) nodelist_to_df(n, "children", fn, id),
n = nodelist,
id = mout[,"Meta"])
if (is.null(names(nested))) nested <- as.data.frame(nested[,1])
out <- dplyr::bind_rows(mout, unname(nested))
}
}
dplyr::mutate(out, "{idRefCol}" := {{ nodeId }}) -> out
} else { ## handle case when node has only one element
fn(nodelist) %>%
dplyr::mutate("{idRefCol}" := {{ nodeId }})
}
}
node_id <- function(node){
if("id" %in% slotNames(node))
slot(node, "id")
else
"root"
}
idRefColName <- function(node){
clname <- class(node)
super <- names(getClass(clname)@contains)
# meta elements can be nested, avoid clobbering the ID column with the IDREF
if (length(super) > 0 && (super[1] == "meta"))
super[1]
else
clname
}
#' @importFrom rlang :=
#' @importFrom dplyr rename
attributes_to_row <- function(node){
who <- slotNames(node)
## Avoid things that are not attributes:
types <- getSlots(class(node)) # actual slot values may be derived types
who <- who[ types %in% c("character", "integer", "numeric", "logical") ]
# the "about" attribute is only needed for RDF extraction
who <- who[!(who %in% c("names","about"))]
## Extract attributes, use NAs for numeric(0) / character(0) values
tmp <- sapply(who, function(x) slot(node, x))
tmp[sapply(tmp,length) < 1] <- NA
## Coerce into a row of a data.frame & rename id column to match class
out <- data.frame(as.list(tmp), stringsAsFactors=FALSE)
if("id" %in% who)
out <- dplyr::rename(out, "{class(node)}" := "id")
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.