#' @title Create a data.tree object to be used for aggregating or scaling
#' interval data
#'
#' @param data_intervals_dt \[`data.table()`\]\cr
#' Describes each interval present in the data. Includes a column for
#' '`{col_stem}`_start', '`{col_stem}`_end', and `col_stem`.
#' @param agg_intervals_dt \[`data.table()`\]\cr
#' Describes each interval that needs to be aggregated to. Includes a column
#' for '`{col_stem}`_start', '`{col_stem}`_end', and `col_stem`.
#' @inheritParams agg
#'
#' @details
#' `create_agg_interval_tree()` returns a `data.tree()` with three levels. The
#' root node is a place holder that covers the entire data interval. The second
#' level contains each aggregate node that needs to be made and each of these
#' aggregate node's have children nodes for the data intervals that are needed
#' for aggregation.
#'
#' `create_scale_interval_tree()` returns a `data.tree()` with a variable number
#' of levels dependent on the intervals available in the data. The root node
#' also covers the entire data interval, this node may or may not actually be in
#' `data_intervals_dt`. Each interval in `data_intervals_dt` is then positioned
#' in the tree so that it is a sub-interval of its parent interval node.
#'
#' @return \[`data.tree()`\] where the 'name' field of each node is in interval
#' notation and describes each left-closed, right-open interval. Each node
#' also includes a field for the 'left' and 'right' endpoint.
#'
#' @noRd
create_agg_interval_tree <- function(data_intervals_dt,
agg_intervals_dt,
col_stem) {
cols <- paste0(col_stem, "_", c("start", "end"))
# create the root of the interval tree that covers the full interval
full_int_start <- min(data_intervals_dt[[cols[1]]],
agg_intervals_dt[[cols[1]]],
na.rm = TRUE)
full_int_end <- max(data_intervals_dt[[cols[2]]],
agg_intervals_dt[[cols[2]]],
na.rm = TRUE)
full_int_name <- paste0("[", full_int_start, ", ", full_int_end, ")")
interval_tree <- create_interval_node(full_int_start, full_int_end,
full_int_name)
# create each interval node and place in the full interval tree
for (i_agg in 1:nrow(agg_intervals_dt)) {
new_agg_node <- create_interval_node(agg_intervals_dt[i_agg, get(cols[1])],
agg_intervals_dt[i_agg, get(cols[2])],
agg_intervals_dt[i_agg, get(col_stem)])
# subset to data intervals that are in the aggregate interval
child_ints <- data_intervals_dt[get(cols[1]) >= new_agg_node$left &
get(cols[2]) <= new_agg_node$right]
if (agg_intervals_dt[i_agg, include_NA]) {
child_ints <- rbind(
child_ints,
data_intervals_dt[is.na(get(cols[1])) & is.na(get(cols[2]))]
)
}
if (nrow(child_ints) > 0) {
for (i_sub in 1:nrow(child_ints)) {
new_child_node <- create_interval_node(child_ints[i_sub, get(cols[1])],
child_ints[i_sub, get(cols[2])],
child_ints[i_sub, get(col_stem)])
new_agg_node$AddChildNode(new_child_node)
}
}
interval_tree$AddChildNode(new_agg_node)
}
# check each aggregate node (level 2) and fill in any missing child intervals
subtrees <- data.tree::Traverse(interval_tree,
filterFun = function(x) x$level == 2)
fill_missing_intervals(interval_tree, subtrees, col_stem)
return(interval_tree)
}
#' @inheritParams create_agg_interval_tree
create_scale_interval_tree <- function(data_intervals_dt, col_stem) {
cols <- paste0(col_stem, "_", c("start", "end"))
# create the root of the interval tree that covers the full interval
full_int_start <- min(data_intervals_dt[[cols[1]]], na.rm = TRUE)
full_int_end <- max(data_intervals_dt[[cols[2]]], na.rm = TRUE)
full_int_name <- paste0("[", full_int_start, ", ", full_int_end, ")")
interval_tree <- create_interval_node(full_int_start, full_int_end,
full_int_name)
# if the full interval is included in `data_intervals_dt` then drop it
# since the root interval node is already made above
if (nrow(data_intervals_dt[get(cols[1]) == full_int_start &
get(cols[2]) == full_int_end]) > 0) {
data_intervals_dt <- data_intervals_dt[!(get(cols[1]) == full_int_start &
get(cols[2]) == full_int_end)]
}
# create each interval node and place in the full interval tree
if (nrow(data_intervals_dt) > 0) {
for (i in 1:nrow(data_intervals_dt)) {
new_node <- create_interval_node(data_intervals_dt[i, get(cols[1])],
data_intervals_dt[i, get(cols[2])],
data_intervals_dt[i, get(col_stem)])
place_new_interval_node(interval_tree, new_node)
}
}
# check each non-leaf node and fill in any missing child intervals
subtrees <- data.tree::Traverse(
interval_tree, filterFun = function(x) data.tree::isNotLeaf(x)
)
fill_missing_intervals(interval_tree, subtrees, col_stem)
return(interval_tree)
}
#' @title Create a node for an interval tree
#'
#' @param start \[`numeric(1)`\]\cr
#' the left endpoint of the interval tree node.
#' @param end \[`numeric(1)`\]\cr
#' the right endpoint of the interval tree node.
#' @param name \[`character(1)`\]\cr
#' name of the node in interval notation.
#'
#' @return \[`data.tree()`\] node with 'name', 'left', 'right' attributes
#'
#' @noRd
create_interval_node <- function(start, end, name) {
new_node <- data.tree::Node$new(name)
new_node$Set(left = start)
new_node$Set(right = end)
return(new_node)
}
#' @title Place a new interval node in an interval tree
#'
#' @description Recursive function to place a new interval node in an interval
#' tree.
#'
#' @param current_node \[`data.tree()`\]\cr
#' node of interval tree to put the `new_node` in.
#' @param new_node \[`data.tree()`\]\cr
#' new interval node to place somewhere below `current_node`.
#'
#' @details
#' Assumption is that `new_node` is a sub interval of `current_node` and
#' this is double checked.
#'
#' `new_node` can be placed below `current_node` as:
#' * another child node of `current_node`.
#' * another child node of `current_node` but with one of `current_node`'s
#' children placed as a child of `new_node`.
#' * somewhere below one of `current_node`'s children.
#'
#' @return Invisibly returns reference to modified `current_node` with
#' `new_node` placed as part of subtree.
#'
#' @noRd
place_new_interval_node <- function(current_node, new_node) {
if (data.tree::isLeaf(current_node)) {
current_node$AddChildNode(new_node)
} else {
# identify any children nodes that are sub intervals of the new node
sub_interval_nodes <- data.tree::Traverse(
current_node,
filterFun = function(x) current_node$level == x$level - 1 &
new_node$left <= x$left & new_node$right >= x$right
)
# identify child node that is parent interval of the new node
parent_interval_node <- data.tree::Traverse(
current_node,
filterFun = function(x) current_node$level == x$level - 1 &
x$left <= new_node$left & x$right >= new_node$right
)
if (length(sub_interval_nodes) > 0) {
# insert new node in between current node and each of the sub interval
# child nodes
for (child_node in sub_interval_nodes) {
current_node$RemoveChild(name = child_node$name)
new_node$AddChildNode(child_node)
current_node$AddChildNode(new_node)
}
} else if (length(parent_interval_node) > 0) {
# place new interval node somewhere below the current child node
for (child_node in parent_interval_node) {
place_new_interval_node(child_node, new_node)
}
} else {
# if new node didn't overlap with any other interval nodes then add a new
# child interval node
current_node$AddChildNode(new_node)
}
}
return(invisible(current_node))
}
#' @title create new interval nodes for any missing interval nodes
#'
#' @description Check that the children nodes cover the entire parent interval node
#' and create new interval nodes for any missing ranges.
#'
#' @param interval_tree \[`data.tree()`\]\cr
#' interval tree containing the subtree to be modified
#' @param subtrees \[`list(data.tree())`\]\cr
#' non-leaf subtrees to check and fill any missing intervals.
#' @inheritParams agg
#'
#' @return invisibly return reference to modified subtrees.
#'
#' @noRd
fill_missing_intervals <- function(interval_tree, subtrees, col_stem) {
cols <- paste0(col_stem, "_", c("start", "end"))
for (agg_node in subtrees) {
# get endpoints of subtree leaves
start <- agg_node$Get("left",
filterFun = function(x) data.tree::isLeaf(x))
end <- agg_node$Get("right",
filterFun = function(x) data.tree::isLeaf(x))
missing_intervals <- identify_missing_intervals(
data.table(start, end), data.table(agg_node$left, agg_node$right)
)
data.table::setnames(missing_intervals, c("start", "end"), cols)
gen_name(missing_intervals, col_stem = col_stem, format = "interval")
data.table::setnames(missing_intervals, paste0(col_stem, "_name"), col_stem)
# add any missing interval nodes
if (nrow(missing_intervals) > 0) {
for (i_missing in 1:nrow(missing_intervals)) {
missing_child_node <- create_interval_node(missing_intervals[i_missing, get(cols[1])],
missing_intervals[i_missing, get(cols[2])],
missing_intervals[i_missing, get(col_stem)])
agg_node$AddChildNode(missing_child_node)
}
}
}
data.tree::Sort(interval_tree, attribute = "left")
return(invisible(interval_tree))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.