#' Download the Mouse Brain Atlas ontology as a list object
#'
#' No parameters
#'
#' @return a nested list object containing the Mouse Brain Atlas ontology
#' @export
get_mba_ontology <- function() {
# Download the ontology JSON file
temp <- tempfile()
download.file("http://api.brain-map.org/api/v2/structure_graph_download/1.json", temp)
# Read the JSON
raw_ontology <- jsonlite::fromJSON(temp)[["msg"]]
return(raw_ontology)
}
#' Convert a nested Mouse Brain Atlas ontology to a data.frame
#'
#' @param ontology A nested ontology object
#' @param ongology_df An existing ontology data.frame. This is used for recursion. You should use the default, NULL, to extract a full ontology.
#'
#' @return a data.frame with all descriptive columns for the ontology.
#' @export
flatten_mba_ontology <- function(ontology, ontology_df = NULL) {
l <- ontology
if(is.null(ontology_df)) {
ontology_df <- data.frame(l[names(l) != "children"])[0,]
ontology_df$n_children <- numeric()
}
if("children" %in% names(l)) {
child_df <- data.frame(l[names(l) != "children"])
n_children_of_children <- purrr::map_dbl(l$children,
function(x) {
if("children" %in% names(x)) {
length(x$children)
} else {
0
}
})
child_df$n_children <- n_children_of_children
ontology_df <- rbind(ontology_df, child_df)
for(i in 1:length(l$children)) {
child_list <- l$children[[i]]
ontology_df <- flatten_mba_ontology(child_list, ontology_df)
}
}
return(ontology_df)
}
#' Generate a taxon column that shows the "lineage" of each ontology term
#'
#' @param flat_ontology A flat ontology object generated by flatten_mba_ontology
#'
#' @return a data.frame with all descriptive columns for the ontology plus a new taxon column.
#' @export
generate_mba_taxons <- function(flat_ontology) {
flat_ontology <- arrange(flat_ontology,st_level)
taxons <- list("997" = "0")
for(i in 2:nrow(flat_ontology)) {
id <- as.character(flat_ontology$id[i])
parent_id <- as.character(flat_ontology$parent_structure_id[i])
taxons[id] <- paste0(taxons[parent_id], ";", id)
}
flat_ontology$taxons <- unlist(taxons)
return(flat_ontology)
}
#' Filter the MBA ontology to find children of a given structure acronym
#'
#' @param flat_ontology A flat ontology data.frame generated by flatten_mba_ontology
#' @param parent_acronym The acronym of a brain ontology structure to find
#' @param include_parent Whether or not to include the parent structure in the output. Default = FALSE.
#'
#' @return A data.frame with the filtered ontology only for children of the parent structure.
#' @export
filter_mba_ontology_children <- function(flat_ontology,
parent_acronym,
include_parent = FALSE) {
if(!parent_acronym %in% flat_ontology$acronym) {
stop(paste0("Parent acronym ", parent_acronym," not found in ontology."))
}
if(!"taxons" %in% names(flat_ontology)) {
flat_ontology <- generate_mba_taxons(flat_ontology)
}
parent_taxon <- flat_ontology$taxon[flat_ontology$acronym == parent_acronym]
children_ontology <- flat_ontology[grepl(paste0("^",parent_taxon), flat_ontology$taxon),]
if(!include_parent) {
children_ontology <- children_ontology[children_ontology$taxon != parent_taxon,]
}
return(children_ontology)
}
#' Generate a taxons column for a flattened ontology
#'
#' This column is needed for use with the taxa and metacodeR packages.
#'
#' @param flat_ontology A flat ontology data.frame generated by flatten_mba_ontology
#' @param id_column The id column used to link structures. default is "id".
#' @param parent_column The column specifying the id column of the parent of each structure. default is "parent_structure_id".
#' @param value_column The column of values to use to build the taxons column. Default is "id". Using "acronym" will yield a more readable taxonomy.
#' @param taxon_column The name of the column to use for storing the resulting taxons. Default is "taxons".
#'
#' @return a data.frame with an additional column of semicolon-separated taxons based on the value_column.
#'
#' @export
generate_taxons <- function(flat_ontology,
id_column = "id",
parent_column = "parent_structure_id",
value_column = "id",
taxon_column = "taxons") {
library(dplyr)
flat_ontology <- flat_ontology %>%
arrange(st_level)
taxon_list <- list(flat_ontology[[value_column]][1])
names(taxon_list)[1] <- flat_ontology[[id_column]][1]
for(i in 2:nrow(flat_ontology)) {
id <- as.character(flat_ontology[[id_column]][i])
value <- as.character(flat_ontology[[value_column]][i])
parent_id <- as.character(flat_ontology[[parent_column]][i])
taxon_list[[id]] <- paste0(taxon_list[[parent_id]], ";", value)
}
flat_ontology[[taxon_column]] <- unlist(taxon_list)
flat_ontology
}
#' Clean up taxons by removing structures no longer present after filtering
#'
#' @param flat_ontology A flat ontology data.frame generated by flatten_mba_ontology.
#' @param id_column The id column used to link structures. default is "id".
#' @param taxon_column The name of the column with taxons generated by generate_taxons().
#'
#' @return a data.frame with a modified taxons column that lacks any ids that are no
#' longer present in the ontology.
#'
#' @export
cleanup_taxons <- function(flat_ontology,
id_column = "id",
taxon_column = "taxons") {
library(purrr)
available_ids <- paste0(as.character(unique(flat_ontology[[id_column]])))
split_taxons <- strsplit(flat_ontology[[taxon_column]], ";")
new_taxons <- purrr::map(split_taxons,
function(x) {
x <- x[x %in% available_ids]
paste(x, collapse = ";", sep = "")
})
flat_ontology[[taxon_column]] <- new_taxons
flat_ontology
}
last_column_shift <- function(df,
col = "taxons") {
others <- names(df)[names(df) != col]
df[, c(others, col)]
}
#' Compute values hierarchicaly over a flat ontology
#'
#' Values accumulate from the bottom of the ontology to the top at each node,
#' and are computed only from the values of the direct children of each node.
#'
#' @param df A flat ontology data.frame with a taxons column
#' @param fun The function to run as a character object, e.g. "mean".
#' @param compute_column The column of values to use for the function computation.
#' @param result_column The name of the output column.
#' @param include_node Logical, should values assigned to non-leaf nodes be included in calculation?
#' @param na.rm If TRUE or FALSE, passed as a parameter to fun. If NULL (default), is ignored.
#' @param taxon_column The name of the taxons column. Default is "taxons"
#'
#' @return A flat ontology data.frame with the result_column added. The taxon column will be moved
#' to the last column position. This should behave similarly to mutate().
#'
#' @export
#'
#' @examples
#'
#' flat_ontology2 <- flat_ontology %>%
#' compute_hierarchical("mean","n_children","mean_children",
#' include_node = TRUE)
#'
compute_hierarchical <- function(df,
fun,
compute_column,
result_column,
include_node = FALSE,
na.rm = NULL,
taxon_column = "taxons") {
df[[result_column]] <- 0
# Get the depth by counting the semicolons in taxons
df$depth <- sapply(gregexpr(";",df[[taxon_column]]), function(x) sum(x != -1))
# Sort by deepest first
df <- df %>%
dplyr::mutate(original_order = 1:n()) %>%
dplyr::arrange(desc(depth))
for(i in 1:nrow(df)) {
if(df$n_children[i] == 0) {
# Otherwise, use the row for the leaf
df[[result_column]][i] <- df[[compute_column]][i]
} else {
# If this node has children, find them using the parent_structure_id column
children <- which(df$parent_structure_id == df$id[i])
# Get the values for the specified compute_column for the children
children_values <- df[[result_column]][children]
if(include_node) {
children_values <- c(df[[compute_column]][i], children_values)
}
# Run the function on the children_values
if(is.null(na.rm)) {
df[[result_column]][i] <- do.call(fun, list(children_values))
} else {
df[[result_column]][i] <- do.call(fun, list(children_values, na.rm = na.rm))
}
}
}
# rearrange results to match original order
df <- df %>%
arrange(original_order) %>%
select(-original_order, -depth)
# rearrange columns so that taxons is last (required for parse_tax_data)
df <- last_column_shift(df, taxon_column)
df
}
compute_children <- function(df,
taxon_column,
compute_column,
fun,
na.rm = NULL) {
res <- numeric(length = nrow(df))
for(i in 1:nrow(df)) {
parent_taxon <- df[[taxon_column]][i]
children <- which(grepl(paste0("^",parent_taxon),df[[taxon_column]]))
if(length(children) > 1) {
children <- setdiff(children, i)
}
children_values <- df[[compute_column]][children]
if(is.null(na.rm)) {
res[i] <- do.call(fun, list(children_values))
} else {
res[i] <- do.call(fun, list(children_values, na.rm = na.rm))
}
}
res
}
#' Get the Mouse Brain Atlas numeric structure ID(s) for given structure acronym(s)
#'
#' This retrieves the ID from an internal storage source in the package.
#'
#' @param acronym The structure acronym(s) to use.
#'
#' @return numeric value(s) for the target structure ID.
#'
#' @export
#'
#' @examples
#' MOp_id <- mba_structure_id("MOp")
mba_structure_id <- function(acronym) {
id_table <- read.csv(system.file("extdata", "mba_structure_id_to_acronym.csv", package = "cocoframer"))
matching_acronyms <- intersect(acronym, id_table$acronym)
if(length(matching_acronyms) == 0) {
stop(paste("Couldn't find a match for any acronyms provided:",
paste(acronym, collapse = ", ")))
}
missing_acronyms <- setdiff(acronym, id_table$acronym)
if(length(missing_acronyms > 0)) {
warning(paste("Couldn't find a match for these acronym(s):",
paste(missing_acronyms, collapse = ", ")))
}
id_table$id[match(matching_acronyms, id_table$acronym)]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.