#' Read Gene Expression Data from a tome file
#'
#' @param tome tome file to read. Required.
#' @param genes A vector of gene names to read. If NULL, will read all genes.
#' @param regions The gene regions to use. Can be "exon", "intron", or "both". Default = "exon".
#' @param units The type of values to return. Can be "counts" or "cpm". Default = "counts".
#' @param transform Transformation to apply to values. Can be "none", "log", "log2", "log10". Log transforms will add 1 to values before transformation. Default = "none".
#' @param format The format of the output. Can be "data.frame", "matrix", or "dgcMatrix" (sparse matrix). Default is "data.frame".
#'
#' @return A data.frame with sample_name as the first column and each subsequent column
#' containing gene expression values and named for the genes; Or a matrix with columns as genes and rows as samples.
#'
read_tome_gene_data <- function(tome,
genes = NULL,
regions = "exon",
units = "counts",
transform = "none",
format = "data.frame") {
jagged <- read_tome_genes_jagged(tome,
genes,
regions)
if(format == "data.frame") {
out <- jagged_to_data.frame(jagged,
rows = "sample_names",
cols = "gene_names")
} else if(format == "matrix") {
out <- jagged_to_matrix(jagged,
rows = "sample_names",
cols = "gene_names")
} else if(format == "dgCMatrix") {
out <- jagged_to_dgCMatrix(jagged,
rows = "sample_names",
cols = "gene_names")
}
if(units == "cpm") {
if(regions == "exon") {
total_counts <- c(h5read(tome, "/data/total_exon_counts"))
} else if(regions == "intron") {
total_counts <- c(h5read(tome, "/data/total_intron_counts"))
} else if(regions == "both") {
total_counts <- c(h5read(tome, "/data/total_exon_counts") + h5read(tome, "/data/total_intron_counts"))
}
out[,genes] <- out[,genes]/(total_counts/1e6)
}
if(transform == "log") {
out[,genes] <- log(out[,genes] + 1)
} else if(transform == "log2") {
out[,genes] <- log2(out[,genes] + 1)
} else if(transform == "log10") {
out[,genes] <- log10(out[,genes] + 1)
}
h5closeAll()
out
}
#' Read Sample Expression Data from a tome file
#'
#' @param tome tome file to read. Required.
#' @param samples A vector of sample names to read. Required
#' @param regions The gene regions to use. Can be "exon", "intron", or "both". Default = "exon".
#' @param units The type of values to return. Can be "counts" or "cpm". Default = "counts".
#' @param transform Transformation to apply to values. Can be "none", "log", "log2", "log10". Log transforms will add 1 to values before transformation. Default = "none".
#' @param format The format of the output. Can be "data.frame", "matrix", or "dgcMatrix" (sparse matrix). Default is "data.frame".
#'
#' @return A data.frame with gene_name as the first column and each subsequent column
#' containing gene expression values and named for the samples; Or a matrix with columns as samples and rows as genes.
#'
read_tome_sample_data <- function(tome,
samples,
regions = "exon",
units = "counts",
transform = "none",
format = "data.frame") {
#library(purrr)
#library(dplyr)
jagged <- read_tome_samples_jagged(tome,
samples,
regions)
if(format == "data.frame") {
out <- jagged_to_data.frame(jagged,
rows = "gene_names",
cols = "sample_names")
} else if(format == "matrix") {
out <- jagged_to_matrix(jagged,
rows = "gene_names",
cols = "sample_names")
} else if(format == "dgCMatrix") {
out <- jagged_to_dgCMatrix(jagged,
rows = "gene_names",
cols = "sample_names")
}
if(units == "cpm") {
root <- rhdf5::H5Fopen(tome)
sample_names <- rhdf5::h5read(root,"/sample_names")
sample_index <- match(samples, sample_names)
if(regions == "exon") {
total_counts <- c(rhdf5::h5read(root, "/data/total_exon_counts")[sample_index])
} else if(regions == "intron") {
total_counts <- c(rhdf5::h5read(root, "/data/total_intron_counts")[sample_index])
} else if(regions == "both") {
total_counts <- c(rhdf5::h5read(root, "/data/total_exon_counts")[sample_index] + rhdf5::h5read(root, "/data/total_intron_counts")[sample_index])
}
out[,samples] <- sweep(out[,samples], 2, (total_counts/1e6), "/")
}
if(transform == "log") {
out[,samples] <- log(out[,samples] + 1)
} else if(transform == "log2") {
out[,samples] <- log2(out[,samples] + 1)
} else if(transform == "log10") {
out[,samples] <- log10(out[,samples] + 1)
}
out
}
#' Get all gene names in a tome file
#'
#' @param tome tome file to read.
#'
read_tome_gene_names <- function(tome) {
read_tome_vector(tome,"/gene_names")
}
#' Get all sample names in a tome file
#'
#' @param tome tome file to read.
#'
read_tome_sample_names <- function(tome) {
read_tome_vector(tome,"/sample_names")
}
#' Get exon lengths from a tome file
#'
#' @param tome tome file to read.
#' @param genes specific genes to retrieve
#' @param return_as If "vector", will return only the length values as a vector.
#' If "data.frame", will return a data.frame with two columns: "gene_name" and "exon_length".
#'
read_tome_exon_lengths <- function(tome,
genes = NULL,
return_as = "vector") {
exon_lengths <- read_tome_vector(tome, "/data/exon_lengths")
if(return_as == "data.frame" | !is.null(genes)) {
tome_genes <- read_tome_gene_names(tome)
exon_lengths <- data.frame(gene_name = tome_genes,
exon_length = exon_lengths)
}
if(!is.null(genes)) {
exon_lengths <- exon_lengths[match(genes,exon_lengths$gene_name),]
}
if(return_as == "vector") {
unlist(exon_lengths$exon_length)
} else if(return_as == "data.frame") {
exon_lengths
}
}
#' Get intron lengths from a tome file
#'
#' @param tome tome file to read.
#' @param genes specific genes to retrieve
#' @param return_as If "vector", will return only the length values as a vector.
#' If "data.frame", will return a data.frame with two columns: "gene_name" and "intron_length".
#'
read_tome_intron_lengths <- function(tome,
genes = NULL,
return_as = "vector") {
intron_lengths <- read_tome_vector(tome,"/data/intron_lengths")
if(return_as == "data.frame" | !is.null(genes)) {
tome_genes <- read_tome_gene_names(tome)
intron_lengths <- data.frame(gene_name = tome_genes,
intron_length = intron_lengths)
}
if(!is.null(genes)) {
intron_lengths <- intron_lengths[match(genes,intron_lengths$gene_name),]
}
if(return_as == "vector") {
unlist(intron_lengths$intron_length)
} else if(return_as == "data.frame") {
intron_lengths
}
}
#' Get total per-gene counts from a tome-file.
#'
#' Useful for computing CPM or FPKM
#'
#' @param tome tome file to read.
#' @param region the gene regions to use. Can be "exon", "intron", or "both". Default = "exon"
#'
read_tome_total_counts <- function(tome,
region = "exon") {
if(region == "exon") {
total_counts <- read_tome_vector(tome,"data/total_exon_counts")
} else if(region == "intron") {
total_counts <- read_tome_vector(tome,"data/total_intron_counts")
} else if(region == "both") {
total_exon_counts <- read_tome_vector(tome,"data/total_exon_counts")
total_intron_counts <- read_tome_vector(tome,"data/total_intron_counts")
total_counts <- total_exon_counts + total_intron_counts
}
total_counts
}
#' Get dims for data stored in a tome file.
#'
#' @param tome tome file to read.
#' @param transpose logical indicating which orientation to get dims. Default = FALSE.
#' If FALSE, gives dimensions with samples as rows and genes as columns.
#' If TRUE, gives dimensions for genes as rows and samples as columns.
#'
read_tome_data_dims <- function(tome,
transpose = F) {
if(transpose == F) {
dims <- read_tome_vector(tome,"data/exon/dims")
} else if(transpose == T) {
dims <- read_tome_vector(tome,"data/t_exon/dims")
}
dims
}
#' Read annotations table from a tome file
#'
#' @param tome The location of the tome file to read.
#' @param groups The groups to read - matches column names using grep. Can provide multiple with c(). If NULL, will get all columns. Default is NULL.
#'
read_tome_anno <- function(tome,
groups = NULL) {
if(!is.null(groups)) {
anno <- read_tome_data.frame(tome,
"/sample_meta/anno",
stored_as = "vectors",
columns = c("sample_name", groups),
match_type = "grep",
get_all = FALSE)
} else {
anno <- read_tome_data.frame(tome,
"/sample_meta/anno",
stored_as = "vectors",
columns = "sample_name",
get_all = TRUE)
}
anno <- anno %>%
dplyr::select(sample_name, everything())
}
#' Read annotation descriptions table from a tome file
#'
#' @param tome The location of the tome file to read.
#'
read_tome_anno_desc <- function(tome) {
read_tome_data.frame(tome,
"/sample_meta/desc",
stored_as = "data.frame")
}
#' Read gene metadata table from a tome file
#'
#' @param tome The location of the tome file to read.
#' @param columns Specific columns to read If NULL, will get all columns. Default is NULL.
#'
read_tome_gene_meta <- function(tome,
columns = NULL) {
if(!is.null(columns)) {
read_tome_data.frame(tome,
"/gene_meta/genes",
stored_as = "vectors",
columns = c("gene_name", columns),
get_all = FALSE)
} else {
read_tome_data.frame(tome,
"/gene_meta/genes",
stored_as = "vectors",
columns = "gene_name",
get_all = TRUE)
}
}
#' Read gene metadata descriptions table from a tome file
#'
#' @param tome The location of the tome file to read.
#'
read_tome_gene_meta_desc <- function(tome) {
read_tome_data.frame(tome,
"/gene_meta/desc",
stored_as = "data.frame")
}
#' Read dendrogram descriptions table from a tome file
#'
#' @param tome The location of the tome file to read.
#'
read_tome_dend_desc <- function(tome) {
read_tome_data.frame(tome,
"/dend/desc",
stored_as = "data.frame")
}
#' Read stats table from a tome file
#'
#' @param tome the location of the tome file to read.
#' @param stats_name the name of the stats table to read
#' @param columns selected columns to read. If NULL, reads all columns. Default = NULL.
#' @param get_all logical, whether or not to append all other columns after the specified columns. Default = FALSE.
#'
read_tome_stats <- function(tome,
stats_name = NULL,
columns = NULL,
get_all = FALSE) {
ls <- rhdf5::h5ls(tome)
stats_names <- ls$name[ls$group == "/stats"]
stats_names <- stats_names[stats_names != "desc"]
if(is.null(stats_name)) { stats_name <- ".namenotfound" }
if(stats_name %in% stats_names) {
stats_target <- paste0("/stats/", stats_name)
if(!is.null(columns)) {
stats <- read_tome_data.frame(tome,
stats_target,
stored_as = "vectors",
columns = c("sample_name", columns),
get_all = get_all)
} else {
stats <- read_tome_data.frame(tome,
stats_target,
stored_as = "vectors",
columns = columns,
get_all = TRUE)
}
} else {
if(length(stats_names) > 0) {
stats_message <- paste0("A stats table name (stats_name) is required. Available in this dataset are: ", paste(stats_names,collapse = ", "))
} else {
stats_message <- "No stats tables are found in this dataset."
}
stop(stats_message)
}
stats
}
#' Read stats descriptions table from a tome file
#'
#' @param tome The location of the tome file to read.
#'
read_tome_stats_desc <- function(tome) {
read_tome_data.frame(tome,
"/stats/desc",
stored_as = "data.frame")
}
#' Read projection coordinates table from a tome file
#'
#' @param tome the location of the tome file to read.
#' @param proj_name the projection to read
#'
read_tome_projection <- function(tome,
proj_name = NULL) {
ls <- rhdf5::h5ls(tome)
proj_names <- ls$name[ls$group == "/projection"]
proj_names <- proj_names[proj_names != "desc"]
if(is.null(proj_name)) { proj_name <- ".namenotfound" }
if(proj_name %in% proj_names) {
proj_target <- paste0("projection/",proj_name)
proj <- read_tome_data.frame(tome,
proj_target,
stored_as = "data.frame")
proj
} else {
if(length(proj_names) > 0) {
proj_message <- paste0("A projection name (proj_name) is required. Available in this dataset are: ", paste(proj_names,collapse = ", "))
} else {
proj_message <- "No projections are found in this dataset."
}
stop(proj_message)
}
}
#' Read projection descriptions table from a tome file
#'
#' @param tome The location of the tome file to read.
#'
read_tome_projection_desc <- function(tome) {
read_tome_data.frame(tome,
"/projection/desc",
stored_as = "data.frame")
}
#' Read dendrogram object from a tome file
#'
#' @param tome the location of the tome file to read.
#' @param dend_name the dendrogram to read.
#'
read_tome_dend <- function(tome,
dend_name = NULL) {
ls <- rhdf5::h5ls(tome)
dend_names <- ls$name[ls$group == "/dend"]
dend_names <- dend_names[dend_names != "desc"]
if(is.null(dend_name)) { dend_name <- ".namenotfound" }
if(dend_name %in% dend_names) {
dend_target <- paste0("dend/",dend_name)
dend <- read_tome_serialized(tome,
dend_target)
dend
} else {
if(length(dend_names) > 0) {
dend_message <- paste0("A dendrogram name (dend_name) is required. Available in this dataset are: ", paste(dend_names,collapse = ", "))
} else {
dend_message <- "No dendrograms are found in this dataset."
}
stop(dend_message)
}
}
#' Read mapping table from a tome file
#'
#' @param tome the location of the tome file to read.
#' @param mapping_name the name of the mapping table to read
#' @param columns selected columns to read. If NULL, reads all columns. Default = NULL.
#' @param get_all logical, whether or not to append all other columns after the specified columns. Default = FALSE.
#'
read_tome_mapping <- function(tome,
mapping_name = NULL,
columns = NULL,
get_all = FALSE) {
ls <- rhdf5::h5ls(tome)
mapping_names <- ls$name[ls$group == "/mapping"]
mapping_names <- mapping_names[mapping_names != "desc"]
if(is.null(mapping_name)) { mapping_name <- ".namenotfound"}
if(mapping_name %in% mapping_names) {
mapping_target <- paste0("mapping/", mapping_name)
if(!is.null(columns)) {
mapping <- read_tome_data.frame(tome,
mapping_target,
stored_as = "vectors",
columns = c("sample_name", columns),
get_all = get_all)
} else {
mapping <- read_tome_data.frame(tome,
mapping_target,
stored_as = "vectors",
columns = "sample_name",
get_all = get_all)
}
} else {
if(length(mapping_names) > 0) {
mapping_message <- paste0("A mapping table name (mapping_name) is required. Available in this dataset are: ", paste(mapping_names,collapse = ", "))
} else {
mapping_message <- "No mapping tables are found in this dataset."
}
stop(mapping_message)
}
mapping
}
#' Read mapping descriptions table from a tome file
#'
#' @param tome The location of the tome file to read.
#'
read_tome_mapping_desc <- function(tome) {
read_tome_data.frame(tome,
"/mapping/desc",
stored_as = "data.frame")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.