# experiment.R
# Functions for interacting with Astrolabe experiments.
#' Load Astrolabe experiment.
#'
#' Load an Astrolabe experiment which can then be used for other orloj
#' functions. As part of loading the experiment, correct the file paths that
#' were generated by the Astrolabe platform to local file paths.
#'
#' @param experiment_path Path to the Astrolabe experiment.
#' @return Astrolabe experiment.
#' @export
loadExperiment <- function(experiment_path) {
experiment <- readRDS(file.path(experiment_path, "config.RDS"))
experiment$analysis_path <- file.path(experiment_path, "analysis")
experiment$reports_path <- file.path(experiment_path, "reports")
experiment$samples_path <- file.path(experiment_path, "samples")
experiment
}
#' Get experiment sample features.
#'
#' Get the sample features for a given Astrolabe experiment. Feature names are
#' converted from internal Astrolabe IDs to user-readable names.
#'
#' @param experiment An Astrolabe experiment.
#' @return Experiment sample features.
#' @export
getExperimentSampleFeatures <- function(experiment) {
sample_features <- experiment$sample_features
features <- experiment$features
# Experiment has no features, return just column with sample IDs.
if (ncol(features) == 0) return(experiment$samples[, "SampleId"])
features$FeatureId <- paste0("feature_", features$FeatureId)
m <- match(colnames(sample_features), features$FeatureId)
col_indices <- !is.na(m)
colnames(sample_features)[col_indices] <- features$FeatureName[m[col_indices]]
sample_features
}
#' Astrolabe experiment summary.
#'
#' @param experiment An Astrolabe experiment.
#' @export
experimentSummary <- function(experiment) {
cat(paste0(
"Astrolabe experiment with ",
nrow(experiment$samples), " samples and ",
nrow(experiment$channels), " channels\n\n"
))
cat(paste0(
"Classification channels:\n",
paste(experiment$class_channels, collapse = ", "),
"\n"
))
if (nrow(experiment$features) > 0) {
cat("\n")
cat("Sample features:\n")
for (row_idx in seq(nrow(experiment$features))) {
row <- experiment$features[row_idx, ]
feature_id <- paste0("feature_", row$FeatureId)
feature_values <- unique(experiment$sample_features[[feature_id]])
cat(paste0(
row$FeatureName, ": ", paste(feature_values, collapse = ", "), "\n"))
}
}
}
.chooseLevel <- function(experiment) {
# Choose default level for experiment.
if (is.null(experiment$organism)) {
return("Assignment")
} else if (experiment$organism == "profiling_only") {
return("Profiling")
} else {
return("Assignment")
}
}
#' Experiment cell subset counts.
#'
#' Cell subset counts and frequencies for all of the samples in an experiment.
#'
#' @param experiment An Astrolabe experiment.
#' @param level Cell subset level, such as "Assignment" or "Profiling".
#' @return Cell subset counts for that level.
#' @export
experimentCellSubsetCounts <- function(experiment,
level = .chooseLevel(experiment)) {
aggregate_statistics_filename <-
file.path(experiment$analysis_path, "combine_aggregate_statistics.RDS")
if (!file.exists(aggregate_statistics_filename)) {
stop(paste0(aggregate_statistics_filename, "not found"))
}
aggregate_statistics <- readRDS(aggregate_statistics_filename)
counts <- aggregate_statistics$subset_cell_counts
if (!(level %in% counts$Parent)) stop("level not found in cell subset counts")
counts <- counts %>%
dplyr::filter(Parent == level) %>%
dplyr::select(-Parent)
counts <- dplyr::left_join(experiment$samples, counts, by = "SampleId")
# Calculate frequencies.
counts <- counts %>%
dplyr::group_by(SampleId) %>%
dplyr::mutate(Freq = N / sum(N)) %>%
dplyr::ungroup()
counts
}
#' Experiment cell subset channel statistics.
#'
#' Cell subset channel statistics for all of the samples in an experiment.
#'
#' @param experiment An Astrolabe experiment.
#' @param level Cell subset level, such as "Assignment" or "Profiling".
#' @return Cell subset channel statistics for that level.
#' @export
experimentCellSubsetChannelStatistics <- function(experiment,
level = .chooseLevel(experiment)) {
aggregate_statistics_filename <-
file.path(experiment$analysis_path, "combine_aggregate_statistics.RDS")
if (!file.exists(aggregate_statistics_filename)) {
stop(paste0(aggregate_statistics_filename, "not found"))
}
aggregate_statistics <- readRDS(aggregate_statistics_filename)
stats <- aggregate_statistics$subset_channel_statistics
if (!(level %in% stats$Parent)) {
stop("level not found in cell subset channel statistics")
}
stats <- stats %>%
dplyr::filter(Parent == level) %>%
dplyr::select(-Parent)
stats <- dplyr::left_join(experiment$samples, stats, by = "SampleId")
stats
}
#' Map experiment cell subsets to numerical values.
#'
#' Create a map from experiment cell subsets in a given level to unique
#' numerical values. This map is used when generating Assignment and Profiling
#' columns in exported FCS files.
#'
#' @param experiment An Astrolabe experiment.
#' @param level Cell subset level.
#' @return Map from cell subsets to numerical values.
#' @export
experimentCellSubsetMap <- function(experiment,
level = .chooseLevel(experiment)) {
# Get cell subsets from aggregate statistics.
cell_subsets_file_path <-
file.path(experiment$analysis_path, "experiment_cell_subsets.RDS")
cell_subsets <- readRDS(cell_subsets_file_path)
cell_subsets <- unique(cell_subsets[[level]])
cell_subsets <- c(cell_subsets, "AstrolabeBead", "Debris", "Dead")
data.frame(
Value = seq(length(cell_subsets)),
CellSubset = cell_subsets,
stringsAsFactors = FALSE
)
}
#' Differential abundance analysis.
#'
#' Load the experiment differential abundance analysis, for a given cell subset
#' level.
#'
#' @param experiment An Astrolabe experiment.
#' @param level Cell subset level, such as "Assignment" or "Profiling".
#' @param convert_ids Whether to convert Astrolabe IDs to feature names.
#' @return Differential abundance analysis list.
#' @export
differentialAbundanceAnalysis <- function(experiment,
level = .chooseLevel(experiment),
convert_ids = TRUE) {
# Value for NA feature value. Samples with this value should not be included
# in the analysis.
feature_na <- "__NA__"
daa_filename <-
file.path(experiment$analysis_path, "differential_abundance_analysis.RDS")
if (!file.exists(daa_filename)) {
stop(paste0(daa_filename, " not found"))
}
differential_abundance_analysis <- readRDS(daa_filename)
daa <- differential_abundance_analysis$differential_abundance_analysis
if (!(level %in% names(daa))) return(NULL)
daa <- daa[[level]]
# Find feature values.
feature_values <- lapply(nameVector(names(daa)), function(feature_name) {
if(!is.factor(experiment$sample_features[[feature_name]])) {
# Reverse compatibility: Past versions of Astrolabe used character for
# sample feature values.
values <- experiment$sample_features[[feature_name]]
} else {
values <- levels(experiment$sample_features[[feature_name]])
}
values <- setdiff(values, feature_na)
})
# Convert feature IDs to names.
if (convert_ids) {
m <-
match(as.numeric(gsub("^feature_", "", names(daa))),
experiment$features$FeatureId)
names(daa) <- experiment$features$FeatureName[m]
names(feature_values) <- experiment$features$FeatureName[m]
}
# Convert tables to tibbles and only keep p-value, FDR, and logFC.
lapply(nameVector(names(daa)), function(feature_name) {
if (is.null(daa[[feature_name]])) {
NULL
} else {
tab <- as.data.frame(daa[[feature_name]]) %>%
tibble::rownames_to_column("CellSubset")
# Calculate max(logFC) for features with multiple values.
log_fc_cols <- grep("logFC", colnames(tab))
if (length(log_fc_cols) > 1) {
log_fc <- tab[, log_fc_cols]
max_log_fc <- apply(log_fc, 1, function(v) v[which.max(abs(v))])
tab$logFC <- max_log_fc
}
# Reverse compatibility: Median columns were labeled "median_" in past
# versions of Astrolabe.
feature_cols_old <- paste0("median_", feature_values[[feature_name]])
if (all(feature_cols_old %in% colnames(tab))) {
old_indices <- which(colnames(tab) %in% feature_cols_old)
colnames(tab)[old_indices] <-
gsub("median_", "", colnames(tab)[old_indices])
}
cols <-
c("CellSubset",
"logFC",
"PValue",
"FDR",
feature_values[[feature_name]])
tab[, cols]
}
})
}
#' Differential expression analysis.
#'
#' Load the experiment differential expression analysis across all levels and
#' all kits.
#'
#' @param experiment An Astrolabe experiment.
#' @param convert_ids Whether to convert Astrolabe IDs to user-supplied values.
#' @return Differential expression analysis list.
#' @export
experimentDifferentialExpressionAnalysis <- function(experiment,
convert_ids = TRUE) {
# Import differential expression analysis data.
dea <-
readRDS(file.path(
experiment$analysis_path,
"differential_abundance_analysis.RDS"))$differential_expression_analysis
if (is.null(dea)) return(dea)
if (!convert_ids) return(dea)
# Convert kit IDs to user-supplied values.
lapply(dea, function(dea_level) {
m <- match(names(dea_level), experiment$de_analysis_kits$Id)
names(dea_level) <- experiment$de_analysis_kits$Name[m]
dea_level
})
}
#' MDS map.
#'
#' Return the MDS map for the experiment, for a given level.
#'
#' @param experiment An Astrolabe experiment.
#' @param level Cell subset level, such as "Assignment" or "Profiling".
#' @param convert_ids Whether to convert Astrolabe IDs to feature names.
#' @return MDS map.
#' @export
experimentMds <- function(experiment,
level = .chooseLevel(experiment),
convert_ids = TRUE) {
mds_filename <-
file.path(experiment$analysis_path, "calculate_mds.RDS")
if (!file.exists(mds_filename)) stop(paste0(mds_filename, " not found"))
mds <- readRDS(mds_filename)
if (!(level %in% names(mds))) stop("cannot find level in MDS map")
mds <- mds[[level]]
# Calculate and add mean frequency over all samples.
cell_subset_counts <- experimentCellSubsetCounts(experiment, level = level)
mds_freqs <- cell_subset_counts %>%
dplyr::group_by(CellSubset) %>%
dplyr::summarize(Freq = mean(Freq))
mds <- dplyr::left_join(mds, mds_freqs, by = "CellSubset")
# Calculate and add mean marker intensities over all samples.
marker_stats <-
experimentCellSubsetChannelStatistics(experiment, level = level)
marker_stats <- marker_stats %>%
dplyr::group_by(CellSubset, ChannelName) %>%
dplyr::summarize(Median = mean(Median, na.rm = TRUE)) %>%
reshape2::dcast(CellSubset ~ ChannelName, value.var = "Median")
mds <- dplyr::left_join(mds, marker_stats, by = "CellSubset")
# Add max(fold change) and -log10(FDR) for each feature.
daa <-
differentialAbundanceAnalysis(experiment, level = level,
convert_ids = FALSE)
for (feature_id in experiment$features$FeatureId) {
feature_id_long <- paste0("feature_", feature_id)
feature_name <- feature_id_long
if (convert_ids) {
feature_name <-
subset(experiment$features, FeatureId == feature_id)$FeatureName
}
if (is.null(daa[[feature_id_long]])) {
# If we did not run DAA for a feature, update MDS map with empty values.
tab <-
data.frame(
CellSubset = unique(cell_subset_counts$CellSubset),
logFC = 0,
FDR = 1
)
} else {
# Ran DAA, take test results.
tab <- daa[[feature_id_long]]
tab <- tab[, c("CellSubset", "logFC", "FDR")]
}
colnames(tab) <-
c("CellSubset",
paste0(feature_name, "_logFC"),
paste0(feature_name, "_FDR"))
mds <- dplyr::left_join(mds, tab, by = "CellSubset")
}
mds
}
experimentLabelingStrategy <- function(experiment) {
if (experiment$organism == "profiling_only") return(NULL)
hierarchy <-
readRDS(file.path(experiment$analysis_path, "hierarchy.RDS"))$hierarchy
# Convert Assignment cell subsets to strings.
assignment <- setdiff(hierarchy$CellSubset, hierarchy$Parent)
descs <- lapply(orloj::nameVector(assignment), function(cell_subset) {
channels <-
.getSubsetChannelsFull(hierarchy, experiment$class_channels, cell_subset)
data.frame(
CellSubset = cell_subset,
Desc = .convertChannelsToDesc(channels),
stringsAsFactors = FALSE
)
}) %>% dplyr::bind_rows()
# Order according to Compartment order.
cell_subsets_file_path <-
file.path(experiment$analysis_path, "experiment_cell_subsets.RDS")
if (file.exists(cell_subsets_file_path)) {
# Experiment has Compartment level.
cell_subsets <- readRDS(cell_subsets_file_path)
cell_subsets <- unique(cell_subsets[, c("Compartment", "Assignment")])
cell_subsets <-
cell_subsets %>%
dplyr::filter(Assignment %in% descs$CellSubset) %>%
dplyr::rename(Parent = Compartment, CellSubset = Assignment)
} else {
# Reverse compatibility: Experiment does not have Compartment level, use
# Level_1 instead.
cell_subsets <-
lapply(orloj::nameVector(assignment), function(cell_subset) {
curr_subset <- cell_subset
while (curr_subset != "Root") {
curr_subset <- gsub("_unassigned", "", curr_subset)
parent <- curr_subset
curr_subset <- hierarchy$Parent[hierarchy$CellSubset == curr_subset]
}
data.frame(
Parent = parent,
CellSubset = cell_subset,
stringsAsFactors = FALSE
)
}) %>% dplyr::bind_rows()
cell_subsets <- cell_subsets[gtools::mixedorder(cell_subsets$Parent), ]
}
dplyr::left_join(cell_subsets, descs, by = "CellSubset")
}
.getSubsetChannels <- function(hierarchy, class_channels, cell_subset) {
# Get data frame with state of each channel for given subset.
hierarchy <- hierarchy[hierarchy$CellSubset == cell_subset, ]
channels <-
intersect(class_channels, names(which(colSums(!is.na(hierarchy)) > 0)))
# Unassigned cell subset with no markers (neither positive nor negative).
if (length(channels) == 0) {
return(data.frame(
CellSubset = cell_subset,
Channel = "(unassigned)",
State = NA
))
}
hierarchy <- hierarchy[, channels, drop = FALSE]
hierarchy$CellSubset <- cell_subset
ch <-
reshape2::melt(hierarchy, id.vars = "CellSubset", variable.name = "Channel",
value.name = "State")
ch$Channel <- as.character(ch$Channel)
ch[gtools::mixedorder(ch$Channel), ]
}
.getSubsetChannelsFull <- function(hierarchy, class_channels, cell_subset) {
# Get data frame with state of each channel for given subset, spanning the
# entire hierarchy.
df <- data.frame()
while (cell_subset %in% hierarchy$CellSubset) {
# Reverse compatibility: Before the addition of support for labeling
# unassigned subsets, parents were allowed to include "_unassigned".
cell_subset <- gsub("_unassigned", "", cell_subset)
df <- rbind(.getSubsetChannels(hierarchy, class_channels, cell_subset), df)
cell_subset <- subset(hierarchy, CellSubset == cell_subset)$Parent
}
df
}
.convertChannelsToDesc <- function(channels) {
# Convert channels data frame to a string.
channels$StateStr <-
ifelse(is.na(channels$State), "",
ifelse(channels$State, "+", "-"))
# Use factors to maintain subset order.
channels$CellSubset <-
factor(channels$CellSubset, levels = unique(channels$CellSubset))
channels <- channels %>%
dplyr::group_by(CellSubset) %>%
dplyr::summarize(Str = paste0(paste0(Channel, StateStr), collapse = " "))
paste0(channels$Str, collapse = ", ")
}
#' List of Assignment subsets.
#'
#' Return a vector of all Assignment subsets.
#''
#' @param experiment An Astrolabe experiment.
#' @return Character vector of Assignment subsets.
#' @export
experimentAssignmentSubsets <- function(experiment) {
if (experiment$organism == "profiling_only") return(NULL)
cell_subsets_file_path <-
file.path(experiment$analysis_path, "experiment_cell_subsets.RDS")
if (file.exists(cell_subsets_file_path)) {
# Experiment has Compartment level.
cell_subsets <- readRDS(cell_subsets_file_path)
unique(cell_subsets$Assignment)
} else {
# Reverse compatibility: Experiment does not have Compartment level, need
# to check for extend_assignment_unassigned.
subsets <- experimentLabelingStrategy(experiment)$CellSubset
if (!is.null(experiment$extend_assignment_unassigned) &&
experiment$extend_assignment_unassigned) {
hierarchy <-
readRDS(file.path(experiment$analysis_path, "hierarchy.RDS"))$hierarchy
subsets <- c(subsets, paste0(unique(hierarchy$Parent), "_unassigned"))
subsets <- gtools::mixedsort(unique(subsets))
}
subsets
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.