#' Examine skew of clones towards a cluster or compartment
#'
#' The metric seeks to quantify how individual clones are skewed towards
#' a specific cellular compartment or cluster. A clone bias of **1** -
#' indicates that a clone is composed of cells from a single
#' compartment or cluster, while a clone bias of **0** - matches the
#' background subtype distribution. Please read and cite the following
#' [manuscript](https://pubmed.ncbi.nlm.nih.gov/35829695/)
#' if using [clonalBias()].
#'
#' @examples
#' #Making combined contig data
#' combined <- combineTCR(contig_list,
#' samples = c("P17B", "P17L", "P18B", "P18L",
#' "P19B","P19L", "P20B", "P20L"))
#'
#' #Getting a sample of a Seurat object
#' scRep_example <- get(data("scRep_example"))
#'
#' #Using combineExpresion()
#' scRep_example <- combineExpression(combined, scRep_example)
#' scRep_example$Patient <- substring(scRep_example$orig.ident,1,3)
#'
#' #Using clonalBias()
#' clonalBias(scRep_example,
#' cloneCall = "aa",
#' split.by = "Patient",
#' group.by = "seurat_clusters",
#' n.boots = 5,
#' min.expand = 2)
#'
#'
#' @param sc.data The single-cell object after [combineExpression()].
#' @param cloneCall How to call the clone - VDJC gene (**gene**),
#' CDR3 nucleotide (**nt**), CDR3 amino acid (**aa**),
#' VDJC gene + CDR3 nucleotide (**strict**) or a custom variable
#' in the data.
#' @param group.by The variable to use for calculating bias
#' @param split.by The variable to use for calculating the baseline frequencies.
#' For example, "Type" for lung vs peripheral blood comparison
#' @param n.boots number of bootstraps to downsample.
#' @param min.expand clone frequency cut off for the purpose of comparison.
#' @param exportTable Returns the data frame used for forming the graph.
#' @param palette Colors to use in visualization - input any
#' [hcl.pals][grDevices::hcl.pals].
#' @import ggplot2
#' @importFrom quantreg rqss
#' @importFrom stringr str_sort
#' @export
#' @concept SC_Functions
#' @return ggplot scatter plot with clone bias
clonalBias <- function(sc.data,
cloneCall="strict",
split.by=NULL,
group.by=NULL,
n.boots = 20,
min.expand=10,
exportTable = FALSE,
palette = "inferno") {
.checkSingleObject(sc.data)
cloneCall <- .theCall(.grabMeta(sc.data), cloneCall)
#Calculating bias
bias <- .get_clono_bias(sc.data,
split.by = split.by,
group.by = group.by ,
cloneCall=cloneCall,
min.expand=min.expand)
df_shuffle.list <- list()
#Bootstrapping
for (ii in seq_len(n.boots)) {
df_shuffle.list[[ii]] <- .get_clono_bias(sc.data,
split.by = split.by,
group.by = group.by,
cloneCall=cloneCall,
min.expand=min.expand,
do.shuffle = TRUE,
seed=ii)
}
df_shuffle <- Reduce(rbind, df_shuffle.list)
#Sumarrising boot straps
stat.summary <- df_shuffle %>%
group_by(ncells) %>%
summarise(mean = mean(bias),
std = sd(bias))
corrected_p <- 1-(0.05/nrow(bias))
bias$Top_state <- factor(bias$Top_state,
str_sort(unique(bias$Top_state), numeric = TRUE))
#Calculating Bias Z-score
bias$Z.score <- NA
for(i in seq_len(nrow(bias))) {
stat.pos <- bias[i,]$ncells
row.pull <- stat.summary[stat.summary$ncells == stat.pos,]
z.score <- (bias[i,]$bias - row.pull$mean)/row.pull$std
bias$Z.score[i] <- z.score
}
#Attaching the cloneSize of original combineExpression()
meta <- .grabMeta(sc.data)
meta <- meta[meta[,cloneCall] %in% bias[,"Clone"],]
meta <- unique(meta[,c(cloneCall, split.by, "cloneSize")])
bias$cloneSize <- NA
for(i in seq_len(nrow(bias))) {
split <- bias[,1][i]
clone <- bias[,3][i]
bias$cloneSize[i] <- as.vector(meta[which(meta[,cloneCall] == clone & meta[,split.by] == split),"cloneSize"])[1]
}
bias$cloneSize <- factor(bias$cloneSize, rev(levels(meta[, "cloneSize"])))
if (exportTable) {
return(bias)
}
bias$dotSize <- as.numeric(bias$cloneSize)
#else, return the plot
ggplot(bias, aes(x=ncells,y=bias)) +
geom_point(aes(fill=Top_state, size = dotSize),
shape = 21,
stroke = 0.25) +
stat_quantile(data=df_shuffle,
quantiles = c(corrected_p),
method = "rqss",
lambda = 3,
color = "black",
lty = 2) +
#This is ridiculous way to get around the internal ggplot "style"-based warnings
scale_size_continuous(breaks = as.vector(unique(bias$dotSize)),
labels = as.vector(unique(bias$cloneSize))) +
scale_fill_manual(values = .colorizer(palette, length(unique(bias[,"Top_state"])))) +
theme_classic() +
labs(size = "cloneSize", fill = "Group") +
xlab("Clonal Size") +
ylab("Clonal Bias")
}
#Background summary of clones
.get_clono_bg <- function(df,
split.by=split.by,
group.by=group.by,
cloneCall=cloneCall,
min.expand=10) {
df <- .data.wrangle(df, split.by, cloneCall, "both")
bg <- list()
for (s in seq_along(df)) {
clones <- table(df[[s]][,cloneCall])
clones <- clones[clones>=min.expand]
if (length(clones)>0) {
expanded <- df[[s]][which(df[[s]][,cloneCall] %in% names(clones)), group.by]
bg[[s]] <- table(expanded) / sum(table(expanded))
}
}
names(bg) <- names(df)
return(bg)
}
#Clone Bias
.get_clono_bias <- function(df,
split.by=NULL,
group.by=NULL,
cloneCall=cloneCall,
min.expand=10,
do.shuffle=FALSE,
seed=123) {
dat <- data.frame(Sample=character(),
Clone_i=character(),
Clone=character(),
ncells=integer(),
Top_state=double(),
freq=double(),
freq_diff=double(),
bias=double())
bg <- .get_clono_bg(df,
split.by=split.by,
min.expand = min.expand,
group.by = group.by,
cloneCall = cloneCall)
if (!is.null(split.by)) {
df <- .list.input.return(df, split.by)
} else {
if (inherits(x=df, what ="Seurat") | inherits(x=df, what ="SummarizedExperiment")) {
df <- list("Object" = .grabMeta(df))
}
}
cloneCall <- .theCall(df, cloneCall)
df <- .checkBlanks(df, cloneCall)
for (s in names(bg)) {
#All cells have the same type. Cannot calculate bias for this sample
if (length(bg[[s]]) > 1) {
clones <- table(df[[s]][,cloneCall])
clones <- clones[clones>=min.expand]
subtypes <- names(bg[[s]])
if (length(clones)>0) {
clones <- sort(clones, decreasing = TRUE)
expanded <- df[[s]][which(df[[s]][,cloneCall] %in% names(clones)), c(group.by, cloneCall)]
if (do.shuffle) { #reshuffle annotation column
expanded[[group.by]] <- sample(expanded[[group.by]])
}
for (i in seq_along(clones)) {
this <- names(clones)[i]
this.s <- paste0(this, "_", s)
ncells <- clones[i]
sub <- expanded[which(expanded[, cloneCall] == this), group.by]
sub <- factor(sub, levels=subtypes)
comp <- table(sub) / sum(table(sub))
diff <- comp - bg[[s]]
diff_norm <- round((comp - bg[[s]])/(1-bg[[s]]), 3)
top_state <- names(which.max(diff_norm))[1]
new_row <- c(s, this.s, this, as.integer(ncells), top_state,
as.double(comp[top_state]),
as.double(diff[top_state]),
as.double(diff_norm[top_state]))
dat[nrow(dat) + 1,] <- new_row
}
}
}
}
dat$ncells <- as.numeric(dat$ncells)
dat$freq <- as.double(dat$freq)
dat$freq_diff <- as.double(dat$freq_diff)
dat$bias <- as.double(dat$bias)
return(dat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.