#' @name perform.bbknn
#' @aliases perform.bbknn
#'
#' @title Performs BBKNN integration
#'
#' @description Performs BBKNN integration on defined method-assays and reductions contained within. This is performed on reductions.
#'
#' @param object IBRAP S4 class object
#' @param assay Character. String containing indicating which assay to use
#' @param reduction Character. String defining the name of the reduction to provide for BBKNN. Default = NULL
#' @param graph.name.suffix Character. Should a suffix be added to the end of bbknn as the graph name, i.e. parameter changes?
#' @param batch Character. Column name in metadata indicating batch. Can be multiple.
#' @param approx Character. Employs annoy's approximate neighbour finding. Useful for large datasets but may increase correction.
#' @param metric. Character. Which distance metric to use when approx is TRUE, options: 'angular', 'euclidean', 'manhattan' or 'hamming'. Default = 'euclidean'
#' @param neighbors_within_batch Numerical. How many neighbours to report per batch. Default = 3
#' @param n_pcs Numerical. Range of principal components to use. Default = NULL
#' @param trim Numerical. Trims the n of neighbours per cell to this value. Helps with population independence. Default = NULL
#' @param annoy_n_trees Numerical. Number of trees to generate in annoy forest. More trees provides higher precision at the cost of increased resource demand and run time. Default = 10
#' @param use_faiss Boolean. Uses faiss package to compute nearest neighbour, this improves run time at the cost of precision. Default = TRUE
#' @param set_op_mix_ratio Numerical. UMAP connectivity parameter between 0 and 1. controls the blen d between a connectivity matrix formed exclusively from mutual nearest neighbour pairs (0) and a union of all observed neighbour relationships with the mutual pairs emphasised (1). Default = 1.0
#' @param local_connectivity Numerical. How many nearest neighbours of each cell are assumed to be fully connected. Default = 1
#' @param generate.diffmap Boolean. Should diffusion maps be generated from the neighourhood graphs, these will be stored in computational_reductions and can be used for umap generation and further neighbourhood generation. Default = TRUE
#' @param n_comps Numerical. How many components should be generated for the diffusion maps. Default = 15
#' @param diffmap.name.sufix Character. Should a suffix be added to the end of bbknn:diffmap as the reduction name, i.e. parameter changes?
#'
#' @return BBKNN connectivity graph contained in graphs in the indicated method-assays
#'
#' @examples
#'
#' object <- perform.bbknn(object = object,
#' assay = c('SCT', 'SCANPY', 'SCRAN'),
#' reduction = c('pca'),
#' batch = 'tech')
#'
#' @export
perform.bbknn <- function(object,
assay,
reduction,
graph.name.suffix = '',
batch,
approx = FALSE,
metric = 'euclidean',
neighbors_within_batch = 3,
n_pcs = NULL,
trim = NULL,
annoy_n_trees = 10,
use_faiss = TRUE,
set_op_mix_ratio = 1.0,
local_connectivity= 1,
generate.diffmap = FALSE,
n_comps = 15,
diffmap.name.suffix='') {
if(!is(object = object, class2 = 'IBRAP')) {
stop('object must be of class IBRAP\n')
}
if(!is.character(assay)) {
stop('assay must be character string\n')
}
for(x in assay) {
if(!x %in% names(object@methods)) {
stop(paste0('reduction: ', x, 'does not exist\n'))
}
}
if(!is.character(reduction)) {
stop('reduction must be character string\n')
}
for(x in reduction) {
for(i in assay) {
if(!x %in% names(c(object@methods[[i]]@computational_reductions, object@methods[[i]]@visualisation_reductions,
object@methods[[i]]@integration_reductions))) {
stop(paste0('reduction: ', x, ' does not exist\n'))
}
}
}
if(!is.character(graph.name.suffix)) {
stop('graph.name.suffix must be character string\n')
}
if(!is.character(batch)) {
stop('batch must be character string\n')
}
for(r in batch) {
if(!r %in% colnames(object@sample_metadata)) {
stop(crayon::cyan(paste0(r, ' in batch does not exist\n')))
}
}
if(!is.logical(approx)) {
stop('approx must be logical: TRUE/FALSE \n')
}
if(!is.character(metric)) {
stop('metric must be character \n')
}
if(!is.numeric(neighbors_within_batch)) {
stop('neighbors_within_batch must be numerical \n')
}
if(!is.null(trim)) {
if(!is.numeric(trim)) {
stop('trim must be character string\n')
}
}
if(!is.numeric(annoy_n_trees)) {
stop('annoy_n_trees must be numerical\n')
}
if(!is.logical(use_faiss)) {
stop('use_faiss must be logical: TRUE/FALSE\n')
}
if(!is.numeric(set_op_mix_ratio)) {
stop('set_op_mix_ratio must be numerical\n')
}
if(!is.numeric(local_connectivity)) {
stop('local_connectivity must be numerical\n')
}
if(!is.logical(generate.diffmap)) {
stop('generate.diffmap should be boolean. either TRUE/FALSE\n')
}
if(isTRUE(generate.diffmap)) {
if(!is.numeric(n_comps)) {
stop('n_comps must be numerical \n')
}
if(!is.character(diffmap.name.suffix)) {
stop('diffmap.name.suffix \n')
}
}
sc <- reticulate::import('scanpy')
pd <- reticulate::import('pandas')
for(p in assay) {
count <- 1
for(r in reduction) {
dims <- n_pcs[[count]]
scobj <- sc$AnnData(X = object@methods[[p]]@computational_reductions[[reduction[count]]])
scobj$obs_names <- as.factor(colnames(object))
scobj$var_names <- as.factor(colnames(object@methods[[p]]@computational_reductions[[reduction[count]]]))
scobj$obsm$update(X_pca = object@methods[[p]]@computational_reductions[[reduction[count]]])
if(length(colnames(as.data.frame(object@sample_metadata))) >= 1) {
if(length(batch) > 1) {
temp <- function(x) {
return(paste(x, collapse = '_'))
}
df <- object@sample_metadata[,batch]
df2 <- apply(X = df, MARGIN = 1, FUN = temp)
df <- cbind(df, batch = df2)
batch <- 'batch'
scobj$obs <- pd$DataFrame(data = as.data.frame(df))
} else {
scobj$obs <- pd$DataFrame(data = as.data.frame(object@sample_metadata))
}
}
if(dims == 0) {
cat(crayon::cyan(paste0(Sys.time(), ': npcs calculated\n')))
dims <- ncol(object@methods[[p]]@computational_reductions[[reduction[count]]])
}
if(is.null(trim)) {
cat(crayon::cyan(paste0(Sys.time(), ': initialising BBKNN for assay: ', p, ', reduction: ', r, '\n')))
sc$external$pp$bbknn(scobj,
batch_key = reticulate::r_to_py(as.character(batch)),
approx = as.logical(FALSE),
metric = as.character(metric),
neighbors_within_batch = as.integer(neighbors_within_batch),
n_pcs = dims,
annoy_n_trees = as.integer(annoy_n_trees),
use_faiss = as.logical(use_faiss),
set_op_mix_ratio = set_op_mix_ratio,
local_connectivity = local_connectivity)
cat(crayon::cyan(paste0(Sys.time(), ': BBKNN complete \n')))
} else if (!is.null(trim)) {
cat(crayon::cyan(paste0(Sys.time(), ': initialising BBKNN for assay: ', p, ', reduction: ', r, '\n')))
sc$external$pp$bbknn(scobj,
batch_key= reticulate::r_to_py(as.character(batch)),
approx = as.logical(FALSE),
metric = as.character(metric),
neighbors_within_batch = as.integer(neighbors_within_batch),
n_pcs = dims,
trim = as.integer(trim),
annoy_n_trees = as.integer(annoy_n_trees),
use_faiss = as.logical(use_faiss),
set_op_mix_ratio = set_op_mix_ratio,
local_connectivity = local_connectivity)
cat(crayon::cyan(paste0(Sys.time(), ': BBKNN complete \n')))
}
if(isTRUE(generate.diffmap)) {
cat(crayon::cyan(paste0(Sys.time(), ': calcualting diffusion map\n')))
sc$tl$diffmap(adata = scobj, n_comps = as.integer(n_comps))
cat(crayon::cyan(paste0(Sys.time(), ': diffusion map calculated\n')))
diffmap <- as.matrix(scobj$obsm[['X_diffmap']])
DC_names <- list()
counter <- 1
for(x in 1:ncol(diffmap)) {
DC_names[[counter]] <- paste0('DC_', counter)
counter <- counter + 1
}
DC_names <- unlist(DC_names)
colnames(diffmap) <- DC_names
rownames(diffmap) <- colnames(object)
}
graph.list <- list()
connectivities <- scobj$obsp[['connectivities']]
colnames(connectivities) <- colnames(object)
rownames(connectivities) <- colnames(object)
distances <- scobj$obsp[['distances']]
colnames(distances) <- colnames(object)
rownames(distances) <- colnames(object)
connectivities <- as(object = as.matrix(connectivities), Class = 'dgCMatrix')
distances <- as(object = as.matrix(distances), Class = 'dgCMatrix')
graph.list[['connectivities']] <- connectivities
graph.list[['distances']] <- distances
if('_' %in% unlist(x = strsplit(graph.name.suffix, split = ''))) {
cat(crayon::cyan(paste0(Sys.time(), ': _ cannot be used in graph.name.suffix, replacing with -\n')))
graph.name.suffix <- sub(pattern = '_', replacement = '-', x = diffmap.name.suffix)
}
object@methods[[p]]@neighbours[[paste0(r, '_bbknn_bbknn', graph.name.suffix[count])]] <- graph.list
cat(crayon::cyan(paste0(Sys.time(), ': bbknn results added to IBRAP object\n')))
if(isTRUE(generate.diffmap)) {
if('_' %in% (strsplit(x = diffmap.name.suffix, split = ''))) {
cat(crayon::cyan(paste0(Sys.time(), ': _ cannot be used in diffmap.name.suffix, replacing with -\n')))
diffmap.name.suffix <- sub(pattern = '_', replacement = '-', x = diffmap.name.suffix)
}
object@methods[[p]]@computational_reductions[[paste0(r, '_bbknn_bbknn:diffmap', diffmap.name.suffix[count])]] <- diffmap
}
count <- count + 1
}
}
return(object)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.