#' @import Matrix
#' @importFrom dplyr summarise
#' @importFrom dplyr n
#' @export
FastFindAnchors = function(
tmp.dir = NULL,
nCores = NULL,
k.filter = 100,
k.anchor = 5,
sample.cut = 50,
verbose = T
) {
nCores = nCores %||% parallel::detectCores()
object.ncells = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/others/object_ncells.rds"))
offsets = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/others/offsets.rds"))
features = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/others/features.rds"))
nSample = length(object.ncells)
combinations = expand.grid(1:nSample, 1:nSample)
combinations = combinations[combinations$Var1 < combinations$Var2, , drop = FALSE]
if (verbose == T) {
message("Finding all pairwise anchors")
}
if (nSample >= sample.cut) {
rna.list = pbmcapply::pbmclapply(
1:nSample, function(i) {
rna = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/raw/", i,".rds"))
rna = rna@assays$RNA@data[features,]
return(rna)
}
)
ncell = c()
for (i in 1:length(rna.list)) {
ncell = append(ncell, ncol(rna.list[[i]]))
}
rna.average = pbmcapply::pbmclapply(
rna.list, function(rna) {
rna = rowMeans(rna)
return(rna)
}, mc.cores = 20
)
rna.average = do.call(cbind, rna.average)
rna.cor = cor(rna.average)
sample.tree = Seurat:::BuildSampleTree(rna.cor)
median.nfeature = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/others/median_nfeature.rds"))
paired = c(1, 2)
for (j in 1:nrow(sample.tree)) {
idx = Seurat:::ParseMergePair(sample.tree, j)
if (sum(ncell[idx$object2]) > sum(ncell[idx$object1])) {
# if (max(median.nfeature[idx$object2]) > max(median.nfeature[idx$object1])) {
tmp = idx$object2
idx$object2 = idx$object1
idx$object1 = tmp
}
p = lapply(idx$object2, function(n) {
a = idx$object1[which(rank(-rna.cor[n, idx$object1]) <= 5)]
return(cbind(n, a))
})
paired = rbind(paired, do.call(rbind, p))
}
paired = paired[-1,]
paired = data.frame(paired)
colnames(paired) = c("Var1", "Var2")
rm(rna.list)
gc(reset = T)
combinations = paired[!duplicated(paired),]
}
all.anchors = pbmcapply::pbmclapply(
X = 1:nrow(x = combinations),
FUN = function(row) {
FindAnchorsPair(
combinations = combinations,
row = row,
offsets = offsets,
k.filter = k.filter,
k.anchor = k.anchor,
tmp.dir = tmp.dir,
features = features,
verbose = verbose
)
},
mc.cores = nCores
)
if (nSample == 2) {
all.anchors = all.anchors$value[[1]]
} else {
all.anchors = do.call(what = 'rbind', args = all.anchors)
}
all.anchors = rbind(all.anchors, all.anchors[, c(2, 1, 3)])
# AddDatasetID = getFromNamespace("AddDatasetID", "Seurat")
all.anchors = Seurat:::AddDatasetID(anchor.df = all.anchors, offsets = offsets, obj.lengths = object.ncells)
all.anchors = data.table::as.data.table(all.anchors)
data.table::setindex(all.anchors, dataset2)
data.table::setindex(all.anchors, dataset1)
if (verbose == T) {
message("Merging data")
}
if (nSample < sample.cut) {
anchor.group <- all.anchors %>% group_by(dataset1, dataset2) %>% summarise(n = n())
similarity.matrix = matrix(data = 0, ncol = nSample, nrow = nSample)
similarity.matrix[lower.tri(similarity.matrix, diag=FALSE) | upper.tri(similarity.matrix, diag=FALSE)] = anchor.group$n
ncell.a = matrix(data = NA, ncol = nSample, nrow = nSample)
for (i in 1:nSample) {
ncell.a[i,] = object.ncells[i]
}
ncell.b = matrix(data = NA, ncol = nSample, nrow = nSample)
for (i in 1:nSample) {
ncell.b[,i] = object.ncells[i]
}
ncel.min = pmin(ncell.a, ncell.b)
similarity.matrix = similarity.matrix/ncel.min
similarity.matrix[upper.tri(x = similarity.matrix, diag = TRUE)] = NA
# BuildSampleTree = getFromNamespace("BuildSampleTree", "Seurat")
sample.tree = Seurat:::BuildSampleTree(similarity.matrix)
rna.bind = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/raw/1.rds"))
rna.bind = rna.bind@assays$RNA@data[features,]
for (i in 2:nSample) {
rna = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/raw/", i,".rds"))
rna.bind = cbind(rna.bind, rna@assays$RNA@data[features,])
}
} else {
idx = split(sample(1:nSample, size = nSample), cut(1:nSample, ceiling(nSample/sample.cut), labels = FALSE))
rna.list = pbmcapply::pbmclapply(
idx, function(i) {
rna.bind = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/raw/", i[1],".rds"))@assays$RNA@data[features,]
for (j in i[2:length(i)]) {
rna.bind = cbind(rna.bind, readRDS(paste0(tmp.dir, "/FastIntegrationTmp/raw/", j,".rds"))@assays$RNA@data[features,])
}
return(rna.bind)
}, mc.cores = length(idx)
)
rna.bind = do.call(cbind, rna.list)
}
rna.bind = ScaleData(rna.bind, verbose = F)
pca = RunPCA(rna.bind, verbose = F)
saveRDS(pca, paste0(tmp.dir, "/FastIntegrationTmp/others/raw_pca.rds"), compress = F)
saveRDS(sample.tree, paste0(tmp.dir, "/FastIntegrationTmp/others/sample_tree.rds"), compress = F)
saveRDS(all.anchors, paste0(tmp.dir, "/FastIntegrationTmp/anchors/anchors.rds"), compress = F)
return(NULL)
}
FindAnchorsPair = function(
tmp.dir = NULL,
combinations,
row,
offsets,
k.filter,
k.anchor,
features,
verbose= F
) {
i = combinations[row, 1]
j = combinations[row, 2]
object.list.1 = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/raw/", i, ".rds"))
object.list.2 = readRDS(paste0(tmp.dir, "/FastIntegrationTmp/raw/", j, ".rds"))
object.list.1@assays$RNA@counts = object.list.1@assays$RNA@data
object.list.2@assays$RNA@counts = object.list.2@assays$RNA@data
features = c(Seurat::VariableFeatures(object.list.1),Seurat::VariableFeatures(object.list.2))
features = names(sort(-table(features)))[1:2000]
object.1 = subset(x = object.list.1, features = features)
object.2 = subset(x = object.list.2, features = features)
ncell1 = ncol(object.1)
ncell2 = ncol(object.2)
object.1 = Seurat::ScaleData(object.1, features = features, verbose = F)
object.2 = Seurat::ScaleData(object.2, features = features, verbose = F)
object.1[["ToIntegrate"]] = object.1[["RNA"]]
DefaultAssay(object = object.1) = "ToIntegrate"
object.2[["ToIntegrate"]] = object.2[["RNA"]]
DefaultAssay(object = object.2) = "ToIntegrate"
object.pair = Seurat:::RunCCA(
object1 = object.1,
object2 = object.2,
assay1 = "ToIntegrate",
assay2 = "ToIntegrate",
features = features,
renormalize = FALSE,
rescale = FALSE,
num.cc = 30
)
object.pair = Seurat::L2Dim(object = object.pair, reduction = "cca")
anchors = Seurat:::FindAnchors(
object.pair = object.pair,
assay = c("ToIntegrate", "ToIntegrate"),
slot = "data",
cells1 = colnames(x = object.1),
cells2 = colnames(x = object.2),
internal.neighbors = list(NULL, NULL),
reduction = "cca.l2",
nn.reduction = "cca",
k.filter = k.filter,
k.anchor = k.anchor,
nn.method = "rann",
dims = 1:30, verbose = F
)
anchors[, 1] = anchors[, 1] + offsets[i]
anchors[, 2] = anchors[, 2] + offsets[j]
return(anchors)
}
#' @export
BuildIntegrationFile = function(
rna.list,
tmp.dir = NULL,
nCores = NULL,
nfeatures = 2000,
verbose = F
) {
if (verbose == T) {
message("Checking the rna list")
}
rna.list = SeuratObject:::CheckDuplicateCellNames(rna.list)
nCores = nCores %||% parallel::detectCores()
if (is.null(tmp.dir)) {
message("Not specify the temporary directory, will create one in current directory")
tmp.dir = "./"
}
if(file.exists(tmp.dir) == F) {
stop("The temporary directory not exist")
} else {
dir.create(paste0(tmp.dir, "/FastIntegrationTmp"))
dir.create(paste0(tmp.dir, "/FastIntegrationTmp/raw"))
dir.create(paste0(tmp.dir, "/FastIntegrationTmp/anchors"))
dir.create(paste0(tmp.dir, "/FastIntegrationTmp/others"))
dir.create(paste0(tmp.dir, "/FastIntegrationTmp/inte"))
}
if (verbose == T) {
message("Splitting and saving rna list in tmp dir")
}
pbmcapply::pbmclapply(
1:length(rna.list), function(i) {
rna = rna.list[[i]]
rna@assays$RNA@counts = matrix(0)
saveRDS(rna, paste0(tmp.dir, "/FastIntegrationTmp/raw/", i, ".rds"), compress = F)
return(NULL)
}, mc.cores = nCores
)
features = Seurat::SelectIntegrationFeatures(rna.list, nfeatures = nfeatures, verbose = F)
object.ncells = sapply(X = rna.list, FUN = function(x) dim(x = x)[2])
offsets = as.vector(x = cumsum(x = c(0, object.ncells)))[1:length(x = rna.list)]
median.nfeature = unlist(lapply(rna.list, function(rna){
return(median(rna$nFeature_RNA))
}))
saveRDS(features, paste0(tmp.dir, "/FastIntegrationTmp/others/features.rds"))
saveRDS(object.ncells, paste0(tmp.dir, "/FastIntegrationTmp/others/object_ncells.rds"))
saveRDS(median.nfeature, paste0(tmp.dir, "/FastIntegrationTmp/others/median_nfeature.rds"))
saveRDS(offsets, paste0(tmp.dir, "/FastIntegrationTmp/others/offsets.rds"))
return(NULL)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.