# Logistic transformation
logistic <- function(x,scale = NULL, center = NULL, invert = F){
a <- ifelse(invert,1, -1)
if(is.null(scale)) scale = sd(x)
if(is.null(center)) center = mean(x)
sigm <- 1/(1+ exp(a*(x-center)/scale))
return(sigm)
}
# Mark anchor as inconsistent based on cell-type labeling
# Anchors will be consider as inconsistent if they are linking cells classified as different cell populations.
inconsistent_anchors <- function(anchors, seed=123,
cell.labels=NULL,
label.confidence=1,
quantile_ss=0){
if (! cell.labels %in% colnames(anchors@object.list[[1]]@meta.data)) {
stop(sprintf("Please specify a valid metadata column with label annotations (cell.labels). %s not found", cell.labels))
}
labels <- data.frame()
for (i in seq_along(anchors@object.list)){
x <- anchors@object.list[[i]]
labels <- rbind(labels, data.frame(dataset = i, ncell = 1:ncol(x),
label = as.character(x@meta.data[,cell.labels])))
}
labels[,"dataset_cell"] <- paste0(labels[,"dataset"], "_", labels[,"ncell"])
#Convert NAs to 'unknown'
nas <- is.na(labels[,"label"])
labels[nas, "label"] <- "unknown"
nas <- labels[, "label"] == "NaN"
labels[nas, "label"] <- "unknown"
#Exclude 'Multi' labels
multis <- tolower(labels[, "label"]) == "multi"
labels[multis, "label"] <- "unknown"
df <- anchors@anchors
df[,"dataset1_cell1"] <- paste0(df[,"dataset1"], "_", df[,"cell1"])
df[,"dataset2_cell2"] <- paste0(df[,"dataset2"], "_", df[,"cell2"])
order1 <- match(df$dataset1_cell1, labels$dataset_cell)
df[,"Label_1"] <- labels[order1, "label"]
order2 <- match(df$dataset2_cell2, labels$dataset_cell)
df[,"Label_2"] <- labels[order2, "label"]
#Flag consistency of labels
df$Consistent <- df$Label_1 == df$Label_2 | df$Label_1 == "unknown" | df$Label_2 == "unknown"
df$Consistent[is.na(df$Consistent)] <- TRUE
#Rescue some anchors with a given probability
df <- probabilistic_reject(anchors=df, accept_rate = 1-label.confidence,
q = quantile_ss, seed = seed)
anchors@anchors <- df
return(anchors)
}
#Re-weight anchors based on combined seurat anchor score and rPCA distance
reweight_anchors <- function(ref.anchors, alpha=0.8,
dist.pct = 0.5,
dist.scale.factor = 2)
{
df <- ref.anchors@anchors
eps <- 10^(-6) #small number to avoid log(0)
dist.mean.center = quantile(df$dist.mean,dist.pct)
dist.mean.scale = sd(df$dist.mean,na.rm = T)/dist.scale.factor
squash <- logistic(x = df$dist.mean, invert = T,
center = dist.mean.center,
scale = dist.mean.scale)
#weighted geometric mean
logsum = alpha*log(squash + eps) + (1-alpha)*log(df$knn.score + eps)
df$score <- exp(logsum) - eps
ref.anchors@anchors <- df
return(ref.anchors)
}
# Probabilistic rejection of inconsistent anchors
# accept_rate: probability of rejecting anchors when labels are inconsistent
# q: anchor score quantile above which probabilistic rejection applies
probabilistic_reject <- function(anchors, accept_rate=0, q=0, seed=seed){
set.seed(seed)
e0 <- quantile(anchors$score, q)
pos <- anchors[anchors$Consistent==FALSE, "score"] - e0
bin <- as.numeric(pos > 0)
#Randomly accept anchors with accept_rate probability above quantile=q
accept <- bin*accept_rate > runif(length(bin), 0, 1)
#Add this column
flag <- anchors$Consistent
flag[flag==FALSE] <- accept
anchors['Retain_ss'] <- flag
return(anchors)
}
# To be used in the distance matrix among datasets (when building SampleTree integration)
strength_function <- function(anchor.df,
method = "weight.sum",
usecol = "score"
){
if(method== "counts") {
strength <- nrow(anchor.df)
}
if(method== "mean.score") {
if(usecol == "dist.mean") strength <- mean(1-anchor.df[,usecol])
if(usecol == "score") strength <- mean(anchor.df[,usecol])
}
if(method== "weight.sum") {
if(usecol == "dist.mean") strength <- sum((1-anchor.df[,usecol]))
if(usecol == "score") strength <- sum((anchor.df[,usecol]))
}
return(strength)
}
#aux function to determine anchor threshold from pairwise anchor score percentile
threshold_from_percentile <- function(anchors, dist.pct=0.8) {
i=1
min.pcntile <- vector()
for (l1 in lev1) {
j=1
pcntile <- vector()
for (l2 in lev2) {
v <- subset(ref.anchors@anchors, subset=(dataset1==l1 & dataset2==l2))$dist.mean
if(length(v)>0) {
pcntile[j] <- sort(v)[dist.pct*length(v)]
j=j+1
}
}
min.pcntile[i] <- min(pcntile)
i=i+1
}
dist.thr <- min(min.pcntile)
return(dist.thr)
}
# Compute similarity matrix betwen datasets based on different criteria
weighted.Anchors.STACAS <- function(
anchor.df,
offsets,
obj.lengths,
usecol = "score",
method = "weight.sum"
) {
usecol = match.arg(arg = usecol ,choices = c("dist.mean","score"))
method = match.arg(arg = method ,choices = c("weight.sum","counts"))
similarity.matrix <- matrix(data = 0, ncol = length(x = offsets), nrow = length(x = offsets))
similarity.matrix[upper.tri(x = similarity.matrix, diag = TRUE)] <- NA
total.cells <- sum(obj.lengths)
offsets <- c(offsets, total.cells)
for (i in 1:nrow(x = similarity.matrix)){
for (j in 1:ncol(x = similarity.matrix)){
if (!is.na(x = similarity.matrix[i, j])){
relevant.rows <- anchor.df[(anchor.df$dataset1 %in% c(i, j)) & (anchor.df$dataset2 %in% c(i, j)), ]
score <- strength_function(relevant.rows, method = method, usecol = usecol)/2
ncell <- min(obj.lengths[[i]], obj.lengths[[j]])
similarity.matrix[i, j] <- score / ncell
}
}
}
return(similarity.matrix)
}
get.blocklist = function(obj) {
data("genes.blocklist")
mm <- intersect(unlist(genes.blocklist$Mm), rownames(obj))
hs <- intersect(unlist(genes.blocklist$Hs), rownames(obj))
if (length(mm) > length(hs)) {
return(genes.blocklist$Mm)
} else {
return(genes.blocklist$Hs)
}
}
check.genes <- function(obj.list) {
allgenes <- rownames(obj.list[[1]])
lgt <- length(obj.list)
for (i in 2:lgt) {
allgenes <- c(allgenes, rownames(obj.list[[i]]))
}
frac <- table(allgenes)/lgt
found.in.all <- names(frac)[frac == 1]
frac.all <- length(found.in.all) / length(unique(allgenes))
if (frac.all < 1) {
message(sprintf("%.1f %% of genes found across all datasets", 100*frac.all))
}
if (frac.all < 0.5) {
warning("More than half of the genes were not consistently found across datasets. Please check that datasets use the same gene symbols")
}
return(found.in.all)
}
# Count anchors between data sets
# IMPORTED from Seurat 3.2.1
CountAnchors.Seurat <- function(
anchor.df,
offsets,
obj.lengths
) {
similarity.matrix <- matrix(data = 0, ncol = length(x = offsets), nrow = length(x = offsets))
similarity.matrix[upper.tri(x = similarity.matrix, diag = TRUE)] <- NA
total.cells <- sum(obj.lengths)
offsets <- c(offsets, total.cells)
for (i in 1:nrow(x = similarity.matrix)){
for (j in 1:ncol(x = similarity.matrix)){
if (!is.na(x = similarity.matrix[i, j])){
relevant.rows <- anchor.df[(anchor.df$dataset1 %in% c(i, j)) & (anchor.df$dataset2 %in% c(i, j)), ]
score <- nrow(x = relevant.rows)
ncell <- min(obj.lengths[[i]], obj.lengths[[j]])
similarity.matrix[i, j] <- score / ncell
}
}
}
return(similarity.matrix)
}
# Adjust sample tree to only include given reference objects
# IMPORTED from Seurat 3.2.1
AdjustSampleTree.Seurat <- function(x, reference.objects) {
for (i in 1:nrow(x = x)) {
obj.id <- -(x[i, ])
if (obj.id[[1]] > 0) {
x[i, 1] <- -(reference.objects[[obj.id[[1]]]])
}
if (obj.id[[2]] > 0) {
x[i, 2] <- -(reference.objects[[obj.id[[2]]]])
}
}
return(x)
}
###Modified function to return anchor distances together with anchor pairs - distances can be used to filter anchors at a later stage
FindIntegrationAnchors.wdist <- function(
object.list = NULL,
assay = NULL,
reference = NULL,
normalization.method = "LogNormalize",
anchor.features = 2000,
l2.norm = TRUE,
dims = 1:10,
k.anchor = 5,
k.filter = 200,
k.score = 30,
max.features = 200,
verbose = TRUE
) {
reduction <- "pca"
nn.method = "rann"
eps = 0
object.ncells <- sapply(X = object.list, FUN = function(x) dim(x = x)[2])
if (any(object.ncells <= max(dims))) {
bad.obs <- which(x = object.ncells <= max(dims))
stop("Max dimension too large: objects ", paste(bad.obs, collapse = ", "),
" contain fewer than ", max(dims), " cells. \n Please specify a",
" maximum dimensions that is less than the number of cells in any ",
"object (", min(object.ncells), ").")
}
if (!is.null(x = assay)) {
if (length(x = assay) != length(x = object.list)) {
stop("If specifying the assay, please specify one assay per object in the object.list")
}
object.list <- sapply(
X = 1:length(x = object.list),
FUN = function(x) {
DefaultAssay(object = object.list[[x]]) <- assay[x]
return(object.list[[x]])
}
)
} else {
assay <- sapply(X = object.list, FUN = DefaultAssay)
}
object.list <- Seurat:::CheckDuplicateCellNames(object.list)
slot <- "data"
if (is.numeric(anchor.features)) {
if (verbose) {
message("Computing ", anchor.features, " integration features")
}
anchor.features <- SelectIntegrationFeatures(
object.list = object.list,
nfeatures = anchor.features,
assay = assay
)
}
nn.reduction <- reduction
# if using pca, only need to compute the internal neighborhood structure once
# for each dataset
internal.neighbors <- list()
if (nn.reduction == "pca") {
k.filter <- NA
if (verbose) {
message("Computing within dataset neighborhoods")
}
k.neighbor <- max(k.anchor, k.score)
internal.neighbors <- pbapply::pblapply(
X = 1:length(x = object.list),
FUN = function(x) {
Seurat:::NNHelper(
data = Embeddings(object = object.list[[x]][[nn.reduction]])[, dims],
k = k.neighbor + 1,
method = nn.method,
eps = eps
)
}
)
}
# determine pairwise combinations
combinations <- expand.grid(1:length(x = object.list), 1:length(x = object.list))
combinations <- combinations[combinations$Var1 < combinations$Var2, , drop = FALSE]
# determine the proper offsets for indexing anchors
objects.ncell <- sapply(X = object.list, FUN = ncol)
offsets <- as.vector(x = cumsum(x = c(0, objects.ncell)))[1:length(x = object.list)]
if (is.null(x = reference)) {
# All pairwise combinations
if (verbose) {
message("Finding all pairwise anchors")
}
} else {
reference <- unique(sort(reference))
if (max(reference) > length(object.list)) {
stop('Error: requested reference object ', max(reference), " but only ",
length(object.list), " objects provided")
}
# modify the combinations matrix to retain only R-R and R-Q comparisons
if (verbose) {
message("Finding anchors between all query and reference datasets")
ok.rows <- (combinations$Var1 %in% reference) | (combinations$Var2 %in% reference)
combinations <- combinations[ok.rows, ]
}
}
# determine all anchors
all.anchors <- pbapply::pblapply(
X = 1:nrow(x = combinations),
FUN = function(row) {
i <- combinations[row, 1]
j <- combinations[row, 2]
suppressWarnings(
object.1 <- DietSeurat(
object = object.list[[i]],
assays = assay[i],
features = anchor.features,
counts = FALSE,
scale.data = TRUE,
dimreducs = reduction
))
suppressWarnings(
object.2 <- DietSeurat(
object = object.list[[j]],
assays = assay[j],
features = anchor.features,
counts = FALSE,
scale.data = TRUE,
dimreducs = reduction
))
# suppress key duplication warning
suppressWarnings(object.1[["ToIntegrate"]] <- object.1[[assay[i]]])
DefaultAssay(object = object.1) <- "ToIntegrate"
if (reduction %in% Reductions(object = object.1)) {
slot(object = object.1[[reduction]], name = "assay.used") <- "ToIntegrate"
}
object.1 <- DietSeurat(object = object.1, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction)
suppressWarnings(object.2[["ToIntegrate"]] <- object.2[[assay[j]]])
DefaultAssay(object = object.2) <- "ToIntegrate"
if (reduction %in% Reductions(object = object.2)) {
slot(object = object.2[[reduction]], name = "assay.used") <- "ToIntegrate"
}
object.2 <- DietSeurat(object = object.2, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction)
common.features <- intersect(
x = rownames(x = Loadings(object = object.1[["pca"]])),
y = rownames(x = Loadings(object = object.2[["pca"]]))
)
object.pair <- merge(x = object.1, y = object.2, merge.data = TRUE)
projected.embeddings.1<- t(x = GetAssayData(object = object.1, slot = "scale.data")[common.features, ]) %*%
Loadings(object = object.2[["pca"]])[common.features, ]
object.pair[['projectedpca.1']] <- CreateDimReducObject(
embeddings = rbind(projected.embeddings.1, Embeddings(object = object.2[["pca"]])),
assay = DefaultAssay(object = object.1),
key = "projectedpca1_"
)
projected.embeddings.2 <- t(x = GetAssayData(object = object.2, slot = "scale.data")[common.features, ]) %*%
Loadings(object = object.1[["pca"]])[common.features, ]
object.pair[['projectedpca.2']] <- CreateDimReducObject(
embeddings = rbind(projected.embeddings.2, Embeddings(object = object.1[["pca"]])),
assay = DefaultAssay(object = object.2),
key = "projectedpca2_"
)
object.pair[["pca"]] <- CreateDimReducObject(
embeddings = rbind(
Embeddings(object = object.1[["pca"]]),
Embeddings(object = object.2[["pca"]])),
assay = DefaultAssay(object = object.1),
key = "pca_"
)
slot(object = object.pair[["projectedpca.1"]], name = "cell.embeddings") <- Sweep(
x = Embeddings(object = object.pair[["projectedpca.1"]]),
MARGIN = 2,
STATS = apply(X = Embeddings(object = object.pair[["projectedpca.1"]]), MARGIN = 2, FUN = sd),
FUN = "/"
)
slot(object = object.pair[["projectedpca.2"]], name = "cell.embeddings") <- Sweep(
x = Embeddings(object = object.pair[["projectedpca.2"]]),
MARGIN = 2,
STATS = apply(X = Embeddings(object = object.pair[["projectedpca.2"]]), MARGIN = 2, FUN = sd),
FUN = "/"
)
object.pair <- L2Dim(object = object.pair, reduction = "projectedpca.1")
object.pair <- L2Dim(object = object.pair, reduction = "projectedpca.2")
reduction <- "projectedpca.1.l2"
reduction.2 <- "projectedpca.2.l2"
internal.neighbors <- internal.neighbors[c(i, j)]
anchors <- FindAnchors.wdist(
object.pair = object.pair,
assay = c("ToIntegrate", "ToIntegrate"),
slot = slot,
cells1 = colnames(x = object.1),
cells2 = colnames(x = object.2),
internal.neighbors = internal.neighbors,
reduction = reduction,
reduction.2 = reduction.2,
nn.reduction = nn.reduction,
dims = dims,
k.anchor = k.anchor,
k.filter = k.filter,
k.score = k.score,
max.features = max.features,
nn.method = nn.method,
eps = eps,
verbose = verbose
)
anchors[, 1] <- anchors[, 1] + offsets[i]
anchors[, 2] <- anchors[, 2] + offsets[j]
return(anchors)
}
)
all.anchors <- do.call(what = 'rbind', args = all.anchors)
all.anchors <- rbind(all.anchors, all.anchors[, c(2, 1, 3, 5, 4)]) ##keep distance information
all.anchors <- AddDatasetID.2(anchor.df = all.anchors, offsets = offsets, obj.lengths = objects.ncell) ##add dataset IDs
command <- LogSeuratCommand(object = object.list[[1]], return.command = TRUE)
anchor.set <- new(Class = "IntegrationAnchorSet",
object.list = object.list,
reference.objects = reference %||% seq_along(object.list),
anchors = all.anchors,
offsets = offsets,
anchor.features = anchor.features,
command = command
)
return(anchor.set)
}
#First (otherwise second) function
`%||%` <- function(lhs, rhs) {
if (!is.null(x = lhs)) {
return(lhs)
} else {
return(rhs)
}
}
##Modified Sweep function (from Seurat 3.1)
Sweep <- function(x, MARGIN, STATS, FUN = '-', check.margin = TRUE, ...) {
if (any(grepl(pattern = 'X', x = names(x = formals(fun = sweep))))) {
return(sweep(
X = x,
MARGIN = MARGIN,
STATS = STATS,
FUN = FUN,
check.margin = check.margin,
...
))
} else {
return(sweep(
x = x,
MARGIN = MARGIN,
STATS = STATS,
FUN = FUN,
check.margin = check.margin,
...
))
}
}
# Find nearest neighbors
# IMPORTED from Seurat 3.2.1
FindNN.Seurat <- function(
object,
cells1 = NULL,
cells2 = NULL,
internal.neighbors,
grouping.var = NULL,
dims = 1:10,
reduction = "cca.l2",
reduction.2 = character(),
nn.dims = dims,
nn.reduction = reduction,
k = 300,
nn.method = "rann",
eps = 0,
integration.name = 'integrated',
verbose = TRUE
) {
if (xor(x = is.null(x = cells1), y = is.null(x = cells2))) {
stop("cells1 and cells2 must both be specified")
}
if (!is.null(x = cells1) && !is.null(x = cells2) && !is.null(x = grouping.var)) {
stop("Specify EITHER grouping.var or cells1/2.")
}
if (is.null(x = cells1) && is.null(x = cells2) && is.null(x = grouping.var)) {
stop("Please set either cells1/2 or grouping.var")
}
if (!is.null(x = grouping.var)) {
if (nrow(x = unique(x = object[[grouping.var]])) != 2) {
stop("Number of groups in grouping.var not equal to 2.")
}
groups <- names(x = sort(x = table(object[[grouping.var]]), decreasing = TRUE))
cells1 <- colnames(x = object)[object[[grouping.var]] == groups[[1]]]
cells2 <- colnames(x = object)[object[[grouping.var]] == groups[[2]]]
}
if (verbose) {
message("Finding neighborhoods")
}
if (!is.null(x = internal.neighbors[[1]])) {
nnaa <- internal.neighbors[[1]]
nnbb <- internal.neighbors[[2]]
} else {
dim.data.self <- Embeddings(object = object[[nn.reduction]])[ ,nn.dims]
dims.cells1.self <- dim.data.self[cells1, ]
dims.cells2.self <- dim.data.self[cells2, ]
nnaa <- Seurat:::NNHelper(
data = dims.cells1.self,
k = k + 1,
method = nn.method,
eps = eps
)
nnbb <- Seurat:::NNHelper(
data = dims.cells2.self,
k = k + 1,
method = nn.method,
eps = eps
)
}
if (length(x = reduction.2) > 0) {
nnab <- Seurat:::NNHelper(
data = Embeddings(object = object[[reduction.2]])[cells2, ],
query = Embeddings(object = object[[reduction.2]])[cells1, ],
k = k,
method = nn.method,
eps = eps
)
nnba <- Seurat:::NNHelper(
data = Embeddings(object = object[[reduction]])[cells1, ],
query = Embeddings(object = object[[reduction]])[cells2, ],
k = k,
method = nn.method,
eps = eps
)
} else {
dim.data.opposite <- Embeddings(object = object[[reduction]])[ ,dims]
dims.cells1.opposite <- dim.data.opposite[cells1, ]
dims.cells2.opposite <- dim.data.opposite[cells2, ]
nnab <- Seurat:::NNHelper(
data = dims.cells2.opposite,
query = dims.cells1.opposite,
k = k,
method = nn.method,
eps = eps
)
nnba <- Seurat:::NNHelper(
data = dims.cells1.opposite,
query = dims.cells2.opposite,
k = k,
method = nn.method,
eps = eps
)
}
object <- SetIntegrationData(
object = object,
integration.name = integration.name,
slot = 'neighbors',
new.data = list('nnaa' = nnaa, 'nnab' = nnab, 'nnba' = nnba, 'nnbb' = nnbb, 'cells1' = cells1, 'cells2' = cells2)
)
return(object)
}
# Find Anchor pairs
# IMPORTED from Seurat 3.2.1
FindAnchorPairs.Seurat <- function(
object,
integration.name = 'integrated',
k.anchor = 5,
verbose = TRUE
) {
neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors')
max.nn <- c(ncol(x = neighbors$nnab), ncol(x = neighbors$nnba))
if (any(k.anchor > max.nn)) {
message(paste0('warning: requested k.anchor = ', k.anchor, ', only ', min(max.nn), ' in dataset'))
k.anchor <- min(max.nn)
}
if (verbose) {
message("Finding anchors")
}
# convert cell name to neighbor index
nn.cells1 <- neighbors$cells1
nn.cells2 <- neighbors$cells2
cell1.index <- suppressWarnings(which(colnames(x = object) == nn.cells1, arr.ind = TRUE))
ncell <- 1:nrow(x = neighbors$nnab)
ncell <- ncell[ncell %in% cell1.index]
anchors <- list()
# pre allocate vector
anchors$cell1 <- rep(x = 0, length(x = ncell) * 5)
anchors$cell2 <- anchors$cell1
anchors$score <- anchors$cell1 + 1
idx <- 0
indices.ab <- Indices(object = neighbors$nnab)
indices.ba <- Indices(object = neighbors$nnba)
for (cell in ncell) {
neighbors.ab <- indices.ab[cell, 1:k.anchor]
mutual.neighbors <- which(
x = indices.ba[neighbors.ab, 1:k.anchor, drop = FALSE] == cell,
arr.ind = TRUE
)[, 1]
for (i in neighbors.ab[mutual.neighbors]){
idx <- idx + 1
anchors$cell1[idx] <- cell
anchors$cell2[idx] <- i
anchors$score[idx] <- 1
}
}
anchors$cell1 <- anchors$cell1[1:idx]
anchors$cell2 <- anchors$cell2[1:idx]
anchors$score <- anchors$score[1:idx]
anchors <- t(x = do.call(what = rbind, args = anchors))
anchors <- as.matrix(x = anchors)
object <- SetIntegrationData(
object = object,
integration.name = integration.name,
slot = 'anchors',
new.data = anchors
)
if (verbose) {
message(paste0("\tFound ", nrow(x = anchors), " anchors"))
}
return(object)
}
# Top dim features
# IMPORTED from Seurat 3.2.1
TopDimFeatures.Seurat <- function(
object,
reduction,
dims = 1:10,
features.per.dim = 100,
max.features = 200,
projected = FALSE
) {
dim.reduction <- object[[reduction]]
max.features <- max(length(x = dims) * 2, max.features)
num.features <- sapply(X = 1:features.per.dim, FUN = function(y) {
length(x = unique(x = as.vector(x = sapply(X = dims, FUN = function(x) {
unlist(x = TopFeatures(object = dim.reduction, dim = x, nfeatures = y, balanced = TRUE, projected = projected))
}))))
})
max.per.pc <- which.max(x = num.features[num.features < max.features])
features <- unique(x = as.vector(x = sapply(X = dims, FUN = function(x) {
unlist(x = TopFeatures(object = dim.reduction, dim = x, nfeatures = max.per.pc, balanced = TRUE, projected = projected))
})))
features <- unique(x = features)
return(features)
}
# Filter anchors
# IMPORTED from Seurat 3.2.1
FilterAnchors.Seurat <- function(
object,
assay = NULL,
slot = "data",
integration.name = 'integrated',
features = NULL,
k.filter = 200,
nn.method = "rann",
eps = 0,
verbose = TRUE
) {
if (verbose) {
message("Filtering anchors")
}
assay <- assay %||% DefaultAssay(object = object)
features <- features %||% VariableFeatures(object = object)
if (length(x = features) == 0) {
stop("No features provided and no VariableFeatures computed.")
}
features <- unique(x = features)
neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors')
nn.cells1 <- neighbors$cells1
nn.cells2 <- neighbors$cells2
cn.data1 <- L2Norm(
mat = as.matrix(x = t(x = GetAssayData(
object = object[[assay[1]]],
slot = slot)[features, nn.cells1])),
MARGIN = 1)
cn.data2 <- L2Norm(
mat = as.matrix(x = t(x = GetAssayData(
object = object[[assay[2]]],
slot = slot)[features, nn.cells2])),
MARGIN = 1)
nn <- NNHelper(
data = cn.data2[nn.cells2, ],
query = cn.data1[nn.cells1, ],
k = k.filter,
method = nn.method,
eps = eps
)
anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors")
position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) {
which(x = anchors[x, "cell2"] == Indices(object = nn)[anchors[x, "cell1"], ])[1]
})
anchors <- anchors[!is.na(x = position), ]
if (verbose) {
message("\tRetained ", nrow(x = anchors), " anchors")
}
object <- SetIntegrationData(
object = object,
integration.name = integration.name,
slot = "anchors",
new.data = anchors
)
return(object)
}
# Score Anchors
# IMPORTED from Seurat 3.2.1
ScoreAnchors.Seurat <- function(
object,
assay = NULL,
integration.name = 'integrated',
verbose = TRUE,
k.score = 30,
do.cpp = TRUE
) {
assay <- assay %||% DefaultAssay(object = object)
anchor.df <- as.data.frame(x = GetIntegrationData(object = object, integration.name = integration.name, slot = 'anchors'))
neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "neighbors")
offset <- length(x = neighbors$cells1)
indices.aa <- Indices(object = neighbors$nnaa)
indices.bb <- Indices(object = neighbors$nnbb)
indices.ab <- Indices(object = neighbors$nnab)
indices.ba <- Indices(object = neighbors$nnba)
nbrsetA <- function(x) c(indices.aa[x, 1:k.score], indices.ab[x, 1:k.score] + offset)
nbrsetB <- function(x) c(indices.ba[x, 1:k.score], indices.bb[x, 1:k.score] + offset)
# score = number of shared neighbors
anchor.new <- data.frame(
'cell1' = anchor.df[, 1],
'cell2' = anchor.df[, 2],
'score' = mapply(
FUN = function(x, y) {
length(x = intersect(x = nbrsetA(x = x), nbrsetB(x = y)))},
anchor.df[, 1],
anchor.df[, 2]
)
)
# normalize the score
max.score <- quantile(anchor.new$score, 0.9)
min.score <- quantile(anchor.new$score, 0.01)
anchor.new$score <- anchor.new$score - min.score
anchor.new$score <- anchor.new$score / (max.score - min.score)
anchor.new$score[anchor.new$score > 1] <- 1
anchor.new$score[anchor.new$score < 0] <- 0
anchor.new <- as.matrix(x = anchor.new)
object <- SetIntegrationData(
object = object,
integration.name = integration.name,
slot = 'anchors',
new.data = anchor.new
)
return(object)
}
###Find Anchors functions, modified from Seurat 3.1 to return the distance in PC space between pairs of datasets
FindAnchors.wdist <- function(
object.pair,
assay,
slot,
cells1,
cells2,
internal.neighbors,
reduction,
reduction.2 = character(),
nn.reduction = reduction,
dims = 1:10,
k.anchor = 5,
k.filter = 200,
k.score = 30,
max.features = 200,
nn.method = "rann",
eps = 0,
projected = FALSE,
verbose = TRUE
) {
# compute local neighborhoods, use max of k.anchor and k.score if also scoring to avoid
# recomputing neighborhoods
k.neighbor <- k.anchor
if (!is.na(x = k.score)) {
k.neighbor <- max(k.anchor, k.score)
}
object.pair <- FindNN.Seurat(
object = object.pair,
cells1 = cells1,
cells2 = cells2,
internal.neighbors = internal.neighbors,
dims = dims,
reduction = reduction,
reduction.2 = reduction.2,
nn.reduction = nn.reduction,
k = k.neighbor,
nn.method = nn.method,
eps = eps,
verbose = verbose
)
object.pair <- FindAnchorPairs.Seurat(
object = object.pair,
integration.name = "integrated",
k.anchor = k.anchor,
verbose = verbose
)
if (!is.na(x = k.filter)) {
top.features <- TopDimFeatures.Seurat(
object = object.pair,
reduction = reduction,
dims = dims,
features.per.dim = 100,
max.features = max.features,
projected = projected
)
object.pair <- FilterAnchors.Seurat(
object = object.pair,
assay = assay,
slot = slot,
integration.name = 'integrated',
features = top.features,
k.filter = k.filter,
nn.method = nn.method,
eps = eps,
verbose = verbose
)
}
if (!is.na(x = k.score)) {
object.pair = ScoreAnchors.Seurat(
object = object.pair,
assay = DefaultAssay(object = object.pair),
integration.name = "integrated",
verbose = verbose,
k.score = k.score
)
}
###Return distances
anc.tab <- object.pair@tools$integrated@anchors
d1.2 <- numeric(length = dim(anc.tab)[1])
d2.1 <- numeric(length = dim(anc.tab)[1])
for (r in 1:dim(anc.tab)[1]) {
c1 <- anc.tab[r,"cell1"]
c2 <- anc.tab[r,"cell2"]
d1.2[r] <- object.pair@tools$integrated@neighbors$nnab@nn.dist[c1, which(object.pair@tools$integrated@neighbors$nnab@nn.idx[c1,] == c2 )]
d2.1[r] <- object.pair@tools$integrated@neighbors$nnba@nn.dist[c2, which(object.pair@tools$integrated@neighbors$nnba@nn.idx[c2,] == c1 )]
}
object.pair@tools$integrated@anchors <- cbind(object.pair@tools$integrated@anchors, dist1.2=d1.2)
object.pair@tools$integrated@anchors <- cbind(object.pair@tools$integrated@anchors, dist2.1=d2.1)
anchors <- GetIntegrationData(
object = object.pair,
integration.name = 'integrated',
slot = 'anchors'
)
return(anchors)
}
#Modified from Seurat 4.1.1
AddDatasetID.2 <- function(
anchor.df,
offsets,
obj.lengths
) {
ndataset <- length(x = offsets)
total.cells <- sum(obj.lengths)
offsets <- c(offsets, total.cells)
row.offset <- rep.int(x = offsets[1:ndataset], times = obj.lengths)
dataset <- rep.int(x = 1:ndataset, times = obj.lengths)
anchor.df <- data.frame(
'cell1' = anchor.df[, 1] - row.offset[anchor.df[, 1]],
'cell2' = anchor.df[, 2] - row.offset[anchor.df[, 2]],
'score' = anchor.df[, 3],
'dataset1' = dataset[anchor.df[, 1]],
'dataset2' = dataset[anchor.df[, 2]],
'dist1.2' = anchor.df[, 4],
'dist2.1' = anchor.df[, 5]
)
return(anchor.df)
}
#Modified from Seurat 4.1.1
ParseMergePair.local <- function(clustering, i){
# return 2-element list of datasets in first and second object
datasets <- list('object1' = clustering[i, 1], 'object2' = clustering[i, 2])
if (datasets$object1 > 0) {
datasets$object1 <- ParseRow(clustering, datasets$object1)
}
if (datasets$object2 > 0) {
datasets$object2 <- ParseRow(clustering, datasets$object2)
}
datasets$object1 <- abs(x = datasets$object1)
datasets$object2 <- abs(x = datasets$object2)
return(datasets)
}
#Modified from Seurat 4.1.1
ParseRow <- function(clustering, i){
# returns vector of datasets
datasets <- as.list(x = clustering[i, ])
if (datasets[[1]] > 0) {
datasets[[1]] <- ParseRow(clustering = clustering, i = datasets[[1]])
}
if (datasets[[2]] > 0) {
datasets[[2]] <- ParseRow(clustering = clustering, i = datasets[[2]])
}
return(unlist(datasets))
}
#Modified from Seurat 4.1.1
PairwiseIntegrateReference.STACAS <- function(
anchorset,
new.assay.name = "integrated",
features = NULL,
features.to.integrate = NULL,
normalization.method = "LogNormalize",
dims = 1:30,
k.weight = 100,
weight.reduction = NULL,
sd.weight = 1,
sample.tree = NULL,
preserve.order = TRUE,
verbose = TRUE
) {
nobj <- length(anchorset@object.list)
reference.objects <- slot(object = anchorset, name = "reference.objects")
features <- features %||% slot(object = anchorset, name = "anchor.features")
features.to.integrate <- features.to.integrate %||% features
if (length(reference.objects) == 1) {
ref.obj <- anchorset@object.list[[reference.objects]]
ref.obj[[new.assay.name]] <- CreateAssayObject(
data = GetAssayData(ref.obj, slot = 'data')[features.to.integrate, ]
)
DefaultAssay(object = ref.obj) <- new.assay.name
return(ref.obj)
}
anchors <- slot(object = anchorset, name = "anchors")
offsets <- slot(object = anchorset, name = "offsets")
objects.ncell <- sapply(anchorset@object.list, FUN = ncol)
if (!is.null(weight.reduction)) {
if (length(weight.reduction) == 1 | inherits(weight.reduction, what = "DimReduc")) {
if (nobj == 2) {
weight.reduction <- list(NULL, weight.reduction)
} else if (inherits(weight.reduction, what = "character")) {
weight.reduction <- as.list(rep(weight.reduction, times = nobj))
} else {
stop("Invalid input for weight.reduction. Please specify either the names of the dimension",
"reduction for each object in the list or provide DimReduc objects.")
}
}
if (length(weight.reduction) != nobj) {
stop("Please specify a dimension reduction for each object, or one dimension reduction to be used for all objects")
}
if (inherits(weight.reduction, what = "character")) {
weight.reduction <- as.list(weight.reduction)
}
available.reductions <- lapply(X = anchorset@object.list,
FUN = FilterObjects, classes.keep = 'DimReduc')
for (ii in 1:length(weight.reduction)) {
if (ii == 1 & is.null(weight.reduction[[ii]])) next
if (!inherits(weight.reduction[[ii]], what = "DimReduc")) {
if (!weight.reduction[[ii]] %in% available.reductions[[ii]]) {
stop("Requested dimension reduction (", weight.reduction[[ii]], ") is not present in object ", ii)
}
weight.reduction[[ii]] <- anchorset@object.list[[ii]][[weight.reduction[[ii]]]]
}
}
}
if (is.null(sample.tree)) {
sample.tree <- SampleTree.STACAS(
anchorset = anchorset,
plot = FALSE
)
}
cellnames.list <- list()
for (ii in 1:nobj) {
cellnames.list[[ii]] <- colnames(anchorset@object.list[[ii]])
}
unintegrated <- suppressWarnings(expr = merge(
x = anchorset@object.list[[reference.objects[[1]]]],
y = anchorset@object.list[reference.objects[2:length(reference.objects)]]
))
names(anchorset@object.list) <- as.character(-(1:nobj))
if (!is.null(weight.reduction)) {
names(weight.reduction) <- names(anchorset@object.list)
}
if (verbose & (length(reference.objects) != nobj)) {
message("Building integrated reference")
}
for (ii in 1:nrow(sample.tree)) {
merge.pair <- as.character(sample.tree[ii, ])
length1 <- ncol(anchorset@object.list[[merge.pair[1]]])
length2 <- ncol(anchorset@object.list[[merge.pair[2]]])
if (!(preserve.order) & (length2 > length1)) {
merge.pair <- rev(merge.pair)
sample.tree[ii, ] <- as.numeric(merge.pair)
}
if (!is.null(weight.reduction)) {
# extract the correct dimreduc objects, in the correct order
weight.pair <- weight.reduction[merge.pair]
} else {
weight.pair <- NULL
}
object.1 <- DietSeurat(
object = anchorset@object.list[[merge.pair[1]]],
assays = DefaultAssay(anchorset@object.list[[merge.pair[1]]]),
counts = FALSE
)
object.2 <- DietSeurat(
object = anchorset@object.list[[merge.pair[2]]],
assays = DefaultAssay(anchorset@object.list[[merge.pair[2]]]),
counts = FALSE
)
# suppress key duplication warning
suppressWarnings(object.1[["ToIntegrate"]] <- object.1[[DefaultAssay(object = object.1)]])
DefaultAssay(object = object.1) <- "ToIntegrate"
object.1 <- DietSeurat(object = object.1, assays = "ToIntegrate")
suppressWarnings(object.2[["ToIntegrate"]] <- object.2[[DefaultAssay(object = object.2)]])
DefaultAssay(object = object.2) <- "ToIntegrate"
object.2 <- DietSeurat(object = object.2, assays = "ToIntegrate")
datasets <- ParseMergePair.local(sample.tree, ii)
if (verbose) {
message(
"Merging dataset ",
paste(datasets$object2, collapse = " "),
" into ",
paste(datasets$object1, collapse = " ")
)
}
merged.obj <- merge(x = object.1, y = object.2, merge.data = TRUE)
if (verbose) {
message("Extracting anchors for merged samples")
}
filtered.anchors <- anchors[anchors$dataset1 %in% datasets$object1 & anchors$dataset2 %in% datasets$object2, ]
if (nrow(filtered.anchors) == 0) {
stop(sprintf("Zero anchors were found between datasets %s and %s",
paste0(datasets$object1, collapse = ","),
paste0(datasets$object2, collapse = ",")))
}
#k.weight == "max" disables local rescaling of anchor weights
nanch.d2 <- length(unique(filtered.anchors$cell2))
if (k.weight == "max" | k.weight > nanch.d2) {
k.use <- nanch.d2
} else {
k.use <- k.weight
}
integrated.matrix <- Seurat:::RunIntegration(
filtered.anchors = filtered.anchors,
normalization.method = normalization.method,
reference = object.1,
query = object.2,
cellnames.list = cellnames.list,
new.assay.name = new.assay.name,
features.to.integrate = features.to.integrate,
features = features,
dims = dims,
weight.reduction = weight.reduction,
k.weight = k.use,
sd.weight = sd.weight,
eps=0,
verbose = verbose
)
integrated.matrix <- cbind(integrated.matrix, GetAssayData(object = object.1, slot = 'data')[features.to.integrate, ])
merged.obj[[new.assay.name]] <- CreateAssayObject(data = integrated.matrix)
DefaultAssay(object = merged.obj) <- new.assay.name
anchorset@object.list[[as.character(x = ii)]] <- merged.obj
anchorset@object.list[[merge.pair[[1]]]] <- NULL
anchorset@object.list[[merge.pair[[2]]]] <- NULL
invisible(x = CheckGC())
}
integrated.data <- GetAssayData(
object = anchorset@object.list[[as.character(ii)]],
assay = new.assay.name,
slot = 'data'
)
integrated.data <- integrated.data[, colnames(x = unintegrated)]
unintegrated[[new.assay.name]] <- CreateAssayObject(data = integrated.data,
scale.data = matrix(),
var.features = vector(),
meta.features = data.frame(row.names = rownames(x = integrated.data)),
misc = NULL)
# "unintegrated" now contains the integrated assay
DefaultAssay(unintegrated) <- new.assay.name
VariableFeatures(unintegrated) <- features
unintegrated <- SetIntegrationData(
object = unintegrated,
integration.name = "Integration",
slot = "anchors",
new.data = anchors
)
unintegrated <- SetIntegrationData(
object = unintegrated,
integration.name = "Integration",
slot = "sample.tree",
new.data = sample.tree
)
unintegrated[["FindIntegrationAnchors"]] <- slot(object = anchorset, name = "command")
suppressWarnings(expr = unintegrated <- LogSeuratCommand(object = unintegrated))
return(unintegrated)
}
#Modified from Seurat 4.1.1
MapQueryData.local <- function(
anchorset,
reference,
new.assay.name = "integrated",
normalization.method = "LogNormalize",
features = NULL,
features.to.integrate = NULL,
dims = 1:30,
k.weight = 100,
weight.reduction = NULL,
weights.matrix = NULL,
no.offset = FALSE,
sd.weight = 1,
preserve.order = FALSE,
eps = 0,
verbose = TRUE
) {
normalization.method <- match.arg(arg = normalization.method)
reference.datasets <- slot(object = anchorset, name = 'reference.objects')
nobj <- length(anchorset@object.list)
anchors <- slot(object = anchorset, name = 'anchors')
features <- features %||% slot(object = anchorset, name = "anchor.features")
features.to.integrate <- features.to.integrate %||% features
cellnames.list <- list()
for (ii in 1:nobj) {
cellnames.list[[ii]] <- colnames(anchorset@object.list[[ii]])
}
if (length(reference.datasets) == nobj) {
query.datasets <- NULL
} else {
query.datasets <- setdiff(seq(from=1, to=nobj), reference.datasets)
}
query.corrected <- pbapply::pblapply(
X = query.datasets,
FUN = function(this.dataset) {
if (verbose) {
message("\nIntegrating dataset ", this.dataset, " with reference dataset")
}
filtered.anchors <- anchors[anchors$dataset1 %in% reference.datasets & anchors$dataset2 == this.dataset, ]
if (nrow(filtered.anchors) == 0) {
return(NULL)
}
integrated <- Seurat:::RunIntegration(
filtered.anchors = filtered.anchors,
reference = reference,
query = anchorset@object.list[[this.dataset]],
new.assay.name = new.assay.name,
normalization.method = normalization.method,
cellnames.list = cellnames.list,
features.to.integrate = features.to.integrate,
weight.reduction = weight.reduction,
weights.matrix = weights.matrix,
no.offset = no.offset,
features = features,
dims = dims,
k.weight = k.weight,
sd.weight = sd.weight,
eps = eps,
verbose = verbose
)
return(integrated)
}
)
reference.integrated <- GetAssayData(
object = reference,
slot = 'data'
)[features.to.integrate, ]
query.corrected[[length(query.corrected) + 1]] <- reference.integrated
all.integrated <- do.call(cbind, query.corrected)
return(all.integrated)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.