#' @import Seurat
#' @import SeuratObject
#' @import ggplot2
#' @import harmony
#' @import Matrix
#'
NULL
############# infer whole genome spatial transcriptome from single cell RNAseq
#' run harmony integration
#'
#' This function calls harmony package(https://github.com/immunogenomics/harmony)
#' to integrate the scRNA-seq and spatial transcriptome data.
#'
#' @param seurat_obj merged seurat object
#' @param genes_select genes used to integration, usually targeted genes.
#' @param npc number of PC
#' @return seurat object
#'
run_harmony = function(seurat_obj, genes_select, npc = 20){
SeuratObject::VariableFeatures(seurat_obj) = genes_select
#seurat_obj = Seurat::ScaleData(seurat_obj, features = genes_select, vars.to.regress = "tech", verbose = FALSE)
seurat_obj = Seurat::ScaleData(seurat_obj, features = genes_select, verbose = FALSE)
seurat_obj = Seurat::RunPCA(seurat_obj, npcs = npc, features = genes_select, verbose = FALSE)
# harmony
seurat_obj <- harmony::RunHarmony(
object = seurat_obj,
group.by.vars = 'tech',
plot_convergence = F,
theta = 2,
lambda = 0.5,
verbose = FALSE,
max.iter.harmony = 20
)
return(seurat_obj)
}
#' stabilize expression
#'
#' This function use nearest neighbor to correct the expression.
#' This could remove some False Positive / False negative expressions.
#' This function corrects "RNA" assay "data" slot.
#'
#' @param obj seurat object
#' @param neighbor number of nearest neighbors used to correct
#' @param npcs number of principal component (PC) used to RunPCA
#' @param weight.NN the weight of cell itself
#' @param n.core number of CPU cores used to parallel
#' @return seurat object with corrected expression
#'
#' @export
#'
#'
stabilize_expr = function(obj,
neighbor = 5,
npcs = 8,
weight.NN = 0.2,
n.core = 10){
neighbor = neighbor + 1 # include cell itself
obj = Seurat::FindVariableFeatures(obj, selection.method = "vst", verbose = FALSE)
obj = Seurat::ScaleData(obj, verbose = FALSE)
obj = Seurat::RunPCA(obj, npcs = npcs, verbose = FALSE)
obj = suppressMessages(Seurat::FindNeighbors(obj, return.neighbor = T, k.param = neighbor, verbose = FALSE))
# imputation expr value by KNN
enhancer_expr_tmp = expm1(obj@assays$RNA@data)
enhancer_expr = parallel::mclapply(colnames(obj), function(cell){
cell_neighbors = Seurat::TopNeighbors(obj@neighbors$RNA.nn, cell, n=neighbor)
weight.NN * matrixStats::rowMedians(as.matrix(enhancer_expr_tmp[,cell_neighbors[-1]])) + (1-weight.NN) * enhancer_expr_tmp[,cell_neighbors[1]]
#Matrix::rowMeans(obj@assays$RNA@data[,cell_neighbors])
}, mc.cores = n.core)
enhancer_expr = do.call(cbind, enhancer_expr)
colnames(enhancer_expr) = colnames(obj)
rownames(enhancer_expr) = rownames(obj)
obj@assays$RNA@data = as(log1p(enhancer_expr), "dgCMatrix")
rm(enhancer_expr, enhancer_expr_tmp)
gc()
return(obj)
}
#' calculate correlation
#'
#'
#' @param x sparse matrix
#' @return correlation matrix
#'
#' @export
#'
sparse.cor <- function(x){
n <- nrow(x)
m <- ncol(x)
ii <- unique(x@i)+1 # rows with a non-zero element
Ex <- Matrix::colMeans(x)
nozero <- as.vector(x[ii,]) - rep(Ex,each=length(ii))
covmat <- ( Matrix::crossprod(matrix(nozero,ncol=m)) +
Matrix::crossprod(t(Ex))*(n-length(ii))
)/(n-1)
sdvec <- sqrt(diag(covmat))
covmat/Matrix::crossprod(t(sdvec))
}
#' Main function of iSpatial
#'
#' This function integrates scRNA-seq and spatial transcriptome. With limited
#' genes in merFISH/SeqFISH, iSpatial infer/enhance all genes from scRNA-seq.
#' Finally, this function generates spatially expression of all genes.
#'
#' @param spRNA seurat object of spatial transcriptome data.
#' Should contain normalized data. Run Seurat::NormalizeData or others.
#' @param scRNA seurat object of single cell RNA-seq data.
#' Should contain normalized data. Run Seurat::NormalizeData or others.
#' @param dims which dimensions to use when find the nearest neighbors
#' @param k.neighbor number of neighbors to use when infer the expression
#' @param infered.assay names of output assay in seurat object
#' @param weighted.KNN we use a dynamics weight to assign scRNA values to spRNA.
#' The dynamics weight based on the correlation between scRNA cell and spRNA
#' cell.
#' @param RNA.weight the weight of scRNA-seq expression when genes detected both
#' in spRNA and scRNA. RNA.weight should be 0 to 1.
#' The inferred expression = (1 - RNA.weight) * spRNA + RNA.weight * scRNA.
#' @param n.core number of CPU cores used to parallel.
#' @param correct.scRNA Whether to stabilize expression in scRNA.
#' @param correct.spRNA Whether to stabilize expression in spRNA.
#' @param correct.weight.NN weight of nearest neighbors to correct expr.
#' @param correct.neighbor number of nearest neighbors used to correct expr.
#'
#' @return returns a seurat object with inferred expression in infered.assay.
#'
#' @export
#'
#'
infer = function(
spRNA,
scRNA,
dims = 1:30,
k.neighbor = 30,
infered.assay = "enhanced",
weighted.KNN = TRUE,
RNA.weight = 0.5,
n.core = 8,
correct.spRNA = FALSE,
correct.scRNA = FALSE,
correct.weight.NN = 0.2,
correct.neighbor = 5
){
spRNA$tech = "spatial"
scRNA$tech = "scRNA"
if(is.null(spRNA@assays$RNA)){
stop(paste(spRNA, " do not have 'RNA' assay."))
}
if(is.null(scRNA@assays$RNA)){
stop(paste(scRNA, " do not have 'RNA' assay."))
}
if(length(spRNA@assays$RNA@data) == 0){
stop(paste(spRNA, " is not normlized. Run Seurat::NormalizeData."))
}
if(length(scRNA@assays$RNA@data) == 0){
stop(paste(scRNA, " is not normlized. Run Seurat::NormalizeData."))
}
if(correct.spRNA){
message("Stablize spatial transcriptome.")
spRNA = stabilize_expr(spRNA,
neighbor = correct.neighbor,
weight.NN = correct.weight.NN,
n.core = n.core, npcs = length(dims))
}
if(correct.scRNA){
message("Stablize single cell RNAseq.")
scRNA = stabilize_expr(scRNA,
neighbor = correct.neighbor,
weight.NN = correct.weight.NN,
n.core = n.core, npcs = length(dims))
}
genes_select = intersect(rownames(scRNA), rownames(spRNA))
if(length(genes_select) < 10){
stop("Too few intesected genes between scRNA and spRNA.")
}
# run pca
spRNA = Seurat::FindVariableFeatures(spRNA, verbose = FALSE)
SeuratObject::VariableFeatures(spRNA) = SeuratObject::VariableFeatures(spRNA)[SeuratObject::VariableFeatures(spRNA) %in% genes_select]
spRNA = Seurat::ScaleData(spRNA, verbose = FALSE)
spRNA = Seurat::RunPCA(spRNA, npcs = length(dims), verbose = FALSE)
SeuratObject::VariableFeatures(scRNA) = SeuratObject::VariableFeatures(spRNA)
scRNA = Seurat::ScaleData(scRNA, verbose = FALSE)
scRNA = Seurat::RunPCA(scRNA, npcs = length(dims), verbose = FALSE)
# merge two objects
message("1st level integration")
anchors <- Seurat::FindIntegrationAnchors(object.list = list(spRNA, scRNA),
normalization.method = "LogNormalize",
reduction = "rpca",
anchor.features = SeuratObject::VariableFeatures(spRNA),
k.anchor = 20,
dims = dims,
verbose = FALSE)
integrated <- Seurat::IntegrateData(anchorset = anchors,
dims = dims,
normalization.method = "LogNormalize",
verbose = FALSE)
rm(anchors)
gc()
# count level normalization
message("normalization")
norm_data = expm1(integrated@assays$RNA@data)
# trim high expression gene, which affect the mean of genes
trim_quantil1 = Matrix::rowMeans(scRNA@assays$RNA@data[genes_select, ])
trim_quantil1 = trim_quantil1[trim_quantil1 < quantile(trim_quantil1, prob = 0.98) ]
trim_quantil2 = Matrix::rowMeans(spRNA@assays$RNA@data[genes_select, ])
trim_quantil2 = trim_quantil2[trim_quantil2 < quantile(trim_quantil2, prob = 0.98) ]
trim_genes = unique(c(names(trim_quantil1), names(trim_quantil2)))
# normalize factor based on trimmed genes
norm_factor = Matrix::colMeans(norm_data[trim_genes, ])
norm_factor = as.numeric(norm_factor)
norm_factor = norm_factor/mean(norm_factor)
norm_data@x <- norm_data@x / rep.int(norm_factor, diff(norm_data@p))
integrated@assays$RNA@data = log1p(norm_data)
# remove cell with number of expressed gene < 98%
integrated = integrated[, norm_factor != 0]
spRNA_images = suppressWarnings(spRNA@images)
rm(norm_data, norm_factor, scRNA, spRNA)
gc()
# check normalization
#avg_expr = Seurat::AverageExpression(integrated, group.by="tech", slot="data")
#boxplot(log1p(avg_expr$RNA[genes_select, ]))
# 2nd level integration
message("2nd level integration")
#SeuratObject::VariableFeatures(integrated, assay = "integrated") = genes_select
integrated = Seurat::ScaleData(integrated, #vars.to.regress = "tech", #features = genes_select,
assay = "integrated",
verbose = FALSE)
integrated = Seurat::RunPCA(integrated,
npcs = length(dims),
assay = "integrated",
verbose = FALSE)
integrated <- suppressWarnings( harmony::RunHarmony(
object = integrated,
group.by.vars = 'tech',
plot_convergence = F,
theta = 3,
lambda = 0.5,
assay.use = "integrated",
verbose = FALSE,
max.iter.harmony = 20
))
# find neighbors
integrated = Seurat::FindNeighbors(integrated,
k.param = k.neighbor,
reduction="harmony",
dims=dims,
return.neighbor = T,
assay = "integrated")
neigbors = suppressWarnings(sapply(colnames(subset(integrated, subset = tech == "scRNA", invert = TRUE)), function(i){
Seurat::TopNeighbors(integrated@neighbors$integrated.nn, i, n = k.neighbor)
}))
neigbors = as.data.frame(neigbors)
DefaultAssay(integrated) <- "RNA"
integrated = suppressWarnings(Seurat::DietSeurat(integrated, counts = F, assays = "RNA"))
integrated_scRNA = suppressWarnings(subset(integrated, subset = tech == "scRNA"))
integrated_merFISH = suppressWarnings(subset(integrated, subset = tech == "scRNA", invert = TRUE))
cells_name = colnames(integrated_scRNA)
neigbors = as.list(neigbors)
# only select neighbor in scRNA data
neigbors = lapply(neigbors, function(x){
x = x[x %in% cells_name]
})
enhancer_expr = integrated_merFISH@assays$RNA@data
# infer expression via scRNA
message("infer expression.")
if(weighted.KNN){
# dynamics proportion to assign scRNA values to merFISH
enhancer_expr = parallel::mclapply(colnames(enhancer_expr), function(cell){
cell_neighbors = neigbors[[cell]]
if (length(cell_neighbors) == 0){
integrated@assays$RNA@data[, cell]
}else{
cor_dist = sparse.cor(integrated@assays$RNA@data[genes_select, c(cell, cell_neighbors)])[,1]
cor_dist[is.na(cor_dist)] <- 0
cor_dist[cor_dist < 0] <- 0
cor_dist = cor_dist ** 2
# normalized correlation distance matrix
cor_dist = cor_dist / sum(cor_dist)
cor_dist = c((1-RNA.weight) * cor_dist[1], RNA.weight * cor_dist[-1]) # cor_dist[1] is the cell in spRNA, here = 1
# inner produce
infer_expr = integrated@assays$RNA@data[, c(cell, cell_neighbors)] %*% cor_dist
infer_expr[,1]
}
}, mc.cores = n.core)
}else{
# constant proportion to assign scRNA values to merFISH
enhancer_expr = parallel::mclapply(colnames(enhancer_expr), function(cell){
cell_neighbors = neigbors[[cell]]
if (length(cell_neighbors) == 0){
enhancer_expr[, cell]
}else if (length(cell_neighbors) == 1){
(1-RNA.weight) * enhancer_expr[, cell] + RNA.weight * integrated_scRNA@assays$RNA@data[,cell_neighbors]
}else{
(1-RNA.weight) * enhancer_expr[, cell] + RNA.weight * Matrix::rowMeans(integrated_scRNA@assays$RNA@data[,cell_neighbors])
}
}, mc.cores = n.core)
}
enhancer_expr = do.call(cbind, enhancer_expr)
colnames(enhancer_expr) = colnames(integrated_merFISH)
# assign inferred expression value to new assay
integrated_merFISH[[infered.assay]] = SeuratObject::CreateAssayObject(data = as(enhancer_expr, "dgCMatrix"))
DefaultAssay(integrated_merFISH) <- infered.assay
integrated_merFISH = Seurat::DietSeurat(integrated_merFISH, assays = infered.assay)
# add spatial information
integrated_merFISH@images = spRNA_images
for(img in names(integrated_merFISH@images)){
integrated_merFISH@images[[img]]@coordinates <- integrated_merFISH@images[[img]]@coordinates[
rownames(integrated_merFISH@images[[img]]@coordinates) %in% colnames(integrated_merFISH), ]
}
return(integrated_merFISH)
}
#' Main function of iSpatial
#'
#' This function integrates scRNA-seq and spatial transcriptome. With limited
#' genes in merFISH/SeqFISH, iSpatial infer/enhance all genes from scRNA-seq.
#' Finally, this function generates spatially expression of all genes.
#'
#' @param spRNA seurat object of spatial transcriptome data.
#' Should contain normalized data. Run Seurat::NormalizeData or others.
#' @param scRNA seurat object of single cell RNA-seq data.
#' Should contain normalized data. Run Seurat::NormalizeData or others.
#' @param dims which dimensions to use when find the nearest neighbors
#' @param k.neighbor number of neighbors to use when infer the expression
#' @param infered.assay names of output assay in seurat object
#' @param weighted.KNN we use a dynamics weight to assign scRNA values to spRNA.
#' The dynamics weight based on the correlation between scRNA cell and spRNA
#' cell.
#' @param RNA.weight the weight of scRNA-seq expression when genes detected both
#' in spRNA and scRNA. RNA.weight should be 0 to 1.
#' The inferred expression = (1 - RNA.weight) * spRNA + RNA.weight * scRNA.
#' @param n.core number of CPU cores used to parallel.
#' @param correct.scRNA Whether to stabilize expression in scRNA.
#' @param correct.spRNA Whether to stabilize expression in spRNA.
#' @param correct.neighbor number of nearest neighbors used to correct expr.
#'
#' @return returns a seurat object with inferred expression in infered.assay.
#'
#' @export
#'
infer_v2 = function(
spRNA,
scRNA,
dims = 1:30,
k.neighbor = 30,
infered.assay = "enhanced",
weighted.KNN = TRUE,
RNA.weight = 0.5,
n.core = 8,
correct.spRNA = FALSE,
correct.scRNA = FALSE,
correct.neighbor = 5
){
spRNA$tech = "spatial"
scRNA$tech = "scRNA"
if(is.null(spRNA@assays$RNA)){
stop(paste(spRNA, " do not have 'RNA' assay."))
}
if(is.null(scRNA@assays$RNA)){
stop(paste(scRNA, " do not have 'RNA' assay."))
}
if(length(spRNA@assays$RNA@data) == 0){
stop(paste(spRNA, " is not normlized. Run Seurat::NormalizeData."))
}
if(length(scRNA@assays$RNA@data) == 0){
stop(paste(scRNA, " is not normlized. Run Seurat::NormalizeData."))
}
if(correct.spRNA){
message("Stablize spatial transcriptome.")
spRNA = stabilize_expr(spRNA, neighbor = correct.neighbor, n.core = n.core, npcs = length(dims))
}
if(correct.scRNA){
message("Stablize single cell RNAseq.")
scRNA = stabilize_expr(scRNA, neighbor = correct.neighbor, n.core = n.core, npcs = length(dims))
}
genes_select = intersect(rownames(scRNA), rownames(spRNA))
if(length(genes_select) < 10){
stop("Too few intesected genes between scRNA and spRNA.")
}
#
# run pca
spRNA = Seurat::FindVariableFeatures(spRNA, verbose = FALSE)
SeuratObject::VariableFeatures(spRNA) = SeuratObject::VariableFeatures(spRNA)[SeuratObject::VariableFeatures(spRNA) %in% genes_select]
spRNA = Seurat::ScaleData(spRNA, scale.max = 10, verbose = FALSE)
spRNA = Seurat::RunPCA(spRNA, npcs = length(dims), verbose = FALSE)
SeuratObject::VariableFeatures(scRNA) = SeuratObject::VariableFeatures(spRNA)
scRNA = Seurat::ScaleData(scRNA, scale.max = 10, verbose = FALSE)
scRNA = Seurat::RunPCA(scRNA, npcs = length(dims), verbose = FALSE)
spRNA_images = suppressWarnings(spRNA@images)
# merge two objects
message("1st level integration")
anchors <- Seurat::FindIntegrationAnchors(object.list = list(spRNA, scRNA),
normalization.method = "LogNormalize",
reduction = "rpca",
anchor.features = SeuratObject::VariableFeatures(spRNA),
dims = dims,
#scale = F,
k.anchor = 20,
verbose = FALSE)
integrated <- Seurat::IntegrateData(anchorset = anchors,
dims = dims,
normalization.method = "LogNormalize",
verbose = FALSE)
rm(scRNA, spRNA, anchors)
gc()
# 2nd level integration
message("2nd level integration")
#SeuratObject::VariableFeatures(integrated, assay = "integrated") = genes_select
integrated = Seurat::ScaleData(integrated, #vars.to.regress = "tech", features = genes_select,
assay = "integrated",
scale.max = 10,
verbose = FALSE)
integrated = Seurat::RunPCA(integrated,
npcs = length(dims),
assay = "integrated",
verbose = FALSE)
integrated <- suppressWarnings( harmony::RunHarmony(
object = integrated,
group.by.vars = 'tech',
plot_convergence = F,
theta = 2,
lambda = 0.5,
assay.use = "integrated",
verbose = FALSE,
max.iter.harmony = 20
))
integrated_scRNA = suppressWarnings(subset(integrated, subset = tech == "scRNA"))
integrated_merFISH = suppressWarnings(subset(integrated, subset = tech == "scRNA", invert = TRUE))
cells_name = colnames(integrated_scRNA)
NN <- Seurat:::FindNN(
object = integrated,
cells1 = colnames(integrated_merFISH),
cells2 = colnames(integrated_scRNA),
internal.neighbors = NULL,
dims = dims,
reduction = "harmony",
nn.reduction = "harmony",
k = k.neighbor,
verbose = TRUE
)
neighbors <- Seurat::GetIntegrationData(object = NN, integration.name = "integrated", slot = 'neighbors')
neighbors = as.data.frame(t(neighbors[['nnab']]@nn.idx))
colnames(neighbors) = colnames(integrated_merFISH)
neighbors = as.list(neighbors)
# only select neighbor in scRNA data
neigbors = lapply(neighbors, function(x){
x = cells_name[x]
})
DefaultAssay(integrated) <- "RNA"
integrated = suppressWarnings(Seurat::DietSeurat(integrated, counts = F, assays = "RNA"))
enhancer_expr = integrated_merFISH@assays$RNA@data
# infer expression via scRNA
message("infer expression.")
if(weighted.KNN){
# dynamics proportion to assign scRNA values to merFISH
enhancer_expr = parallel::mclapply(colnames(enhancer_expr), function(cell){
cell_neighbors = neigbors[[cell]]
if (length(cell_neighbors) == 0){
integrated@assays$RNA@data[, cell]
}else{
cor_dist = sparse.cor(integrated@assays$RNA@data[genes_select, c(cell, cell_neighbors)])[,1]
#cor_dist = cor(t(integrated@reductions$harmony@cell.embeddings[c(cell, cell_neighbors), ]))[,1]
cor_dist[is.na(cor_dist)] <- 0
cor_dist[cor_dist < 0] <- 0
cor_dist = cor_dist ** 3 #
# normalized correlation distance matrix
cor_dist = cor_dist / sum(cor_dist)
cor_dist = c((1-RNA.weight) * cor_dist[1], RNA.weight * cor_dist[-1]) # cor_dist[1] is the cell in spRNA, here = 1
# inner produce
infer_expr = integrated@assays$RNA@data[, c(cell, cell_neighbors)] %*% cor_dist
infer_expr[,1]
}
}, mc.cores = n.core)
}else{
# constant proportion to assign scRNA values to merFISH
enhancer_expr = parallel::mclapply(colnames(enhancer_expr), function(cell){
cell_neighbors = neigbors[[cell]]
if (length(cell_neighbors) == 0){
enhancer_expr[, cell]
}else if (length(cell_neighbors) == 1){
(1-RNA.weight) * enhancer_expr[, cell] + RNA.weight * (integrated_scRNA@assays$RNA@data[,cell_neighbors])
}else{
(1-RNA.weight) * enhancer_expr[, cell] + RNA.weight * Matrix::rowMeans(integrated_scRNA@assays$RNA@data[,cell_neighbors])
}
}, mc.cores = n.core)
}
enhancer_expr = do.call(cbind, enhancer_expr)
colnames(enhancer_expr) = colnames(integrated_merFISH)
# assign inferred expression value to new assay
integrated_merFISH[[infered.assay]] = SeuratObject::CreateAssayObject(data = as(enhancer_expr, "dgCMatrix"))
DefaultAssay(integrated_merFISH) <- infered.assay
integrated_merFISH = Seurat::DietSeurat(integrated_merFISH, assays = infered.assay)
# add spatial information
integrated_merFISH@images = spRNA_images
for(img in names(integrated_merFISH@images)){
integrated_merFISH@images[[img]]@coordinates <- integrated_merFISH@images[[img]]@coordinates[
rownames(integrated_merFISH@images[[img]]@coordinates) %in% colnames(integrated_merFISH), ]
}
return(integrated_merFISH)
}
#
#' function of iSpatial
#'
#' This function integrates scRNA-seq and spatial transcriptome. With limited
#' genes in merFISH/SeqFISH, iSpatial infer/enhance all genes from scRNA-seq.
#' Finally, this function generates spatially expression of all genes.
#'
#' @param spRNA seurat object of spatial transcriptome data
#' @param scRNA seurat object of single cell RNA-seq data
#' @param dims which dimensions to use when find the nearest neighbors
#' @param k.neighbor number of neighbors to use when infer the expression
#' @param infered.assay names of output assay in seurat object
#' @param weighted.KNN we use a dynamics weight to assign scRNA values to spRNA.
#' The dynamics weight based on the correlation between scRNA cell and spRNA
#' cell.
#' @param RNA.weight the weight of scRNA-seq expression when genes detected both
#' in spRNA and scRNA. RNA.weight should be 0 to 1.
#' The inferred expression = (1 - RNA.weight) * spRNA + RNA.weight * scRNA.
#' @param n.core number of CPU cores used to parallel.
#' @param correct.scRNA Whether to stabilize expression in scRNA.
#' @param correct.spRNA Whether to stabilize expression in spRNA.
#' @param correct.neighbor number of nearest neighbors used to correct expr.
#'
#' @return returns a seurat object with inferred expression in infered.assay.
#'
#' @export
#'
infer_harmony = function(
spRNA,
scRNA,
dims = 1:30,
k.neighbor = 30,
infered.assay = "enhanced",
weighted.KNN = TRUE,
RNA.weight = 0.5,
n.core = 10,
correct.spRNA = FALSE,
correct.scRNA = FALSE,
correct.neighbor = 3
){
spRNA$tech = "spatial"
scRNA$tech = "scRNA"
if(is.null(spRNA@assays$RNA)){
stop(paste(spRNA, " do not have 'RNA' assay."))
}
if(is.null(scRNA@assays$RNA)){
stop(paste(scRNA, " do not have 'RNA' assay."))
}
if(length(spRNA@assays$RNA@data) == 0){
stop(paste(spRNA, " is not normlized. Run Seurat::NormalizeData."))
}
if(length(scRNA@assays$RNA@data) == 0){
stop(paste(scRNA, " is not normlized. Run Seurat::NormalizeData."))
}
if(correct.spRNA){
message("Stablize spatial transcriptome.")
spRNA = stabilize_expr(spRNA, neighbor = correct.neighbor, n.core = n.core, npcs = length(dims))
}
if(correct.scRNA){
message("Stablize single cell RNAseq.")
scRNA = stabilize_expr(scRNA, neighbor = correct.neighbor, n.core = n.core, npcs = length(dims))
}
genes_select = intersect(rownames(scRNA), rownames(spRNA))
if(length(genes_select) < 10){
stop("Too few intesected genes between scRNA and spRNA.")
}
# merge two objects
message("integrating.")
integrated = merge(scRNA, spRNA)
# count level normalization
message("normalization.")
norm_data = expm1(integrated@assays$RNA@data)
# trim high expression gene, which affect the mean of genes
trim_quantil1 = Matrix::rowMeans(scRNA@assays$RNA@data[genes_select, ])
trim_quantil1 = trim_quantil1[trim_quantil1 < quantile(trim_quantil1, prob = 0.95) ]
trim_quantil2 = Matrix::rowMeans(spRNA@assays$RNA@data[genes_select, ])
trim_quantil2 = trim_quantil2[trim_quantil2 < quantile(trim_quantil2, prob = 0.95) ]
trim_genes = unique(c(names(trim_quantil1), names(trim_quantil2)))
# normalize factor based on trimmed genes
norm_factor = Matrix::colMeans(norm_data[trim_genes, ])
norm_factor = as.numeric(norm_factor)
norm_factor = norm_factor/mean(norm_factor)
norm_data@x <- norm_data@x / rep.int(norm_factor, diff(norm_data@p))
integrated@assays$RNA@data = log1p(norm_data)
# remove cell with number of expressed gene < 95%
integrated = integrated[, norm_factor != 0]
rm(norm_data, norm_factor)
gc()
# check normalization
#avg_expr = Seurat::AverageExpression(integrated, group.by="tech", slot="data")
#boxplot(log1p(avg_expr$RNA[genes_select, ]))
# global level integration
integrated = suppressWarnings(run_harmony(integrated, genes_select, npc = length(dims)))
# find neighbors
integrated = Seurat::FindNeighbors(integrated, k.param = k.neighbor, reduction="harmony", dims=dims, return.neighbor = T)
neigbors = sapply(colnames(subset(integrated, subset = tech == "scRNA", invert = TRUE)), function(i){
Seurat::TopNeighbors(integrated@neighbors$RNA.nn, i, n = k.neighbor)
})
neigbors = as.data.frame(neigbors)
integrated_scRNA = subset(integrated, subset = tech == "scRNA")
integrated_merFISH = subset(integrated, subset = tech == "scRNA", invert = TRUE)
cells_name = colnames(integrated_scRNA)
neigbors = as.list(neigbors)
# only select neighbor in scRNA data
neigbors = lapply(neigbors, function(x){
x = x[x %in% cells_name]
})
enhancer_expr = integrated_merFISH@assays$RNA@data
# infer expression via scRNA
message("infer expression.")
if(weighted.KNN){
# dynamics proportion to assign scRNA values to merFISH
enhancer_expr = parallel::mclapply(colnames(enhancer_expr), function(cell){
cell_neighbors = neigbors[[cell]]
if (length(cell_neighbors) == 0){
integrated@assays$RNA@data[, cell]
}else{
cor_dist = sparse.cor(integrated@assays$RNA@data[genes_select, c(cell, cell_neighbors)])[,1]
cor_dist[is.na(cor_dist)] <- 0
cor_dist[cor_dist < 0] <- 0
cor_dist = cor_dist ** 2
# normalized correlation distance matrix
cor_dist = cor_dist / sum(cor_dist)
cor_dist = c((1-RNA.weight) * cor_dist[1], RNA.weight * cor_dist[-1]) # cor_dist[1] is the cell in spRNA, here = 1
# inner produce
infer_expr = integrated@assays$RNA@data[, c(cell, cell_neighbors)] %*% cor_dist
infer_expr[,1]
}
}, mc.cores = n.core)
}else{
# constant proportion to assign scRNA values to merFISH
enhancer_expr = parallel::mclapply(colnames(enhancer_expr), function(cell){
cell_neighbors = neigbors[[cell]]
if (length(cell_neighbors) == 0){
enhancer_expr[, cell]
}else if (length(cell_neighbors) == 1){
(1-RNA.weight) * enhancer_expr[, cell] + RNA.weight * integrated_scRNA@assays$RNA@data[,cell_neighbors]
}else{
(1-RNA.weight) * enhancer_expr[, cell] + RNA.weight * Matrix::rowMeans(integrated_scRNA@assays$RNA@data[,cell_neighbors])
}
}, mc.cores = n.core)
}
enhancer_expr = do.call(cbind, enhancer_expr)
colnames(enhancer_expr) = colnames(integrated_merFISH)
# assign inferred expression value to new assay
integrated_merFISH[[infered.assay]] = SeuratObject::CreateAssayObject(data = as(enhancer_expr, "dgCMatrix"))
DefaultAssay(integrated_merFISH) <- infered.assay
# add spatial information
integrated_merFISH@images = spRNA@images
for(img in names(integrated_merFISH@images)){
integrated_merFISH@images[[img]]@coordinates <- integrated_merFISH@images[[img]]@coordinates[
rownames(integrated_merFISH@images[[img]]@coordinates) %in% colnames(integrated_merFISH), ]
}
return(integrated_merFISH)
}
#' function of iSpatial
#' integrated used rPCA
#'
#' This function integrates scRNA-seq and spatial transcriptome. With limited
#' genes in merFISH/SeqFISH, iSpatial infer/enhance all genes from scRNA-seq.
#' Finally, this function generates spatially expression of all genes.
#'
#' @param spRNA seurat object of spatial transcriptome data
#' @param scRNA seurat object of single cell RNA-seq data
#' @param dims which dimensions to use when find the nearest neighbors
#' @param k.neighbor number of neighbors to use when infer the expression
#' @param infered.assay names of output assay in seurat object
#' @param weighted.KNN we use a dynamics weight to assign scRNA values to spRNA.
#' The dynamics weight based on the correlation between scRNA cell and spRNA
#' cell.
#' @param RNA.weight the weight of scRNA-seq expression when genes detected both
#' in spRNA and scRNA. RNA.weight should be 0 to 1.
#' The inferred expression = (1 - RNA.weight) * spRNA + RNA.weight * scRNA.
#' @param n.core number of CPU cores used to parallel.
#' @param correct.scRNA Whether to stabilize expression in scRNA.
#' @param correct.spRNA Whether to stabilize expression in spRNA.
#' @param correct.neighbor number of nearest neighbors used to correct expr.
#'
#' @return returns a seurat object with inferred expression in infered.assay.
#'
#' @export
#'
infer_rPCA = function(
spRNA,
scRNA,
dims = 1:30,
k.neighbor = 30,
infered.assay = "enhanced",
weighted.KNN = TRUE,
RNA.weight = 0.5,
n.core = 10,
correct.spRNA = FALSE,
correct.scRNA = FALSE,
correct.neighbor = 3
){
spRNA$tech = "spatial"
scRNA$tech = "scRNA"
if(is.null(spRNA@assays$RNA)){
stop(paste(spRNA, " do not have 'RNA' assay."))
}
if(is.null(scRNA@assays$RNA)){
stop(paste(scRNA, " do not have 'RNA' assay."))
}
if(length(spRNA@assays$RNA@data) == 0){
stop(paste(spRNA, " is not normlized. Run Seurat::NormalizeData."))
}
if(length(scRNA@assays$RNA@data) == 0){
stop(paste(scRNA, " is not normlized. Run Seurat::NormalizeData."))
}
if(correct.spRNA){
message("Stablize spatial transcriptome.")
spRNA = stabilize_expr(spRNA, neighbor = correct.neighbor, n.core = n.core, npcs = length(dims))
}
if(correct.scRNA){
message("Stablize single cell RNAseq.")
scRNA = stabilize_expr(scRNA, neighbor = correct.neighbor, n.core = n.core, npcs = length(dims))
}
genes_select = intersect(rownames(scRNA), rownames(spRNA))
if(length(genes_select) < 10){
stop("Too few intesected genes between scRNA and spRNA.")
}
# run pca
spRNA = Seurat::FindVariableFeatures(spRNA, verbose = FALSE)
SeuratObject::VariableFeatures(spRNA) = SeuratObject::VariableFeatures(spRNA)[SeuratObject::VariableFeatures(spRNA) %in% genes_select]
spRNA = Seurat::ScaleData(spRNA, verbose = FALSE)
spRNA = Seurat::RunPCA(spRNA, npcs = length(dims), verbose = FALSE)
SeuratObject::VariableFeatures(scRNA) = SeuratObject::VariableFeatures(spRNA)
scRNA = Seurat::ScaleData(scRNA, verbose = FALSE)
scRNA = Seurat::RunPCA(scRNA, npcs = length(dims), verbose = FALSE)
# merge two objects
message("1st level integration")
anchors <- Seurat::FindIntegrationAnchors(object.list = list(spRNA, scRNA),
normalization.method = "LogNormalize",
reduction = "rpca",
anchor.features = SeuratObject::VariableFeatures(spRNA),
dims = dims,
k.anchor = 20,
verbose = FALSE)
integrated <- Seurat::IntegrateData(anchorset = anchors,
dims = dims,
normalization.method = "LogNormalize",
verbose = TRUE)
rm(anchors)
gc()
# count level normalization
message("normalization")
norm_data = expm1(integrated@assays$RNA@data)
# trim high expression gene, which affect the mean of genes
trim_quantil1 = Matrix::rowMeans(scRNA@assays$RNA@data[genes_select, ])
trim_quantil1 = trim_quantil1[trim_quantil1 < quantile(trim_quantil1, prob = 0.98) ]
trim_quantil2 = Matrix::rowMeans(spRNA@assays$RNA@data[genes_select, ])
trim_quantil2 = trim_quantil2[trim_quantil2 < quantile(trim_quantil2, prob = 0.98) ]
trim_genes = unique(c(names(trim_quantil1), names(trim_quantil2)))
# normalize factor based on trimmed genes
norm_factor = Matrix::colMeans(norm_data[trim_genes, ])
norm_factor = as.numeric(norm_factor)
norm_factor = norm_factor/mean(norm_factor)
norm_data@x <- norm_data@x / rep.int(norm_factor, diff(norm_data@p))
integrated@assays$RNA@data = log1p(norm_data)
# remove cell with number of expressed gene < 98%
integrated = integrated[, norm_factor != 0]
spRNA_images = suppressWarnings(spRNA@images)
rm(norm_data, norm_factor, scRNA, spRNA)
gc()
# check normalization
#avg_expr = Seurat::AverageExpression(integrated, group.by="tech", slot="data")
#boxplot(log1p(avg_expr$RNA[genes_select, ]))
integrated = Seurat::ScaleData(integrated, #vars.to.regress = "tech", #features = genes_select,
assay = "integrated",
verbose = FALSE)
integrated = Seurat::RunPCA(integrated,
npcs = length(dims),
assay = "integrated",
verbose = FALSE)
# find neighbors
integrated = Seurat::FindNeighbors(integrated,
k.param = k.neighbor,
reduction="pca",
dims=dims,
return.neighbor = T,
assay = "integrated")
neigbors = suppressWarnings(sapply(colnames(subset(integrated, subset = tech == "scRNA", invert = TRUE)), function(i){
Seurat::TopNeighbors(integrated@neighbors$integrated.nn, i, n = k.neighbor)
}))
neigbors = as.data.frame(neigbors)
DefaultAssay(integrated) <- "RNA"
integrated = suppressWarnings(Seurat::DietSeurat(integrated, counts = F, assays = "RNA"))
integrated_scRNA = suppressWarnings(subset(integrated, subset = tech == "scRNA"))
integrated_merFISH = suppressWarnings(subset(integrated, subset = tech == "scRNA", invert = TRUE))
cells_name = colnames(integrated_scRNA)
neigbors = as.list(neigbors)
# only select neighbor in scRNA data
neigbors = lapply(neigbors, function(x){
x = x[x %in% cells_name]
})
enhancer_expr = integrated_merFISH@assays$RNA@data
# infer expression via scRNA
message("infer expression.")
if(weighted.KNN){
# dynamics proportion to assign scRNA values to merFISH
enhancer_expr = parallel::mclapply(colnames(enhancer_expr), function(cell){
cell_neighbors = neigbors[[cell]]
if (length(cell_neighbors) == 0){
integrated@assays$RNA@data[, cell]
}else{
cor_dist = sparse.cor(integrated@assays$RNA@data[genes_select, c(cell, cell_neighbors)])[,1]
cor_dist[is.na(cor_dist)] <- 0
cor_dist[cor_dist < 0] <- 0
cor_dist = cor_dist ** 2
# normalized correlation distance matrix
cor_dist = cor_dist / sum(cor_dist)
cor_dist = c((1-RNA.weight) * cor_dist[1], RNA.weight * cor_dist[-1]) # cor_dist[1] is the cell in spRNA, here = 1
# inner produce
infer_expr = integrated@assays$RNA@data[, c(cell, cell_neighbors)] %*% cor_dist
infer_expr[,1]
}
}, mc.cores = n.core)
}else{
# constant proportion to assign scRNA values to merFISH
enhancer_expr = parallel::mclapply(colnames(enhancer_expr), function(cell){
cell_neighbors = neigbors[[cell]]
if (length(cell_neighbors) == 0){
enhancer_expr[, cell]
}else if (length(cell_neighbors) == 1){
(1-RNA.weight) * enhancer_expr[, cell] + RNA.weight * integrated_scRNA@assays$RNA@data[,cell_neighbors]
}else{
(1-RNA.weight) * enhancer_expr[, cell] + RNA.weight * Matrix::rowMeans(integrated_scRNA@assays$RNA@data[,cell_neighbors])
}
}, mc.cores = n.core)
}
enhancer_expr = do.call(cbind, enhancer_expr)
colnames(enhancer_expr) = colnames(integrated_merFISH)
# assign inferred expression value to new assay
integrated_merFISH[[infered.assay]] = SeuratObject::CreateAssayObject(data = as(enhancer_expr, "dgCMatrix"))
DefaultAssay(integrated_merFISH) <- infered.assay
integrated_merFISH = Seurat::DietSeurat(integrated_merFISH, assays = infered.assay)
# add spatial information
integrated_merFISH@images = spRNA_images
for(img in names(integrated_merFISH@images)){
integrated_merFISH@images[[img]]@coordinates <- integrated_merFISH@images[[img]]@coordinates[
rownames(integrated_merFISH@images[[img]]@coordinates) %in% colnames(integrated_merFISH), ]
}
return(integrated_merFISH)
}
#####
#' function of iSpatial
#'
#' recommand optimized K value for KNN
#'
#'
#' @param spRNA seurat object of spatial transcriptome data
#' @param scRNA seurat object of single cell RNA-seq data
#' @param dims which dimensions to use when find the nearest neighbors
#' @param k.neighbor a list of K to test
#' @param infered.assay names of output assay in seurat object
#' @param weighted.KNN we use a dynamics weight to assign scRNA values to spRNA.
#' The dynamics weight based on the correlation between scRNA cell and spRNA
#' cell.
#' @param RNA.weight the weight of scRNA-seq expression when genes detected both
#' in spRNA and scRNA. RNA.weight should be 0 to 1.
#' The inferred expression = (1 - RNA.weight) * spRNA + RNA.weight * scRNA.
#' @param n.core number of CPU cores used to parallel.
#' @param correct.scRNA Whether to stabilize expression in scRNA.
#' @param correct.spRNA Whether to stabilize expression in spRNA.
#' @param correct.neighbor number of nearest neighbors used to correct expr.
#'
#' @return returns a ggplot2 object.
#'
#' @export
#'
recommend_k = function( spRNA,
scRNA,
dims = 1:30,
k.neighbor = c(5, seq(10,80,by=10)),
infered.assay = "enhanced",
n.core = 8,
correct.spRNA = FALSE,
correct.scRNA = FALSE,
correct.weight.NN = 0.2,
correct.neighbor = 5){
obj_iSpatial = integrate_iSpatial( spRNA, scRNA,
dims = dims,
k.neighbor = max(k.neighbor),
infered.assay = "enhanced",
n.core = n.core,
correct.spRNA = correct.spRNA,
correct.scRNA = correct.scRNA,
correct.weight.NN = correct.weight.NN,
correct.neighbor = correct.neighbor)
obj_iSpatial = Seurat::FindNeighbors(obj_iSpatial,
k.param = max(k.neighbor),
reduction = "harmony",
dims = max(dims),
return.neighbor = T,
assay = "integrated")
neigbors = suppressWarnings(lapply(colnames(subset(obj_iSpatial, subset = tech == "spatial")), function(i){
neb = Seurat::TopNeighbors(obj_iSpatial@neighbors$integrated.nn, i, n = max(k.neighbor))
obj_iSpatial$tech[neb]
}))
neigbors = do.call(cbind, neigbors)
neigbors = neigbors == "scRNA"
KNN_K = lapply(k.neighbor, function(k){
neigbors_counts_iSpatial = colSums(neigbors[1:k,])
neigbors_counts = sum(neigbors_counts_iSpatial != 0)
neigbors_counts = neigbors_counts/length(neigbors_counts_iSpatial)
return(c(k, neigbors_counts))
})
KNN_K = do.call(cbind, KNN_K)
KNN_K = as.data.frame(t(KNN_K))
colnames(KNN_K) = c("K", "Percent")
ggplot2::ggplot(KNN_K, aes(K, Percent)) +
geom_point() +
theme_classic(base_size = 15) +
ylab("Percent of cells in ST, \nwith K-neighbors from scRNAseq")
}
#' 2 round intergrated in iSpatial
#'
#' This function integrates scRNA-seq and spatial transcriptome. With limited
#' genes in merFISH/SeqFISH, iSpatial infer/enhance all genes from scRNA-seq.
#' Finally, this function generates spatially expression of all genes.
#'
#' @param spRNA seurat object of spatial transcriptome data.
#' Should contain normalized data. Run Seurat::NormalizeData or others.
#' @param scRNA seurat object of single cell RNA-seq data.
#' Should contain normalized data. Run Seurat::NormalizeData or others.
#' @param dims which dimensions to use when find the nearest neighbors
#' @param k.neighbor number of neighbors to use when infer the expression
#' @param infered.assay names of output assay in seurat object
#' @param weighted.KNN we use a dynamics weight to assign scRNA values to spRNA.
#' The dynamics weight based on the correlation between scRNA cell and spRNA
#' cell.
#' @param RNA.weight the weight of scRNA-seq expression when genes detected both
#' in spRNA and scRNA. RNA.weight should be 0 to 1.
#' The inferred expression = (1 - RNA.weight) * spRNA + RNA.weight * scRNA.
#' @param n.core number of CPU cores used to parallel.
#' @param correct.scRNA Whether to stabilize expression in scRNA.
#' @param correct.spRNA Whether to stabilize expression in spRNA.
#' @param correct.neighbor number of nearest neighbors used to correct expr.
#'
#' @return returns a seurat object with inferred expression in infered.assay.
#'
#' @export
#'
intergrate = function(
spRNA,
scRNA,
dims = 1:30,
k.neighbor = 30,
infered.assay = "enhanced",
weighted.KNN = TRUE,
RNA.weight = 0.5,
n.core = 8,
correct.spRNA = FALSE,
correct.scRNA = FALSE,
correct.neighbor = 5
){
spRNA$tech = "spatial"
scRNA$tech = "scRNA"
if(is.null(spRNA@assays$RNA)){
stop(paste(spRNA, " do not have 'RNA' assay."))
}
if(is.null(scRNA@assays$RNA)){
stop(paste(scRNA, " do not have 'RNA' assay."))
}
if(length(spRNA@assays$RNA@data) == 0){
stop(paste(spRNA, " is not normlized. Run Seurat::NormalizeData."))
}
if(length(scRNA@assays$RNA@data) == 0){
stop(paste(scRNA, " is not normlized. Run Seurat::NormalizeData."))
}
if(correct.spRNA){
message("Stablize spatial transcriptome.")
spRNA = stabilize_expr(spRNA, neighbor = correct.neighbor, n.core = n.core, npcs = length(dims))
}
if(correct.scRNA){
message("Stablize single cell RNAseq.")
scRNA = stabilize_expr(scRNA, neighbor = correct.neighbor, n.core = n.core, npcs = length(dims))
}
genes_select = intersect(rownames(scRNA), rownames(spRNA))
if(length(genes_select) < 10){
stop("Too few intesected genes between scRNA and spRNA.")
}
#
# run pca
spRNA = Seurat::FindVariableFeatures(spRNA, verbose = FALSE)
SeuratObject::VariableFeatures(spRNA) = SeuratObject::VariableFeatures(spRNA)[SeuratObject::VariableFeatures(spRNA) %in% genes_select]
spRNA = Seurat::ScaleData(spRNA, scale.max = 10, verbose = FALSE)
spRNA = Seurat::RunPCA(spRNA, npcs = length(dims), verbose = FALSE)
SeuratObject::VariableFeatures(scRNA) = SeuratObject::VariableFeatures(spRNA)
scRNA = Seurat::ScaleData(scRNA, scale.max = 10, verbose = FALSE)
scRNA = Seurat::RunPCA(scRNA, npcs = length(dims), verbose = FALSE)
spRNA_images = suppressWarnings(spRNA@images)
# merge two objects
message("1st level integration")
anchors <- Seurat::FindIntegrationAnchors(object.list = list(spRNA, scRNA),
normalization.method = "LogNormalize",
reduction = "rpca",
anchor.features = SeuratObject::VariableFeatures(spRNA),
dims = dims,
#scale = F,
k.anchor = 20,
verbose = FALSE)
integrated <- Seurat::IntegrateData(anchorset = anchors,
dims = dims,
normalization.method = "LogNormalize",
verbose = FALSE)
rm(scRNA, spRNA, anchors)
gc()
# 2nd level integration
message("2nd level integration")
#SeuratObject::VariableFeatures(integrated, assay = "integrated") = genes_select
integrated = Seurat::ScaleData(integrated, #vars.to.regress = "tech", features = genes_select,
assay = "integrated",
scale.max = 10,
verbose = FALSE)
integrated = Seurat::RunPCA(integrated,
npcs = length(dims),
assay = "integrated",
verbose = FALSE)
integrated <- suppressWarnings( harmony::RunHarmony(
object = integrated,
group.by.vars = 'tech',
plot_convergence = F,
theta = 2,
lambda = 0.5,
assay.use = "integrated",
verbose = FALSE,
max.iter.harmony = 20
))
return(integrated)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.