R/utils.R

Defines functions .get_tf_path .random_walk .get_tf_res .get_tf_res_doParallel .get_score .generate_tf_res .generate_tf_gene_all .co_exp_batch .generate_ggi_res .lr_distance .lr_distance_doParallel .co_exp_p .co_exp .get_cellpair .get_st_data .get_st_meta .generate_newmeta_spot .generate_newmeta_cell .generate_newmeta_doParallel .det_neighbor .get_weight2 .get_weight1 .generate_newmeta .determine_celltype .st_dist .coef_nor .hvg .normalize_data .generate_ref .run_cell2location .run_stereoscope .run_deconvSeq .run_spotlight .run_seurat .run_rctd .percent_cell .show_warning .rename_chr

.rename_chr <- function(x){
    x <- strsplit(x, split = " ")
    x_new <- NULL
    for (i in 1:length(x)) {
        x1 <- x[[i]]
        if (length(x1) > 1) {
            x2 <- x1[1]
            for (j in 2:length(x1)) {
                x2 <- paste(x2, x1[j], sep = "_")
            }
            x1 <- x2
        }
        x_new <- c(x_new, x1)
    }
    x <- strsplit(x_new, split = "-")
    x_new <- NULL
    for (i in 1:length(x)) {
        x1 <- x[[i]]
        if (length(x1) > 1) {
            x2 <- x1[1]
            for (j in 2:length(x1)) {
                x2 <- paste(x2, x1[j], sep = "_")
            }
            x1 <- x2
        }
        x_new <- c(x_new, x1)
    }
    return(x_new)
}

.show_warning <- function(celltype, celltype_new){
    sc_meta <- data.frame(celltype = celltype, celltype_new = celltype_new, stringsAsFactors = FALSE)
    sc_meta <- unique(sc_meta)
    sc_meta <- sc_meta[sc_meta$celltype != sc_meta$celltype_new, ]
    warning_info <- NULL
    if (nrow(sc_meta) > 0) {
        warning_info <- "celltype of "
        if (nrow(sc_meta) == 1) {
            warning_info <- paste0(warning_info, sc_meta$celltype[1], " has been replaced by ", sc_meta$celltype_new[1])
        } else {
            for (i in 1:nrow(sc_meta)) {
                if (i == nrow(sc_meta)) {
                    warning_info <- paste0(warning_info, "and ",sc_meta$celltype[i])
                } else {
                    warning_info <- paste0(warning_info, sc_meta$celltype[i], ", ")
                }
            }
            warning_info <- paste0(warning_info, " have been replaced by ")
            for (i in 1:nrow(sc_meta)) {
                if (i == nrow(sc_meta)) {
                    warning_info <- paste0(warning_info, "and ",sc_meta$celltype_new[i])
                } else {
                    warning_info <- paste0(warning_info, sc_meta$celltype_new[i], ", ")
                }
            }
        }
    }
    return(warning_info)
}

.percent_cell <- function(x) {
    return(length(x[x > 0]))
}

.run_rctd <- function(st_data, sc_data, st_meta, sc_celltype){
    cellname <- unique(sc_celltype$celltype)
    ref_data <- list()
    ref_celltype <- list()
    for (i in 1:length(cellname)) {
        ref_celltype1 <- sc_celltype[sc_celltype$celltype == cellname[i], ]
        if (nrow(ref_celltype1) < 25) {
            set.seed(i)
            cell_num <- sample(x = 1:nrow(ref_celltype1), size = 25, replace = T)
            cell_new <- ref_celltype1$cell[cell_num]
            ref_data1 <- sc_data[, cell_new]
            ref_celltype1 <- data.frame(cell = colnames(ref_data1), celltype = cellname[i], stringsAsFactors = F)
        } else{
            ref_data1 <- sc_data[, ref_celltype1$cell]
        }
        ref_data[[i]] <- ref_data1
        ref_celltype[[i]] <- ref_celltype1
    }
    ref_data1 <- ref_data[[1]]
    ref_celltype1 <- ref_celltype[[1]]
    for (i in 2:length(cellname)) {
        ref_data1 <- cbind(ref_data1, ref_data[[i]])
        ref_celltype1 <- rbind(ref_celltype1, ref_celltype[[i]])
    }
    ref_celltype1$cell <- paste0("C", 1:nrow(ref_celltype1))
    colnames(ref_data1) <- ref_celltype1$cell
    # reference
    ref_cell_types <- ref_celltype1$celltype
    names(ref_cell_types) <- ref_celltype1$cell
    ref_cell_types <- factor(ref_cell_types)
    ref_nUMI <- colSums(ref_data1)
    ref_refernce <- spacexr::Reference(ref_data1, ref_cell_types, ref_nUMI)
    # test data
    test_spot_coords <- data.frame(xcoord = st_meta$x, ycoord = st_meta$y)
    rownames(test_spot_coords) <- st_meta[,1]
    test_spot_nUMI <- colSums(st_data)
    test_spot_RCTD <- spacexr::SpatialRNA(test_spot_coords, st_data, test_spot_nUMI)
    # run RCTD
    myRCTD <- spacexr::create.RCTD(test_spot_RCTD, ref_refernce)
    myRCTD <- spacexr::run.RCTD(myRCTD, doublet_mode = "full")
    results <- myRCTD@results
    st_coef <- as.matrix(results$weights)
    return(st_coef)
}

.run_seurat <- function(st_data, sc_data, sc_celltype){
    ref_data<- Seurat::CreateSeuratObject(sc_data)
    ref_data<- Seurat::SCTransform(ref_data, verbose = F)
    ref_data<- Seurat::RunPCA(ref_data, verbose = F)
    ref_data$celltype <- sc_celltype$celltype
    # Seurat intergration
    test_spot_data<- Seurat::CreateSeuratObject(st_data)
    test_spot_data<- Seurat::SCTransform(test_spot_data,verbose = F)
    test_spot_data<- Seurat::RunPCA(test_spot_data,verbose = F)
    anchors <- Seurat::FindTransferAnchors(reference = ref_data, query = test_spot_data, normalization.method = "SCT", verbose = F)
    predictions.assay <- Seurat::TransferData(anchorset = anchors, refdata = ref_data$celltype,
        prediction.assay = TRUE, weight.reduction = test_spot_data[["pca"]],dims = 1:30, verbose = F)
    res<- predictions.assay@data
    res<- as.data.frame(t(res))
    res<- res[,-ncol(res)]
    cellname<- colnames(res)
    cellname<- stringr::str_replace_all(cellname,pattern = '-',replacement = '_')
    colnames(res)<- cellname
    st_coef <- as.matrix(res)
    return(st_coef)
}

.run_spotlight <- function(st_data, sc_data, sc_celltype){
    ref_data<- Seurat::CreateSeuratObject(sc_data)
    ref_data<- Seurat::SCTransform(ref_data, verbose = F)
    Seurat::Idents(ref_data)<- sc_celltype$celltype
    cluster_markers_all <- Seurat::FindAllMarkers(object = ref_data, assay = "SCT", slot = "data", verbose = F, only.pos = TRUE)
    ref_data$celltype <- sc_celltype$celltype
    spotlight_ls <- SPOTlight::spotlight_deconvolution(se_sc = ref_data, counts_spatial = as.matrix(st_data), clust_vr = "celltype", cluster_markers = cluster_markers_all)
    spotlight_ls<- as.data.frame(spotlight_ls[[2]])
    spotlight_ls<- spotlight_ls[,-ncol(spotlight_ls)]
    st_coef <- as.matrix(spotlight_ls)
    return(st_coef)
}

.run_deconvSeq <- function(st_data, sc_data, sc_celltype){
    label_sc <- sc_celltype$celltype
    names(label_sc) <- sc_celltype$cell
    label_sc <- as.factor(label_sc)
    design.sc <- model.matrix(~-1+label_sc)
    colnames(design.sc) <- levels(label_sc)
    rownames(design.sc) <- colnames(sc_data)
    dge.sc <- deconvSeq::getdge(as.matrix(sc_data), design.sc, ncpm.min=1, nsamp.min=4, method="bin.loess")
    b0.sc <- deconvSeq::getb0.rnaseq(dge.sc, design.sc, ncpm.min=1, nsamp.min=4)
    dge.st <- deconvSeq::getdge(as.matrix(st_data), NULL, ncpm.min=1, nsamp.min=4, method="bin.loess")
    res <- deconvSeq::getx1.rnaseq(NB0=200, b0.sc, dge.st)
    st_coef <- as.matrix(res$x1)
    return(st_coef)
}

.run_stereoscope <- function(st_data, sc_data, sc_celltype, env, anaconda_path){
    ref_data<- as.data.frame(as.matrix(t(sc_data)))
    ref_data$cell<- rownames(ref_data)
    ref_data<- ref_data[,c(ncol(ref_data),1:(ncol(ref_data)-1))]
    ref_celltype <- sc_celltype
    colnames(ref_celltype)[2] <- 'bio_celltype'
    readr::write_tsv(ref_data,file = 'cnt_data.tsv',quote = "none")
    readr::write_tsv(ref_celltype,file = 'mta_data.tsv',quote = "none")
    test_spot_data<- as.data.frame(as.matrix(t(st_data)))
    test_spot_data$spot<- rownames(test_spot_data)
    test_spot_data<- test_spot_data[,c(ncol(test_spot_data),1:(ncol(test_spot_data)-1))]
    readr::write_tsv(test_spot_data,file = 'cnt_st.tsv',quote = "none")
    if (env == "base") {
      res_sh <- c("#!/bin/sh", paste0(anaconda_path,"/condabin/conda init"), paste0("source ", anaconda_path, "/etc/profile.d/conda.sh"), "conda activate")
        res_sh <- c(res_sh, "stereoscope run --sc_cnt cnt_data.tsv --sc_labels mta_data.tsv -sce 75000  -o res -n 5000 --st_cnt cnt_st.tsv -ste 75000 --gpu -stb 100 -scb 100")
        readr::write_lines(res_sh,file = "res_sh.sh")
    } else {
        res_sh <- c("#!/bin/sh", paste0(anaconda_path,"/condabin/conda init"), paste0("source ", anaconda_path, "/etc/profile.d/conda.sh"), paste0("conda activate ", env))
        res_sh <- c(res_sh, "stereoscope run --sc_cnt cnt_data.tsv --sc_labels mta_data.tsv -sce 75000  -o res -n 5000 --st_cnt cnt_st.tsv -ste 75000 --gpu -stb 100 -scb 100")
        readr::write_lines(x = res_sh,file = "res_sh.sh")
    }
    system("bash res_sh.sh")
    filename <- dir("res/cnt_st/")
    if (length(filename) > 1) {
        for (j in 1:length(filename)) {
            cat(paste0("[", j, "] ", filename[j]),"\n")
        }
        filename_used <- readline(prompt = "Which result is it, select the number: ")
        filename <- filename[as.integer(filename_used)]
    }
    st_coef <- readr::read_tsv(paste0("res/cnt_st/", filename))
    st_coef <- as.data.frame(st_coef)
    rownames(st_coef) <- st_coef[,1]
    st_coef <- st_coef[,-1]
    return(as.matrix(st_coef))
}

.run_cell2location <- function(st_data, st_meta, sc_data, sc_celltype, env){
    sc_meta <- sc_celltype
    rownames(sc_meta) <- sc_meta$cell
    sc_obj <- Seurat::CreateSeuratObject(sc_data,meta.data = sc_meta)
    st_obj <- Seurat::CreateSeuratObject(st_data)
    st_obj[['x']] <- st_meta$x
    st_obj[['y']] <- st_meta$y
    if (!dir.exists("cell2location")) {
        dir.create("cell2location")
    }
    adata_sc <- sceasy::convertFormat(sc_obj, from="seurat", to="anndata", main_layer="counts", drop_single_values=FALSE)
    adata_st <- sceasy::convertFormat(st_obj, from="seurat", to="anndata", main_layer="counts", drop_single_values=FALSE)
    anndata::write_h5ad(adata_sc,"cell2location/sc_rna.h5ad")
    anndata::write_h5ad(adata_st,"cell2location/st_rna.h5ad")
    res_py <- c("import sys", "import scanpy as sc", "import anndata", "import pandas as pd", "import numpy as np","import matplotlib.pyplot as plt")
    res_py <- c(res_py, "import matplotlib as mpl", "import subprocess", "import time", "import os", "import cell2location", "import scvi")
    res_py <- c(res_py, "from matplotlib import rcParams", "rcParams['pdf.fonttype'] = 42", "import seaborn as sns", "from scipy.sparse import csr_matrix", "from cell2location.utils.filtering import filter_genes")
    res_py <- c(res_py, "sc_file_path = 'cell2location/sc_rna.h5ad'", "spatial_file_path = 'cell2location/st_rna.h5ad'", "celltype_key = 'celltype'", "output_file_path = 'cell2location/'")
    res_py <- c(res_py, "adata_snrna_raw = sc.read_h5ad(sc_file_path)", "adata_vis = sc.read_h5ad(spatial_file_path)", "adata_snrna_raw.X = csr_matrix(adata_snrna_raw.X)", "adata_vis.X = csr_matrix(adata_vis.X)")
    res_py <- c(res_py, "adata_snrna_raw = adata_snrna_raw[~adata_snrna_raw.obs[celltype_key].isin(np.array(adata_snrna_raw.obs[celltype_key].value_counts()[adata_snrna_raw.obs[celltype_key].value_counts() <=1].index))]")
    res_py <- c(res_py, "sc.pp.filter_genes(adata_snrna_raw,min_cells=1)", "sc.pp.filter_cells(adata_snrna_raw,min_genes=1)")
    res_py <- c(res_py, "adata_snrna_raw.obs[celltype_key] = pd.Categorical(adata_snrna_raw.obs[celltype_key])", "adata_snrna_raw = adata_snrna_raw[~adata_snrna_raw.obs[celltype_key].isna(), :]")
    res_py <- c(res_py, "selected = filter_genes(adata_snrna_raw, cell_count_cutoff=5, cell_percentage_cutoff2=0.03, nonz_mean_cutoff=1.12)")
    res_py <- c(res_py, "adata_snrna_raw = adata_snrna_raw[:, selected].copy()")
    res_py <- c(res_py, "cell2location.models.RegressionModel.setup_anndata(adata=adata_snrna_raw,labels_key=celltype_key)")
    res_py <- c(res_py, "from cell2location.models import RegressionModel")
    res_py <- c(res_py, "mod = RegressionModel(adata_snrna_raw)")
    res_py <- c(res_py, "mod.train(max_epochs=250, batch_size=2500, train_size=1, lr=0.002, use_gpu=True)")
    res_py <- c(res_py, "adata_snrna_raw = mod.export_posterior(adata_snrna_raw, sample_kwargs={'num_samples': 1000, 'batch_size': 2500, 'use_gpu': True})")
    res_py <- c(res_py, "if 'means_per_cluster_mu_fg' in adata_snrna_raw.varm.keys():")
    res_py <- c(res_py, "    inf_aver = adata_snrna_raw.varm['means_per_cluster_mu_fg'][[f'means_per_cluster_mu_fg_{i}'", "                                    for i in adata_snrna_raw.uns['mod']['factor_names']]].copy()")
    res_py <- c(res_py, "else:", "    inf_aver = adata_snrna_raw.var[[f'means_per_cluster_mu_fg_{i}'", "                                    for i in adata_snrna_raw.uns['mod']['factor_names']]].copy()")
    res_py <- c(res_py, "inf_aver.columns = adata_snrna_raw.uns['mod']['factor_names']", "inf_aver.iloc[0:5, 0:5]")
    res_py <- c(res_py, "intersect = np.intersect1d(adata_vis.var_names, inf_aver.index)", "adata_vis = adata_vis[:, intersect].copy()", "inf_aver = inf_aver.loc[intersect, :].copy()")
    res_py <- c(res_py, "cell2location.models.Cell2location.setup_anndata(adata = adata_vis)")
    res_py <- c(res_py, "mod = cell2location.models.Cell2location(adata_vis, cell_state_df=inf_aver,N_cells_per_location=30,detection_alpha=200)")
    res_py <- c(res_py, "cell2location.models.Cell2location.view_anndata_setup(mod)", "mod.train(max_epochs=30000,batch_size=None,train_size=1,use_gpu=True)")
    res_py <- c(res_py, "adata_vis = mod.export_posterior(adata_vis, sample_kwargs={'num_samples': 1000, 'batch_size': mod.adata.n_obs, 'use_gpu': True})")
    res_py <- c(res_py, "print(adata_vis)", "adata_vis.obsm['q05_cell_abundance_w_sf'].to_csv(output_file_path + '/cell2location_result.csv')")
    readr::write_lines(x = res_py,file = "res_py.py")
    reticulate::use_condaenv(env)
    reticulate::source_python(file = "res_py.py")
    st_coef <- readr::read_csv("cell2location/cell2location_result.csv")
    st_coef <- as.data.frame(st_coef)
    rownames(st_coef) <- st_coef[,1]
    st_coef <- st_coef[,-1]
    coef_cellname <- colnames(st_coef)
    coef_cellname <- strsplit(coef_cellname,split = "abundance_w_sf_")
    coef_cellname <- as.data.frame(coef_cellname)
    coef_cellname <- as.data.frame(t(coef_cellname))
    coef_cellname <- coef_cellname[,2]
    colnames(st_coef) <- coef_cellname
    return(as.matrix(st_coef))
}

.generate_ref <- function(sc_ndata, sc_celltype, if_doParallel) {
    ref_celltype <- unique(sc_celltype$celltype)
    if (if_doParallel) {
        sc_ref <- foreach::foreach(i = 1:length(ref_celltype), .combine = "cbind", .packages = "Matrix") %dopar% {
            sc_data_celltype <- sc_ndata[, sc_celltype[sc_celltype$celltype == ref_celltype[i], ]$cell]
            if (is(sc_data_celltype, "dgCMatrix")) {
                as.numeric(rowMeans(sc_data_celltype))
            } else {
                as.numeric(sc_data_celltype)
            }
        }
        rownames(sc_ref) <- rownames(sc_ndata)
        colnames(sc_ref) <- ref_celltype
    } else {
        sc_ref <- list()
        for (i in 1:length(ref_celltype)) {
            sc_data_celltype <- sc_ndata[, sc_celltype[sc_celltype$celltype == ref_celltype[i], ]$cell]
            if (is(sc_data_celltype, "dgCMatrix")) {
                sc_ref[[i]] <- rowMeans(sc_data_celltype)
                names(sc_ref)[i] <- ref_celltype[i]
            } else {
                names(sc_data_celltype) <- rownames(sc_ndata)
                sc_ref[[i]] <- sc_data_celltype
                names(sc_ref)[i] <- ref_celltype[i]
            }
        }
    }
    sc_ref <- as.data.frame(sc_ref, stringsAsFactors = F)
    cellname <- colnames(sc_ref)
    cellname <- cellname[order(cellname)]
    sc_ref <- sc_ref[, cellname]
    return(sc_ref)
}

.normalize_data <- function(rawdata) {
    rawdata <- Seurat::CreateSeuratObject(rawdata)
    rawdata <- Seurat::NormalizeData(rawdata, verbose = F)
    ver <- packageVersion("Seurat")
    ver <- substr(ver,1,1)
    if (ver >= 5) {
        rawdata <- rawdata@assays$RNA$data
    } else {
        rawdata <- rawdata[["RNA"]]@data
    }
    return(rawdata)
}

.hvg <- function(sc_ndata, sc_celltype) {
    sc_data1 <- Seurat::CreateSeuratObject(sc_ndata)
    Seurat::Idents(sc_data1) <- sc_celltype$celltype
    sc_hvg <- Seurat::FindAllMarkers(sc_data1)
    sc_hvg <- unique(sc_hvg$gene)
    return(sc_hvg)
}

.coef_nor <- function(st_coef) {
    for (i in 1:nrow(st_coef)) {
        st_coef1 <- as.numeric(st_coef[i, ])
        st_coef1 <- st_coef1/sum(st_coef1)
        st_coef[i, ] <- st_coef1
    }
    return(as.data.frame(st_coef))
}

.st_dist <- function(st_meta) {
    st_dist <- as.matrix(stats::dist(x = cbind(st_meta$x, st_meta$y)))
    rownames(st_dist) <- st_meta[, 1]
    colnames(st_dist) <- st_meta[, 1]
    return(st_dist)
}

.determine_celltype <- function(st_meta, min_percent) {
    st_coef <- st_meta[, -c(1:7)]
    cellname <- colnames(st_coef)
    for (i in 1:nrow(st_coef)) {
        st_coef1 <- as.numeric(st_coef[i, ])
        if (max(st_coef1) > min_percent) {
            st_meta$celltype[i] <- cellname[which(st_coef1 == max(st_coef1))]
        }
    }
    return(st_meta)
}

.generate_newmeta <- function(st_meta, st_dist, min_percent) {
    # generate new data
    newmeta_spot <- NULL
    newmeta_ratio <- NULL
    newmeta_cell <- NULL
    newmeta_x <- NULL
    newmeta_y <- NULL
    st_meta <- st_meta[st_meta$label != "less nFeatures", ]
    cellname <- colnames(st_meta)[-c(1:7)]
    for (i in 1:nrow(st_meta)) {
        spot_name <- st_meta$spot[i]
        spot_x <- st_meta$x[i]
        spot_y <- st_meta$y[i]
        spot_cellnum <- st_meta$cell_num[i]
        spot_percent <- as.numeric(st_meta[i, -c(1:7)])
        spot_percent <- spot_percent * spot_cellnum
        spot_percent_floor <- floor(spot_percent)
        spot_percent_dec <- spot_percent - spot_percent_floor
        # cell_num < 1
        spot_celltype <- which(spot_percent_dec > min_percent)
        k <- 0
        if (length(spot_celltype) > 0) {
            spot_percent <- spot_percent_dec[spot_celltype]
            spot_celltype <- cellname[spot_celltype]
            for (j in 1:length(spot_celltype)) {
                spot_percent1 <- spot_percent[j]
                newmeta_ratio <- c(newmeta_ratio, spot_percent1)
                newmeta_cell <- c(newmeta_cell, spot_celltype[j])
                k <- k + 1
            }
        }
        # cell_num >= 1
        spot_celltype <- which(spot_percent_floor > 0)
        if (length(spot_celltype) > 0) {
            spot_percent <- spot_percent_floor[spot_celltype]
            spot_celltype <- cellname[spot_celltype]
            spot_cell <- NULL
            for (j in 1:length(spot_celltype)) {
                spot_cell <- c(spot_cell, rep(spot_celltype[j], spot_percent[j]))
            }
            for (j in 1:length(spot_cell)) {
                spot_percent1 <- 1
                newmeta_ratio <- c(newmeta_ratio, spot_percent1)
                newmeta_cell <- c(newmeta_cell, spot_cell[j])
                k <- k + 1
            }
        }
        if (k > 0) {
            newmeta_spot <- c(newmeta_spot, rep(spot_name, k))
            if (k == 1) {
                newmeta_x <- c(newmeta_x, spot_x)
                newmeta_y <- c(newmeta_y, spot_y)
            } else {
                n_neighbor <- 4
                st_dist1 <- st_dist[, spot_name]
                st_dist1 <- st_dist1[st_dist1 > 0]
                st_dist1 <- st_dist1[order(st_dist1)]
                st_dist1 <- st_dist1[1:n_neighbor]
                st_meta_neighbor <- st_meta[st_meta$spot %in% names(st_dist1), ]
                st_meta_neighbor <- .det_neighbor(st_meta_neighbor, spot_x, spot_y, st_dist1)
                if (nrow(st_meta_neighbor) == 0) {
                    for (j in 1:k) {
                        st_angle_new <- sample(x = c(1:360), size = 1)
                        st_dist_new <- sample(x = c(0:st_dist1), size = 1)
                        newmeta_x1 <- spot_x + st_dist_new * cos(st_angle_new * pi/180)
                        newmeta_y1 <- spot_y + st_dist_new * sin(st_angle_new * pi/180)
                        newmeta_x <- c(newmeta_x, newmeta_x1)
                        newmeta_y <- c(newmeta_y, newmeta_y1)
                    }
                } else {
                    for (j in 1:k) {
                        sc_name <- newmeta_cell[j]
                        sc_w1 <- .get_weight1(st_meta_neighbor, sc_name)
                        set.seed(j)
                        st_angle_new <- sample(x = c(1:360), size = 1, prob = as.numeric(sc_w1))
                        spot_ratio <- st_meta[st_meta$spot == spot_name, sc_name]
                        st_dist_new <- .get_weight2(st_meta_neighbor, sc_name, st_dist1, st_angle_new, spot_ratio)
                        newmeta_x1 <- spot_x + st_dist_new * cos(st_angle_new * pi/180)
                        newmeta_y1 <- spot_y + st_dist_new * sin(st_angle_new * pi/180)
                        newmeta_x <- c(newmeta_x, newmeta_x1)
                        newmeta_y <- c(newmeta_y, newmeta_y1)
                    }
                }
            }
        }
    }
    newmeta <- data.frame(spot = newmeta_spot, cell_ratio = newmeta_ratio, celltype = newmeta_cell,
                        x = newmeta_x, y = newmeta_y, stringsAsFactors = F)
    return(newmeta)
}

.get_weight1 <- function(st_meta_neighbor, sc_name){
    sc_name_ratio <- st_meta_neighbor[,sc_name]
    names(sc_name_ratio) <- st_meta_neighbor$w1
    if (!"I" %in% names(sc_name_ratio)) {
        sc_name_ratio <- c(sc_name_ratio, 0)
        names(sc_name_ratio)[length(sc_name_ratio)] <- "I"
    }
    if (!"II" %in% names(sc_name_ratio)) {
        sc_name_ratio <- c(sc_name_ratio, 0)
        names(sc_name_ratio)[length(sc_name_ratio)] <- "II"
    }
    if (!"III" %in% names(sc_name_ratio)) {
        sc_name_ratio <- c(sc_name_ratio, 0)
        names(sc_name_ratio)[length(sc_name_ratio)] <- "III"
    }
    if (!"IV" %in% names(sc_name_ratio)) {
        sc_name_ratio <- c(sc_name_ratio, 0)
        names(sc_name_ratio)[length(sc_name_ratio)] <- "IV"
    }
    sc_name_ratio <- sc_name_ratio[c("I", "II", "III", "IV")]
    sc_name_ratio <- sc_name_ratio + 1
    sc_name_ratio <- sc_name_ratio/sum(sc_name_ratio)
    sc_w1 <- rep(sc_name_ratio, each = 90)
    return(sc_w1)
}

.get_weight2 <- function(st_meta_neighbor, sc_name, st_dist1, st_angle_new, spot_ratio){
    neighbor_weight <- 0
    if (st_angle_new > 0 & st_angle_new <= 90) {
        if ("I" %in% st_meta_neighbor$w1) {
            st_meta_neighbor1 <- st_meta_neighbor[st_meta_neighbor$w1 == "I",]
            neighbor_weight <- st_meta_neighbor1[ ,sc_name]
        }
    }
    if (st_angle_new > 90 & st_angle_new <= 180) {
        if ("II" %in% st_meta_neighbor$w1) {
            st_meta_neighbor1 <- st_meta_neighbor[st_meta_neighbor$w1 == "II",]
            neighbor_weight <- st_meta_neighbor1[ ,sc_name]
        }
    }
    if (st_angle_new > 180 & st_angle_new <= 270) {
        if ("III" %in% st_meta_neighbor$w1) {
            st_meta_neighbor1 <- st_meta_neighbor[st_meta_neighbor$w1 == "III",]
            neighbor_weight <- st_meta_neighbor1[ ,sc_name]
        }
    }
    if (st_angle_new > 270 & st_angle_new <= 360) {
        if ("IV" %in% st_meta_neighbor$w1) {
            st_meta_neighbor1 <- st_meta_neighbor[st_meta_neighbor$w1 == "IV",]
            neighbor_weight <- st_meta_neighbor1[ ,sc_name]
        }
    }
    spot_ratio <- c(spot_ratio, neighbor_weight)
    spot_ratio <- spot_ratio + 1
    spot_ratio <- spot_ratio/sum(spot_ratio)
    sc_w2 <- rep(spot_ratio, each = 5)
    sc_w2 <- sample(c(1:10), size = 1, prob = sc_w2)/10
    st_dist_new <- st_dist1[1]*sc_w2/2
    return(st_dist_new)
}

.det_neighbor <- function(st_meta_neighbor, spot_x, spot_y, st_dist1){
    st_meta_neighbor$w1 <- "NA"
    # right-down
    st_meta_neighbor1 <- st_meta_neighbor[st_meta_neighbor$x >= spot_x & st_meta_neighbor$y > spot_y,]
    if (nrow(st_meta_neighbor1)> 1) {
        neighbor_name <- st_meta_neighbor1$spot
        st_dist2 <- st_dist1[neighbor_name]
        st_dist3 <- names(which(st_dist2 == min(st_dist2)))
        if (length(st_dist3) > 1) {
            st_dist3 <- st_dist3[1]
        }
        st_dist2 <- names(st_dist2)
        st_dist2 <- st_dist2[!st_dist2 %in% st_dist3]
        st_meta_neighbor <- st_meta_neighbor[!st_meta_neighbor$spot %in% st_dist2,]
    }
    st_meta_neighbor1 <- st_meta_neighbor[st_meta_neighbor$x >= spot_x & st_meta_neighbor$y > spot_y,]
    if (nrow(st_meta_neighbor1) > 0) {
        st_meta_neighbor[st_meta_neighbor$spot == st_meta_neighbor1$spot, ]$w1 <- "I"
    }
    # right-down
    st_meta_neighbor1 <- st_meta_neighbor[st_meta_neighbor$x > spot_x & st_meta_neighbor$y <= spot_y,]
    if (nrow(st_meta_neighbor1)> 1) {
        neighbor_name <- st_meta_neighbor1$spot
        st_dist2 <- st_dist1[neighbor_name]
        st_dist3 <- names(which(st_dist2 == min(st_dist2)))
        if (length(st_dist3) > 1) {
            st_dist3 <- st_dist3[1]
        }
        st_dist2 <- names(st_dist2)
        st_dist2 <- st_dist2[!st_dist2 %in% st_dist3]
        st_meta_neighbor <- st_meta_neighbor[!st_meta_neighbor$spot %in% st_dist2,]
    }
    st_meta_neighbor1 <- st_meta_neighbor[st_meta_neighbor$x > spot_x & st_meta_neighbor$y <= spot_y,]
    if (nrow(st_meta_neighbor1) > 0) {
        st_meta_neighbor[st_meta_neighbor$spot == st_meta_neighbor1$spot, ]$w1 <- "IV"
    }
    # left-up
    st_meta_neighbor1 <- st_meta_neighbor[st_meta_neighbor$x < spot_x & st_meta_neighbor$y >= spot_y,]
    if (nrow(st_meta_neighbor1)> 1) {
        neighbor_name <- st_meta_neighbor1$spot
        st_dist2 <- st_dist1[neighbor_name]
        st_dist3 <- names(which(st_dist2 == min(st_dist2)))
        if (length(st_dist3) > 1) {
            st_dist3 <- st_dist3[1]
        }
        st_dist2 <- names(st_dist2)
        st_dist2 <- st_dist2[!st_dist2 %in% st_dist3]
        st_meta_neighbor <- st_meta_neighbor[!st_meta_neighbor$spot %in% st_dist2,]
    }
    st_meta_neighbor1 <- st_meta_neighbor[st_meta_neighbor$x < spot_x & st_meta_neighbor$y >= spot_y,]
    if (nrow(st_meta_neighbor1) > 0) {
        st_meta_neighbor[st_meta_neighbor$spot == st_meta_neighbor1$spot, ]$w1 <- "II"
    }
    # left-down
    st_meta_neighbor1 <- st_meta_neighbor[st_meta_neighbor$x <= spot_x & st_meta_neighbor$y < spot_y,]
    if (nrow(st_meta_neighbor1)> 1) {
        neighbor_name <- st_meta_neighbor1$spot
        st_dist2 <- st_dist1[neighbor_name]
        st_dist3 <- names(which(st_dist2 == min(st_dist2)))
        if (length(st_dist3) > 1) {
            st_dist3 <- st_dist3[1]
        }
        st_dist2 <- names(st_dist2)
        st_dist2 <- st_dist2[!st_dist2 %in% st_dist3]
        st_meta_neighbor <- st_meta_neighbor[!st_meta_neighbor$spot %in% st_dist2,]
    }
    st_meta_neighbor1 <- st_meta_neighbor[st_meta_neighbor$x <= spot_x & st_meta_neighbor$y < spot_y,]
    if (nrow(st_meta_neighbor1) > 0) {
        st_meta_neighbor[st_meta_neighbor$spot == st_meta_neighbor1$spot, ]$w1 <- "III"
    }
    return(st_meta_neighbor)
}

.generate_newmeta_doParallel <- function(st_meta, st_dist, min_percent) {
    # generate new data
    st_meta <- st_meta[st_meta$label != "less nFeatures", ]
    cellname <- colnames(st_meta)[-c(1:7)]
    newmeta <- foreach::foreach (i = 1:nrow(st_meta), .combine = rbind, .packages = "Matrix", .export = c(".det_neighbor", ".get_weight1", ".get_weight2")) %dopar% {
        newmeta_spot <- NULL
        newmeta_ratio <- NULL
        newmeta_cell <- NULL
        newmeta_x <- NULL
        newmeta_y <- NULL
        spot_name <- st_meta$spot[i]
        spot_x <- st_meta$x[i]
        spot_y <- st_meta$y[i]
        spot_cellnum <- st_meta$cell_num[i]
        spot_percent <- as.numeric(st_meta[i, -c(1:7)])
        spot_percent <- spot_percent * spot_cellnum
        spot_percent_floor <- floor(spot_percent)
        spot_percent_dec <- spot_percent - spot_percent_floor
        # cell_num < 1
        spot_celltype <- which(spot_percent_dec > min_percent)
        k <- 0
        if (length(spot_celltype) > 0) {
            spot_percent <- spot_percent_dec[spot_celltype]
            spot_celltype <- cellname[spot_celltype]
            for (j in 1:length(spot_celltype)) {
                spot_percent1 <- spot_percent[j]
                newmeta_ratio <- c(newmeta_ratio, spot_percent1)
                newmeta_cell <- c(newmeta_cell, spot_celltype[j])
                k <- k + 1
            }
        }
        # cell_num >= 1
        spot_celltype <- which(spot_percent_floor > 0)
        if (length(spot_celltype) > 0) {
            spot_percent <- spot_percent_floor[spot_celltype]
            spot_celltype <- cellname[spot_celltype]
            spot_cell <- NULL
            for (j in 1:length(spot_celltype)) {
                spot_cell <- c(spot_cell, rep(spot_celltype[j], spot_percent[j]))
            }
            for (j in 1:length(spot_cell)) {
                spot_percent1 <- 1
                newmeta_ratio <- c(newmeta_ratio, spot_percent1)
                newmeta_cell <- c(newmeta_cell, spot_cell[j])
                k <- k + 1
            }
        }
        if (k > 0) {
            newmeta_spot <- c(newmeta_spot, rep(spot_name, k))
            if (k == 1) {
                newmeta_x <- c(newmeta_x, spot_x)
                newmeta_y <- c(newmeta_y, spot_y)
            } else {
                n_neighbor <- 4
                st_dist1 <- st_dist[, spot_name]
                st_dist1 <- st_dist1[st_dist1 > 0]
                st_dist1 <- st_dist1[order(st_dist1)]
                st_dist1 <- st_dist1[1:n_neighbor]
                st_meta_neighbor <- st_meta[st_meta$spot %in% names(st_dist1), ]
                st_meta_neighbor <- .det_neighbor(st_meta_neighbor, spot_x, spot_y, st_dist1)
                if (nrow(st_meta_neighbor) == 0) {
                    for (j in 1:k) {
                        st_angle_new <- sample(x = c(1:360), size = 1)
                        st_dist_new <- sample(x = c(0:st_dist1), size = 1)
                        newmeta_x1 <- spot_x + st_dist_new * cos(st_angle_new * pi/180)
                        newmeta_y1 <- spot_y + st_dist_new * sin(st_angle_new * pi/180)
                        newmeta_x <- c(newmeta_x, newmeta_x1)
                        newmeta_y <- c(newmeta_y, newmeta_y1)
                    }
                } else {
                    for (j in 1:k) {
                        sc_name <- newmeta_cell[j]
                        sc_w1 <- .get_weight1(st_meta_neighbor, sc_name)
                        set.seed(j)
                        st_angle_new <- sample(x = c(1:360), size = 1, prob = as.numeric(sc_w1))
                        spot_ratio <- st_meta[st_meta$spot == spot_name, sc_name]
                        st_dist_new <- .get_weight2(st_meta_neighbor, sc_name, st_dist1, st_angle_new, spot_ratio)
                        newmeta_x1 <- spot_x + st_dist_new * cos(st_angle_new * pi/180)
                        newmeta_y1 <- spot_y + st_dist_new * sin(st_angle_new * pi/180)
                        newmeta_x <- c(newmeta_x, newmeta_x1)
                        newmeta_y <- c(newmeta_y, newmeta_y1)
                    }
                }
            }
            data.frame(spot = newmeta_spot, cell_ratio = newmeta_ratio, celltype = newmeta_cell, x = as.numeric(newmeta_x), y = as.numeric(newmeta_y), stringsAsFactors = F)
        } else {
            data.frame(spot = "NA", cell_ratio = "NA", celltype = "NA", x = "NA", y = "NA", stringsAsFactors = F)
        }
    }
    newmeta <- newmeta[newmeta$spot != "NA",]
    return(newmeta)
}

.generate_newmeta_cell <- function(newmeta, st_ndata, sc_ndata, sc_celltype, iter_num, n_cores, if_doParallel) {
    newmeta_spotname <- unique(newmeta$spot)
    newmeta_cell <- NULL
    cat(crayon::cyan("Generating single-cell data for each spot", "\n"))
    if (if_doParallel) {
        cl <- parallel::makeCluster(n_cores)
        doParallel::registerDoParallel(cl)
        newmeta_cell <- foreach::foreach (i = 1:length(newmeta_spotname), .combine = "rbind", .packages = "Matrix", .export = ".generate_newmeta_spot") %dopar% {
            spot_name <- newmeta_spotname[i]
            .generate_newmeta_spot(spot_name, newmeta, st_ndata, sc_ndata, sc_celltype, iter_num)
        }
        doParallel::stopImplicitCluster()
        parallel::stopCluster(cl)
    } else {
        for (i in 1:length(newmeta_spotname)) {
            spot_name <- newmeta_spotname[i]
            newmeta_spot <- .generate_newmeta_spot(spot_name, newmeta, st_ndata, sc_ndata, sc_celltype, iter_num)
            newmeta_cell <- rbind(newmeta_cell, newmeta_spot)
        }
    }
    newmeta_cell$cell <- paste0("C", 1:nrow(newmeta))
    newmeta_cell <- newmeta_cell[, c(8, 4, 5, 3:1, 7, 6)]
    return(newmeta_cell)
}

.generate_newmeta_spot <- function(spot_name, newmeta, st_ndata, sc_ndata, sc_celltype, iter_num) {
    newmeta_spot <- newmeta[newmeta$spot == spot_name, ]
    spot_ndata <- as.numeric(st_ndata[, spot_name])
    # random sampling
    score_cor <- NULL
    spot_cell_id <- list()
    for (k in 1:iter_num) {
        spot_cell_id_k <- NULL
        for (j in 1:nrow(newmeta_spot)) {
            spot_celltype <- newmeta_spot$celltype[j]
            sc_celltype1 <- sc_celltype[sc_celltype$celltype == spot_celltype, "cell"]
            sc_celltype1 <- sc_celltype1[sample(x = 1:length(sc_celltype1), size = 1)]
            spot_cell_id_k <- c(spot_cell_id_k, sc_celltype1)
        }
        if (length(spot_cell_id_k) == 1) {
            spot_ndata_pred <- as.numeric(sc_ndata[, spot_cell_id_k])
        } else {
            spot_ndata_pred <- as.numeric(rowSums(sc_ndata[, spot_cell_id_k]))
        }
        spot_ndata_cor <- cor(spot_ndata, spot_ndata_pred)
        score_cor <- c(score_cor, spot_ndata_cor)
        spot_cell_id[[k]] <- spot_cell_id_k
    }
    spot_cell_id <- spot_cell_id[[which.max(score_cor)]]
    newmeta_spot$cell_id <- spot_cell_id
    newmeta_spot$cor <- max(score_cor)
    return(newmeta_spot)
}

.get_st_meta <- function(object) {
    st_type <- object@para$st_type
    if_skip_dec_celltype <- object@para$if_skip_dec_celltype
    if (st_type == "single-cell") {
        st_meta <- object@meta$rawmeta
        st_meta <- st_meta[st_meta$celltype != "unsure", ]
        st_meta <- st_meta[st_meta$label != "less nFeatures", ]
        if (nrow(st_meta) == 0) {
            stop("No cell types found in rawmeta by excluding the unsure and less nFeatures cells!")
        }
    } else {
        if (if_skip_dec_celltype) {
            st_meta <- object@meta$rawmeta
            colnames(st_meta)[1] <- "cell"
        } else {
            st_meta <- object@meta$newmeta
        }
    }
    return(st_meta)
}

.get_st_data <- function(object) {
    st_type <- object@para$st_type
    if_skip_dec_celltype <- object@para$if_skip_dec_celltype
    if (st_type == "single-cell") {
        st_data <- object@data
        if (if_skip_dec_celltype) {
            st_data <- st_data$rawdata
        } else {
            st_data <- st_data$rawndata
        }
        st_meta <- object@meta$rawmeta
        st_meta <- st_meta[st_meta$celltype != "unsure", ]
        st_meta <- st_meta[st_meta$label != "less nFeatures", ]
        st_data <- st_data[, st_meta$cell]
        gene_expressed_ratio <- rowSums(st_data)
        st_data <- st_data[which(gene_expressed_ratio > 0), ]
        if (nrow(st_data) == 0) {
            stop("No cell types found in rawmeta by excluding the unsure and less nFeatures cells!")
        }
    } else {
        if (if_skip_dec_celltype) {
            st_data <- object@data$rawdata
        } else {
            st_data <- object@data$newdata
        }
        gene_expressed_ratio <- rowSums(st_data)
        st_data <- st_data[which(gene_expressed_ratio > 0), ]
        if (nrow(st_data) == 0) {
            stop("No expressed genes in newdata!")
        }
    }
    return(st_data)
}

.get_cellpair <- function(celltype_dist, st_meta, celltype_sender, celltype_receiver, n_neighbor) {
    cell_sender <- st_meta[st_meta$celltype == celltype_sender, ]
    cell_receiver <- st_meta[st_meta$celltype == celltype_receiver, ]
    cell_pair <- list()
    for (j in 1:nrow(cell_sender)) {
        celltype_dist1 <- celltype_dist[, cell_sender$cell[j]]
        celltype_dist1 <- celltype_dist1[celltype_dist1 > 0]
        celltype_dist1 <- celltype_dist1[order(celltype_dist1)]
        cell_pair[[j]] <- names(celltype_dist1[1:n_neighbor])
    }
    names(cell_pair) <- cell_sender$cell
    cell_pair <- as.data.frame(cell_pair, stringsAsFactors = F)
    cell_pair <- reshape2::melt(cell_pair, measure.vars = colnames(cell_pair), variable.name = "cell_sender", value.name = "cell_receiver")
    cell_pair$cell_sender <- as.character(cell_pair$cell_sender)
    cell_pair <- cell_pair[cell_pair$cell_receiver %in% cell_receiver$cell, ]
    return(cell_pair)
}

.co_exp <- function(x) {
    x_1 <- x[1:(length(x)/2)]
    x_2 <- x[(length(x)/2 + 1):length(x)]
    x_12 <- x_1 * x_2
    x_12_ratio <- length(x_12[x_12 > 0])/length(x_12)
    return(x_12_ratio)
}

.co_exp_p <- function(x) {
    x_real <- x[length(x)]
    x_per <- x[-length(x)]
    x_p <- length(x_per[x_per >= x_real])/length(x_per)
}

.lr_distance_doParallel <- function(st_data, cell_pair, lrdb, celltype_sender, celltype_receiver, per_num, pvalue) {
    ### [1] LR distance
    lrdb$celltype_sender <- celltype_sender
    lrdb$celltype_receiver <- celltype_receiver
    lrdb$lr_co_exp_num <- 0
    lrdb$lr_co_ratio <- 0
    lrdb$lr_co_ratio_pvalue <- 1
    rownames(lrdb) <- 1:nrow(lrdb)
    # calculate co-expression ratio
    ndata_ligand <- st_data[lrdb$ligand, cell_pair$cell_sender]
    ndata_receptor <- st_data[lrdb$receptor, cell_pair$cell_receiver]
    if (nrow(lrdb) == 1) {
        ndata_lr <- ndata_ligand * ndata_receptor
        lrdb$lr_co_exp_num <- length(ndata_lr[ndata_lr > 0])
        lrdb$lr_co_ratio <- length(ndata_lr[ndata_lr > 0])/length(ndata_lr)
    } else {
        ndata_lr <- cbind(ndata_ligand, ndata_receptor)
        lrdb$lr_co_ratio <- apply(ndata_lr, 1, .co_exp)
        lrdb$lr_co_exp_num <- apply(ndata_lr, 1, .co_exp) * nrow(cell_pair)
    }
    # permutation test
    res_per <- foreach::foreach (j=1:per_num, .packages = "Matrix", .export = ".co_exp") %dopar% {
        set.seed(j)
        cell_id <- sample(x = 1:ncol(st_data), size = nrow(cell_pair) * 2, replace = T)
        ndata_ligand <- st_data[lrdb$ligand, cell_id[1:(length(cell_id)/2)]]
        ndata_receptor <- st_data[lrdb$receptor, cell_id[(length(cell_id)/2 + 1):length(cell_id)]]
        if (nrow(lrdb) == 1) {
            ndata_lr <- ndata_ligand * ndata_receptor
            length(ndata_lr[ndata_lr > 0])/length(ndata_lr)
        } else {
            ndata_lr <- cbind(ndata_ligand, ndata_receptor)
            as.numeric(apply(ndata_lr, 1, .co_exp))
        }
    }
    names(res_per) <- paste0("P", 1:length(res_per))
    res_per <- as.data.frame(res_per)
    res_per$real <- lrdb$lr_co_ratio
    lrdb$lr_co_ratio_pvalue <- apply(res_per, 1, .co_exp_p)
    lrdb <- lrdb[lrdb$lr_co_ratio_pvalue < pvalue, ]
    return(lrdb)
}

.lr_distance <- function(st_data, cell_pair, lrdb, celltype_sender, celltype_receiver, per_num, pvalue) {
    .co_exp <- function(x) {
        x_1 <- x[1:(length(x)/2)]
        x_2 <- x[(length(x)/2 + 1):length(x)]
        x_12 <- x_1 * x_2
        x_12_ratio <- length(x_12[x_12 > 0])/length(x_12)
        return(x_12_ratio)
    }
    .co_exp_p <- function(x) {
        x_real <- x[length(x)]
        x_per <- x[-length(x)]
        x_p <- length(x_per[x_per >= x_real])/length(x_per)
    }
    ### [1] LR distance
    lrdb$celltype_sender <- celltype_sender
    lrdb$celltype_receiver <- celltype_receiver
    lrdb$lr_co_exp_num <- 0
    lrdb$lr_co_ratio <- 0
    lrdb$lr_co_ratio_pvalue <- 1
    rownames(lrdb) <- 1:nrow(lrdb)
    # calculate co-expression ratio
    if (nrow(lrdb) == 1) {
        ndata_lr <- ndata_ligand * ndata_receptor
        lrdb$lr_co_exp_num <- length(ndata_lr[ndata_lr > 0])
        lrdb$lr_co_ratio <- length(ndata_lr[ndata_lr > 0])/length(ndata_lr)
    } else {
        ndata_ligand <- st_data[lrdb$ligand, cell_pair$cell_sender]
        ndata_receptor <- st_data[lrdb$receptor, cell_pair$cell_receiver]
        ndata_lr <- cbind(ndata_ligand, ndata_receptor)
        lrdb$lr_co_ratio <- apply(ndata_lr, 1, .co_exp)
        lrdb$lr_co_exp_num <- apply(ndata_lr, 1, .co_exp) * nrow(cell_pair)
    }
    # permutation test
    res_per <- list()
    for (j in 1:per_num) {
        set.seed(j)
        cell_id <- sample(x = 1:ncol(st_data), size = nrow(cell_pair) * 2, replace = T)
        ndata_ligand <- st_data[lrdb$ligand, cell_id[1:(length(cell_id)/2)]]
        ndata_receptor <- st_data[lrdb$receptor, cell_id[(length(cell_id)/2 + 1):length(cell_id)]]
        if (nrow(lrdb) == 1) {
            ndata_lr <- ndata_ligand * ndata_receptor
            res_per[[j]] <- length(ndata_lr[ndata_lr > 0])/length(ndata_lr)
        } else {
            ndata_lr <- cbind(ndata_ligand, ndata_receptor)
            res_per[[j]] <- apply(ndata_lr, 1, .co_exp)
        }
    }
    names(res_per) <- paste0("P", 1:length(res_per))
    res_per <- as.data.frame(res_per)
    res_per$real <- lrdb$lr_co_ratio
    lrdb$lr_co_ratio_pvalue <- apply(res_per, 1, .co_exp_p)
    lrdb <- lrdb[lrdb$lr_co_ratio_pvalue < pvalue, ]
    return(lrdb)
}

.generate_ggi_res <- function(ggi_tf, cell_pair, receptor_name, st_data, max_hop, co_exp_ratio) {
    .co_exp <- function(x) {
        x_1 <- x[1:(length(x)/2)]
        x_2 <- x[(length(x)/2 + 1):length(x)]
        x_12 <- x_1 * x_2
        x_12_ratio <- length(x_12[x_12 > 0])/length(x_12)
        return(x_12_ratio)
    }
    .co_exp_batch <- function(st_data, ggi_res, cell_pair) {
        ggi_res_temp <- unique(ggi_res[, c("src", "dest")])
        cell_receiver <- unique(cell_pair$cell_receiver)
        m <- floor(nrow(ggi_res_temp)/5000)
        i <- 1
        res <- NULL
        while (i <= (m + 1)) {
            m_int <- 5000 * i
            if (m_int < nrow(ggi_res_temp)) {
                ggi_res_temp1 <- ggi_res_temp[((i - 1) * 5000 + 1):(5000 * i), ]
            } else {
                if (m_int == nrow(ggi_res_temp)) {
                    ggi_res_temp1 <- ggi_res_temp[((i - 1) * 5000 + 1):(5000 * i), ]
                    i <- i + 1
                } else {
                    ggi_res_temp1 <- ggi_res_temp[((i - 1) * 5000 + 1):nrow(ggi_res_temp), ]
                }
            }
            ndata_src <- st_data[ggi_res_temp1$src, cell_receiver]
            ndata_dest <- st_data[ggi_res_temp1$dest, cell_receiver]
            ndata_gg <- cbind(ndata_src, ndata_dest)
            # calculate co-expression
            ggi_res_temp1$co_ratio <- NA
            ggi_res_temp1$co_ratio <- apply(ndata_gg, 1, .co_exp)
            res <- rbind(res, ggi_res_temp1)
            i <- i + 1
        }
        res$merge_key <- paste0(res$src, res$dest)
        ggi_res$merge_key <- paste0(ggi_res$src, ggi_res$dest)
        ggi_res <- merge(ggi_res, res, by = "merge_key", all.x = T, sort = F)
        ggi_res <- ggi_res[, c(2:6, 9)]
        colnames(ggi_res) <- c("src", "dest", "src_tf", "dest_tf", "hop", "co_ratio")
        return(ggi_res)
    }
    # generate ggi_res
    ggi_res <- NULL
    ggi_tf1 <- ggi_tf[ggi_tf$src == receptor_name, ]
    ggi_tf1 <- unique(ggi_tf1[ggi_tf1$dest %in% rownames(st_data), ])
    n <- 0
    ggi_tf1$hop <- n + 1
    while (n <= max_hop) {
        ggi_res <- rbind(ggi_res, ggi_tf1)
        ggi_tf1 <- ggi_tf[ggi_tf$src %in% ggi_tf1$dest, ]
        ggi_tf1 <- unique(ggi_tf1[ggi_tf1$dest %in% rownames(st_data), ])
        if (nrow(ggi_tf1) == 0) {
            break
        }
        ggi_tf1$hop <- n + 2
        n <- n + 1
    }
    ggi_res <- unique(ggi_res)
    # ndata_src and ndata_dest
    ggi_res_temp <- unique(ggi_res[, c("src", "dest")])
    if (nrow(ggi_res_temp) >= 5000) {
        ggi_res <- .co_exp_batch(st_data, ggi_res, cell_pair)
    } else {
        if (nrow(ggi_res) == 1) {
            ndata_src <- st_data[ggi_res$src, cell_pair$cell_receiver]
            ndata_dest <- st_data[ggi_res$dest, cell_pair$cell_receiver]
            ndata_gg <- c(ndata_src, ndata_dest)
            # calculate co-expression
            ggi_res$co_ratio <- NA
            ggi_res$co_ratio <- .co_exp(ndata_gg)
        } else {
            ndata_src <- st_data[ggi_res$src, cell_pair$cell_receiver]
            ndata_dest <- st_data[ggi_res$dest, cell_pair$cell_receiver]
            ndata_gg <- cbind(ndata_src, ndata_dest)
            # calculate co-expression
            ggi_res$co_ratio <- NA
            ggi_res$co_ratio <- apply(ndata_gg, 1, .co_exp)
        }
    }
    ggi_res <- ggi_res[ggi_res$co_ratio > co_exp_ratio, ]
    return(ggi_res)
}

.co_exp_batch <- function(st_data, ggi_res, cell_pair) {
    ggi_res_temp <- unique(ggi_res[, c("src", "dest")])
    cell_receiver <- unique(cell_pair$cell_receiver)
    m <- floor(nrow(ggi_res_temp)/5000)
    i <- 1
    res <- NULL
    while (i <= (m + 1)) {
        m_int <- 5000 * i
        if (m_int < nrow(ggi_res_temp)) {
            ggi_res_temp1 <- ggi_res_temp[((i - 1) * 5000 + 1):(5000 * i), ]
        } else {
            if (m_int == nrow(ggi_res_temp)) {
                ggi_res_temp1 <- ggi_res_temp[((i - 1) * 5000 + 1):(5000 * i), ]
                i <- i + 1
            } else {
                ggi_res_temp1 <- ggi_res_temp[((i - 1) * 5000 + 1):nrow(ggi_res_temp), ]
            }
        }
        ndata_src <- st_data[ggi_res_temp1$src, cell_receiver]
        ndata_dest <- st_data[ggi_res_temp1$dest, cell_receiver]
        ndata_gg <- cbind(ndata_src, ndata_dest)
        # calculate co-expression
        ggi_res_temp1$co_ratio <- NA
        ggi_res_temp1$co_ratio <- apply(ndata_gg, 1, .co_exp)
        res <- rbind(res, ggi_res_temp1)
        i <- i + 1
    }
    res$merge_key <- paste0(res$src, res$dest)
    ggi_res$merge_key <- paste0(ggi_res$src, ggi_res$dest)
    ggi_res <- merge(ggi_res, res, by = "merge_key", all.x = T, sort = F)
    ggi_res <- ggi_res[, c(2:6, 9)]
    colnames(ggi_res) <- c("src", "dest", "src_tf", "dest_tf", "hop", "co_ratio")
    return(ggi_res)
}

.generate_tf_gene_all <- function(ggi_res, max_hop) {
    tf_gene_all <- NULL
    ggi_hop <- ggi_res[ggi_res$hop == 1, ]
    for (k in 1:max_hop) {
        ggi_hop_yes <- ggi_hop[ggi_hop$dest_tf == "YES", ]
        if (nrow(ggi_hop_yes) > 0) {
            ggi_hop_tf <- ggi_res[ggi_res$hop == k + 1, ]
            if (nrow(ggi_hop_tf) > 0) {
                ggi_hop_yes <- ggi_hop_yes[ggi_hop_yes$dest %in% ggi_hop_tf$src, ]
                if (nrow(ggi_hop_yes) > 0) {
                    tf_gene <- ggi_hop_yes$hop
                    names(tf_gene) <- ggi_hop_yes$dest
                    tf_gene_all <- c(tf_gene_all, tf_gene)
                }
            }
        }
        ggi_hop_no <- ggi_hop[ggi_hop$dest_tf == "NO", ]
        ggi_hop <- ggi_res[ggi_res$hop == k + 1, ]
        ggi_hop <- ggi_hop[ggi_hop$src %in% ggi_hop_no$dest, ]
    }
    return(tf_gene_all)
}

.generate_tf_res <- function(tf_gene_all, celltype_sender, celltype_receiver, receptor_name, ggi_res) {
    receptor_tf_temp <- data.frame(celltype_sender = celltype_sender, celltype_receiver = celltype_receiver,
        receptor = receptor_name, tf = names(tf_gene_all), n_hop = as.numeric(tf_gene_all), n_target = 0, stringsAsFactors = F)
    tf_names <- names(tf_gene_all)
    tf_n_hop <- as.numeric(tf_gene_all)
    for (i in 1:length(tf_names)) {
        ggi_res_tf <- ggi_res[ggi_res$src == tf_names[i] & ggi_res$hop == tf_n_hop[i] + 1, ]
        receptor_tf_temp$n_target[i] <- length(unique(ggi_res_tf$dest))
    }
    return(receptor_tf_temp)
}

.get_score <- function(lrdb, receptor_tf) {
    lrdb$score <- 0
    for (j in 1:nrow(lrdb)) {
        receptor_name <- lrdb$receptor[j]
        score_lr <- 1 - lrdb$lr_co_ratio_pvalue[j]
        if (receptor_name %in% receptor_tf$receptor) {
            receptor_tf_temp <- receptor_tf[receptor_tf$receptor == receptor_name, ]
            receptor_tf_temp$score_rt <- receptor_tf_temp$n_target * receptor_tf_temp$score/receptor_tf_temp$n_hop
            score_rt <- sum(receptor_tf_temp$score_rt) * (-1)
            score_rt <- 1/(1 + exp(score_rt))
            lrdb$score[j] <- sqrt(score_lr * score_rt)
        }
    }
    lrdb <- lrdb[lrdb$score > 0, ]
    if (nrow(lrdb) == 0) {
        return("NA")
    } else {
        return(lrdb)
    }
}

.get_tf_res_doParallel <- function(celltype_sender, celltype_receiver, lrdb, ggi_tf, cell_pair, st_data, max_hop, co_exp_ratio) {
    receptor_name <- unique(lrdb$receptor)
    receptor_tf <- NULL
    receptor_tf <- foreach::foreach (j=1:length(receptor_name), .packages = "Matrix", .combine = "rbind",
        .export = c(".generate_ggi_res", ".generate_tf_gene_all", ".generate_tf_res", ".random_walk")) %dopar% {
        # generate ggi_res
        ggi_res <- .generate_ggi_res(ggi_tf, cell_pair, receptor_name[j], st_data, max_hop, co_exp_ratio)
        if (nrow(ggi_res) > 0) {
            tf_gene_all <- .generate_tf_gene_all(ggi_res, max_hop)
            tf_gene_all <- data.frame(gene = names(tf_gene_all), hop = tf_gene_all, stringsAsFactors = F)
            tf_gene_all_new <- unique(tf_gene_all)
            tf_gene_all <- tf_gene_all_new$hop
            names(tf_gene_all) <- tf_gene_all_new$gene
            ggi_res$dest_tf_enrich <- "NO"
            if (!is.null(tf_gene_all)) {
                ggi_res[ggi_res$dest %in% names(tf_gene_all), ]$dest_tf_enrich <- "YES"
                # generate tf res
                receptor_tf_temp <- .generate_tf_res(tf_gene_all, celltype_sender, celltype_receiver, receptor_name[j], ggi_res)
                # random walk
                receptor_tf_temp$score <- .random_walk(receptor_tf_temp, ggi_res)
                receptor_tf_temp
            } else {
                data.frame()
            }
        }
        else {
            data.frame()
        }
    }
    return(receptor_tf)
}

.get_tf_res <- function(celltype_sender, celltype_receiver, lrdb, ggi_tf, cell_pair, st_data, max_hop, co_exp_ratio) {
    .co_exp <- function(x) {
        x_1 <- x[1:(length(x)/2)]
        x_2 <- x[(length(x)/2 + 1):length(x)]
        x_12 <- x_1 * x_2
        x_12_ratio <- length(x_12[x_12 > 0])/length(x_12)
        return(x_12_ratio)
    }
    .co_exp_batch <- function(st_data, ggi_res, cell_pair) {
        ggi_res_temp <- unique(ggi_res[, c("src", "dest")])
        cell_receiver <- unique(cell_pair$cell_receiver)
        m <- floor(nrow(ggi_res_temp)/5000)
        i <- 1
        res <- NULL
        while (i <= (m + 1)) {
            m_int <- 5000 * i
            if (m_int < nrow(ggi_res_temp)) {
                ggi_res_temp1 <- ggi_res_temp[((i - 1) * 5000 + 1):(5000 * i), ]
            } else {
                if (m_int == nrow(ggi_res_temp)) {
                    ggi_res_temp1 <- ggi_res_temp[((i - 1) * 5000 + 1):(5000 * i), ]
                    i <- i + 1
                } else {
                    ggi_res_temp1 <- ggi_res_temp[((i - 1) * 5000 + 1):nrow(ggi_res_temp), ]
                }
            }
            ndata_src <- st_data[ggi_res_temp1$src, cell_receiver]
            ndata_dest <- st_data[ggi_res_temp1$dest, cell_receiver]
            ndata_gg <- cbind(ndata_src, ndata_dest)
            # calculate co-expression
            ggi_res_temp1$co_ratio <- NA
            ggi_res_temp1$co_ratio <- apply(ndata_gg, 1, .co_exp)
            res <- rbind(res, ggi_res_temp1)
            i <- i + 1
        }
        res$merge_key <- paste0(res$src, res$dest)
        ggi_res$merge_key <- paste0(ggi_res$src, ggi_res$dest)
        ggi_res <- merge(ggi_res, res, by = "merge_key", all.x = T, sort = F)
        ggi_res <- ggi_res[, c(2:6, 9)]
        colnames(ggi_res) <- c("src", "dest", "src_tf", "dest_tf", "hop", "co_ratio")
        return(ggi_res)
    }
    .generate_ggi_res <- function(ggi_tf, cell_pair, receptor_name, st_data, max_hop, co_exp_ratio) {
        # generate ggi_res
        ggi_res <- NULL
        ggi_tf1 <- ggi_tf[ggi_tf$src == receptor_name, ]
        ggi_tf1 <- unique(ggi_tf1[ggi_tf1$dest %in% rownames(st_data), ])
        n <- 0
        ggi_tf1$hop <- n + 1
        while (n <= max_hop) {
            ggi_res <- rbind(ggi_res, ggi_tf1)
            ggi_tf1 <- ggi_tf[ggi_tf$src %in% ggi_tf1$dest, ]
            ggi_tf1 <- unique(ggi_tf1[ggi_tf1$dest %in% rownames(st_data), ])
            if (nrow(ggi_tf1) == 0) {
                break
            }
            ggi_tf1$hop <- n + 2
            n <- n + 1
        }
        ggi_res <- unique(ggi_res)
        # ndata_src and ndata_dest
        ggi_res_temp <- unique(ggi_res[, c("src", "dest")])
        if (nrow(ggi_res_temp) >= 5000) {
            ggi_res <- .co_exp_batch(st_data, ggi_res, cell_pair)
        } else {
            if (nrow(ggi_res) == 1) {
                ndata_src <- st_data[ggi_res$src, cell_pair$cell_receiver]
                ndata_dest <- st_data[ggi_res$dest, cell_pair$cell_receiver]
                ndata_gg <- c(ndata_src, ndata_dest)
                # calculate co-expression
                ggi_res$co_ratio <- NA
                ggi_res$co_ratio <- .co_exp(ndata_gg)
            } else {
                ndata_src <- st_data[ggi_res$src, cell_pair$cell_receiver]
                ndata_dest <- st_data[ggi_res$dest, cell_pair$cell_receiver]
                ndata_gg <- cbind(ndata_src, ndata_dest)
                # calculate co-expression
                ggi_res$co_ratio <- NA
                ggi_res$co_ratio <- apply(ndata_gg, 1, .co_exp)
            }
        }
        ggi_res <- ggi_res[ggi_res$co_ratio > co_exp_ratio, ]
        return(ggi_res)
    }
    .generate_tf_gene_all <- function(ggi_res, max_hop) {
        tf_gene_all <- NULL
        ggi_hop <- ggi_res[ggi_res$hop == 1, ]
        for (k in 1:max_hop) {
            ggi_hop_yes <- ggi_hop[ggi_hop$dest_tf == "YES", ]
            if (nrow(ggi_hop_yes) > 0) {
                ggi_hop_tf <- ggi_res[ggi_res$hop == k + 1, ]
                if (nrow(ggi_hop_tf) > 0) {
                    ggi_hop_yes <- ggi_hop_yes[ggi_hop_yes$dest %in% ggi_hop_tf$src, ]
                    if (nrow(ggi_hop_yes) > 0) {
                        tf_gene <- ggi_hop_yes$hop
                        names(tf_gene) <- ggi_hop_yes$dest
                        tf_gene_all <- c(tf_gene_all, tf_gene)
                    }
                }
            }
            ggi_hop_no <- ggi_hop[ggi_hop$dest_tf == "NO", ]
            ggi_hop <- ggi_res[ggi_res$hop == k + 1, ]
            ggi_hop <- ggi_hop[ggi_hop$src %in% ggi_hop_no$dest, ]
        }
        return(tf_gene_all)
    }
    .generate_tf_res <- function(tf_gene_all, celltype_sender, celltype_receiver, receptor_name, ggi_res) {
        receptor_tf_temp <- data.frame(celltype_sender = celltype_sender, celltype_receiver = celltype_receiver,
                                       receptor = receptor_name, tf = names(tf_gene_all), n_hop = as.numeric(tf_gene_all), n_target = 0, stringsAsFactors = F)
        tf_names <- names(tf_gene_all)
        tf_n_hop <- as.numeric(tf_gene_all)
        for (i in 1:length(tf_names)) {
            ggi_res_tf <- ggi_res[ggi_res$src == tf_names[i] & ggi_res$hop == tf_n_hop[i] + 1, ]
            receptor_tf_temp$n_target[i] <- length(unique(ggi_res_tf$dest))
        }
        return(receptor_tf_temp)
    }
    .random_walk <- function(receptor_tf, ggi_res) {
        receptor_name <- unique(receptor_tf$receptor)
        tf_score <- rep(0, nrow(receptor_tf))
        names(tf_score) <- receptor_tf$tf
        max_n_hop <- 10
        for (i in 1:10000) {
            ggi_res1 <- ggi_res[ggi_res$src == receptor_name, ]
            n_hop <- 1
            while (nrow(ggi_res1) > 0 & n_hop <= max_n_hop) {
                target_name <- sample(x = 1:nrow(ggi_res1), size = 1)
                ggi_res1 <- ggi_res1[target_name, ]
                if (ggi_res1$dest %in% names(tf_score)) {
                    tf_score[ggi_res1$dest] <- tf_score[ggi_res1$dest] + 1
                }
                ggi_res1 <- ggi_res[ggi_res$src == ggi_res1$dest, ]
                n_hop <- n_hop + 1
            }
        }
        tf_score <- as.numeric(tf_score/10000)
    }
    receptor_tf <- NULL
    receptor_name <- unique(lrdb$receptor)
    for (j in 1:length(receptor_name)) {
        # generate ggi_res
        ggi_res <- .generate_ggi_res(ggi_tf, cell_pair, receptor_name[j], st_data, max_hop, co_exp_ratio)
        if (nrow(ggi_res) > 0) {
            tf_gene_all <- .generate_tf_gene_all(ggi_res, max_hop)
            tf_gene_all <- data.frame(gene = names(tf_gene_all), hop = tf_gene_all, stringsAsFactors = F)
            tf_gene_all_new <- unique(tf_gene_all)
            tf_gene_all <- tf_gene_all_new$hop
            names(tf_gene_all) <- tf_gene_all_new$gene
            ggi_res$dest_tf_enrich <- "NO"
            if (!is.null(tf_gene_all)) {
                ggi_res[ggi_res$dest %in% names(tf_gene_all), ]$dest_tf_enrich <- "YES"
                # generate tf res
                receptor_tf_temp <- .generate_tf_res(tf_gene_all, celltype_sender, celltype_receiver, receptor_name[j], ggi_res)
                # random walk
                receptor_tf_temp$score <- .random_walk(receptor_tf_temp, ggi_res)
                receptor_tf <- rbind(receptor_tf, receptor_tf_temp)
            }
        }
    }
    return(receptor_tf)
}

.random_walk <- function(receptor_tf, ggi_res) {
    receptor_name <- unique(receptor_tf$receptor)
    tf_score <- rep(0, nrow(receptor_tf))
    names(tf_score) <- receptor_tf$tf
    max_n_hop <- 10
    for (i in 1:10000) {
        ggi_res1 <- ggi_res[ggi_res$src == receptor_name, ]
        n_hop <- 1
        while (nrow(ggi_res1) > 0 & n_hop <= max_n_hop) {
            target_name <- sample(x = 1:nrow(ggi_res1), size = 1)
            ggi_res1 <- ggi_res1[target_name, ]
            if (ggi_res1$dest %in% names(tf_score)) {
                tf_score[ggi_res1$dest] <- tf_score[ggi_res1$dest] + 1
            }
            ggi_res1 <- ggi_res[ggi_res$src == ggi_res1$dest, ]
            n_hop <- n_hop + 1
        }
    }
    tf_score <- as.numeric(tf_score/10000)
}

.get_tf_path <- function(ggi_res, tf_gene, tf_hop, receptor) {
    tf_path <- NULL
    ggi_res1 <- ggi_res[ggi_res$dest == tf_gene & ggi_res$hop == tf_hop, ]
    if (tf_hop > 1) {
        tf_hop_new <- tf_hop - 1
        for (i in tf_hop_new:1) {
            ggi_res2 <- ggi_res[ggi_res$dest %in% ggi_res1$src & ggi_res$hop == i, ]
            ggi_res1 <- ggi_res1[ggi_res1$src %in% ggi_res2$dest, ]
            ggi_res2 <- ggi_res2[ggi_res2$dest %in% ggi_res1$src, ]
            if (i == tf_hop_new) {
                tf_path <- rbind(tf_path, ggi_res1, ggi_res2)
            } else {
                tf_path <- rbind(tf_path, ggi_res2)
            }
            ggi_res1 <- ggi_res2
        }
    } else {
        tf_path <- ggi_res1
    }
    tf_path_new <- NULL
    ggi_res1 <- tf_path[tf_path$src == receptor & tf_path$hop == 1, ]
    if (tf_hop > 1) {
        for (i in 2:tf_hop) {
            ggi_res2 <- tf_path[tf_path$src %in% ggi_res1$dest & tf_path$hop == i, ]
            ggi_res2 <- ggi_res2[ggi_res2$src %in% ggi_res1$dest, ]
            if (i == 2) {
                tf_path_new <- rbind(tf_path_new, ggi_res1, ggi_res2)
            } else {
                tf_path_new <- rbind(tf_path_new, ggi_res2)
            }
            ggi_res1 <- ggi_res2
        }
    } else {
        tf_path_new <- ggi_res1
    }
    ggi_res1 <- ggi_res[ggi_res$src == tf_gene & ggi_res$hop == (tf_hop + 1), ]
    tf_path_new <- rbind(tf_path_new, ggi_res1)
    tf_path_new$tf <- tf_gene
    return(tf_path_new)
}

#' @title Show SpaTalk object
#'
#' @return SpaTalk object
#' @import Matrix
#' @importFrom methods show
#'
#' @export

setMethod(
    f = 'show',
    signature = 'SpaTalk',
    definition = function(object) {
        cat("An object of class SpaTalk", "\n")
        st_data <- object@data$rawdata
        st_type <- object@para[["st_type"]]
        lrpair <- object@lrpair
        cat(paste0(nrow(st_data), " genes across ", ncol(st_data), " ", st_type, "s (", nrow(lrpair), " lrpair)"), "\n")
        return(invisible(x = NULL))
    }
)
ZJUFanLab/SpaTalk documentation built on Jan. 21, 2025, 3:13 p.m.