Nothing
##' Preprocessing
##'
##' @title Data preprocessing
##'
##' @description This function is to prepare data for the \code{ConNetGNN} function.
##'
##' @param data The input data should be a data frame or a matrix where the rows are genes and the columns are cells. The \code{seurat} object are also accepted.
##' @param parallel.cores Number of processors to use when doing the calculations in parallel (default: \code{2}). If \code{parallel.cores=0}, then it will use all available core processors unless we set this argument with a smaller number.
##' @param verbose Gives information about each step. Default: \code{TRUE}.
##'
##' @details
##' The function is able to interface with the \code{seurat} framework. The process of \code{seurat} data processing refers to \code{Examples}.
##' The input data should be containing hypervariable genes and log-transformed. Left-truncated mixed Gaussian (LTMG) modeling to calculate gene
##' regulatory signal matrix. Positively correlated gene-gene and cell-cell are used as the initial gene correlation matrix and cell correlation matrix.
##'
##' @return A list:
##' \describe{
##' \item{orig_dara}{User-submitted raw data, rows are highly variable genes and columns are cells.}
##' \item{cell_features}{Cell feature matrix.}
##' \item{gene_features}{Gene feature matrix.}
##' \item{ltmg_matrix}{Gene regulatory signal matrix for LTMG.}
##' \item{cell_adj}{The adjacency matrix of the cell correlation network.}
##' \item{gene_adj}{The adjacency matrix of the gene correlation network.}
##' }
##'
##' @importFrom coop tpcor
##' @importFrom methods new
##' @importFrom parallel makeCluster
##' @importFrom parallel clusterExport
##' @importFrom parallel parLapply
##' @importFrom parallel stopCluster
##'
##' @export
##'
##' @examples
##'
##' # Load dependent packages.
##' # require(coop)
##'
##' # Seurat data processing.
##' # require(Seurat)
##'
##' # Load the PBMC dataset (Case data for seurat)
##' # pbmc.data <- Read10X(data.dir = "../data/pbmc3k/filtered_gene_bc_matrices/hg19/")
##'
##' # Our recommended data filtering is that only genes expressed as non-zero in more than
##' # 1% of cells, and cells expressed as non-zero in more than 1% of genes are kept.
##' # In addition, users can also filter mitochondrial genes according to their own needs.
##' # pbmc <- CreateSeuratObject(counts = pbmc.data, project = "case",
##' # min.cells = 3, min.features = 200)
##' # pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
##' # pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
##'
##' # Normalizing the data.
##' # pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize")
##'
##' # Identification of highly variable features.
##' # pbmc <- FindVariableFeatures(pbmc, selection.method = 'vst', nfeatures = 2000)
##'
##' # Run Preprocessing.
##' # Prep_data <- Preprocessing(pbmc)
##'
##'
##'
##' # Users can also directly input data
##' # in data frame or matrix format
##' # containing highly variable genes.
##' data("Hv_exp")
##' Hv_exp <- Hv_exp[,1:20]
##' Hv_exp <- Hv_exp[which(rowSums(Hv_exp) > 0),]
##' Prep_data <- Preprocessing(Hv_exp[1:10,])
Preprocessing<-function(data,parallel.cores=1,verbose=TRUE){
if(!isLoaded("coop")){
stop("The package coop is not available!")
}
if(!isLoaded("parallel")){
stop("The package parallel is not available!")
}
if(sum(grep("Seurat",class(data)))!=0){
exp<-data@assays[["RNA"]]@data
hvgi<-match(data@assays[["RNA"]]@var.features,row.names(exp))
exp<-as.matrix(exp[hvgi,])
}else{
exp<-as.matrix(data)
}
cell_features<-t(apply(exp, 2, function(x){
return(x/max(x))
}))
gene_features<-t(apply(exp, 1, function(x){
return(x/max(x))
}))
if (verbose) {
cat("Run LTMG \n")
}
LTMG_Object<- new(Class = 'LTMG', InputData = exp)
if(verbose){
suppressMessages(LTMG <- RunLTMG(LTMG_Object,Gene_use = 'all', verbose=verbose)@OrdinalMatrix)
}
if (verbose) {
cat("Calculate cell correlation matrix \n")
}
cellnet <- coop::tpcor(cell_features)
cellnet[cellnet<0]<-0
diag(cellnet)<-0
cl <- makeCluster(parallel.cores)
cellnet_p<-parLapply(cl,1:nrow(cellnet),function(i,cellnet){
x <- cellnet[i,]
len<-length(which(x>0))
x1<-rep(0,length(x))
for(j in 1:length(x)){
if(x[j]>0){
p<-length(which(x>x[j]))/len
if(p==0){
x1[j]<-0.00001
}else{
x1[j]<-p
}
}
}
x1[x1>0.05]<-0
return(x1)
},cellnet)
cellnet_p<-do.call("rbind",cellnet_p)
x<-cellnet_p
x[upper.tri(x)] <- 0
s<-cellnet_p
s[lower.tri(s)] <- 0
s<-t(s)
s[s!=0]<-1
x[x!=0]<-1
cellnet_p<-s+x
cellnet_p[cellnet_p==2]<-1
cellnet_p<-cellnet_p+t(cellnet_p)
cellnet<-cellnet*cellnet_p
rm(cellnet_p)
rm(x)
rm(s)
gc()
if (verbose) {
cat("Calculate gene correlation matrix \n")
}
genenet <- coop::tpcor(gene_features)
genenet[genenet<0]<-0
diag(genenet)<-0
genenet_p<-parLapply(cl,1:nrow(genenet),function(i,genenet){
x <- genenet[i,]
len<-length(which(x>0))
x1<-rep(0,length(x))
for(j in 1:length(x)){
if(x[j]>0){
p<-length(which(x>x[j]))/len
if(p==0){
x1[j]<-0.00001
}else{
x1[j]<-p
}
}
}
x1[x1>0.05]<-0
return(x1)
},genenet)
genenet_p<-do.call("rbind",genenet_p)
stopCluster(cl)
x<-genenet_p
x[upper.tri(x)] <- 0
s<-genenet_p
s[lower.tri(s)] <- 0
s<-t(s)
s[s!=0]<-1
x[x!=0]<-1
genenet_p<-s+x
genenet_p[genenet_p==2]<-1
genenet_p<-genenet_p+t(genenet_p)
genenet<-genenet*genenet_p
if (verbose) {
cat("Done \n")
}
results<-list(orig_dara=exp,cell_features=cell_features,gene_features=gene_features,
ltmg_matrix=LTMG,cell_adj=cellnet,gene_adj=genenet)
return(results)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.