devtools::install_github("nghiavtr/BPSC")
BiocManager::install("DEsingle")
BiocManager::install("DESeq2")
BiocManager::install("edgeR")
BiocManager::install("MAST")
BiocManager::install("monocle")
BiocManager::install("scDD")
BiocManager::install("limma")
BiocManager::install("Seurat")
devtools::install_github("statOmics/zingeR")
BiocManager::install("SingleCellExperiment")
BiocManager::install("scater")
install.packages("aggregation")
devtools::install_github("Zhangxf-ccnu/scDEA")

library(scDEA)
# data("Grun.counts.hvg")
# data("Grun.group.information")
# Pvals <- scDEA_individual_methods(raw.count = Grun.counts.hvg, cell.label = Grun.group.information)
# combination.Pvals <- lancaster.combination(Pvals, weight = TRUE, trimmed = 0.2)
# adjusted.Pvals <- scDEA.p.adjust(combination.Pvals, adjusted.method = "bonferroni")
knitr::opts_chunk$set(echo = TRUE)
library(purrr)
library(RVenn)
library(ggplot2)
library("scDEA")
library(SingleCellExperiment)
library(dplyr)
library(DESeq2)
library(heatmaply)
library(BiocParallel)
library(topGO)
library(ALL)




printTopGo <- function(minPvals, Pvals, title) {
  paste(rownames(minPvals),collapse = ", ")
  de_symbols = rownames(minPvals)
  bg_symbols = rownames(Pvals)
  gocat = list("GO Biological Process" = "BP", "GO Molecular Function" = "MF", "GO Cellular Component" = "CC")
  topgo_bp <- suppressMessages(pcaExplorer::topGOtable(de_symbols, bg_symbols,
                                                       ontology = "BP",
                                                       mapping = "org.Hs.eg.db",
                                                       geneID = "symbol",
                                                       addGeneToTerms = TRUE))
  topgo_mf <- suppressMessages(pcaExplorer::topGOtable(de_symbols, bg_symbols,
                                                       ontology = "MF",
                                                       mapping = "org.Hs.eg.db",
                                                       geneID = "symbol",
                                                       addGeneToTerms = TRUE))
  return(list(topgo_bp,topgo_mf))
  # cat(knitr::knit_print((DT::datatable(topgo_bp, caption  = paste(title, "BP")))))
  # cat(knitr::knit_print((DT::datatable(topgo_mf, caption  = paste(title, "MF")))))
}
cp = load("/Volumes/LaCie2022/katja/all3vs6.scDEA.RData")
table(projections$ighaHigh, projections$sampleNames)
input
fps = dir(path = "/Volumes/LaCie2022/katja/",full.names = T, pattern = ".*Mon.*.RData")
groups = list()
for(fp in fps){
  cp = load(fp)
  name = basename(fp)
  if("minPvals" %in% cp){
    groups[[name]] = minPvals
  }
}
ListKatja = c("IGHA1", "IGHA2" , "IGKC", "EEF1A1", 
              "JCHAIN", "SSR4", "ATP5F1", "COX7C" , "CYBA", "EEF1D" , "FKBP2" , "NACA", "NOP53" , "RACK1" , "TMA7"
)
rownames(groups[[1]])
vList = lapply(groups, rownames)
vList[["katja"]] = ListKatja
toy = Venn(vList)
setmap(toy, element_clustering = T, set_clustering = T        )
cp = load("/Volumes/LaCie2022/katja/test.Mon.Mar.21.08.16.31.2022.RData")
dat = "s3.s6.igha."
cp
colnames(projections)

table(projections$ighaHigh, projections$sampleNames)
tb = table(projections$dbCluster, projections$ighaHigh)
tb
sweep(tb,2,colSums(tb),`/`)

table(projections$dbCluster, projections$sampleNames, projections$ighaHigh)
tb = table(projections$dbCluster, projections$sampleNames)
tb = 
  tb
sweep(tb,2,colSums(tb),`/`)


cn="dbCluster"
whichCells = projections$sampleNames %in% c("s_3", "s_6")
scExSml = scEx[,whichCells]
counts = as.matrix(assays(scExSml)[["counts"]])

for (cl in levels(projections$dbCluster)){
  datSet =paste0(dat,"clvsRest.", cl)
  # if(file.exists( paste0(datSet,".scDEA.RData")))next()
  projections$testCluster = "Rest"
  projections$testCluster[projections$dbCluster == cl] = cl


  cDat = projections
  colData = as.factor(as.character(cDat[whichCells,"testCluster"]))

  Pvals <- tryCatch(
    scDEA_individual_methods(raw.count = counts, cell.label = colData,),
    error = function(e) {
      return(NULL)
    }
  )
  if (is.null(Pvals)) next()

  combination.Pvals <- lancaster.combination(Pvals, weight = TRUE, trimmed = 0.2)

  adjusted.Pvals <- scDEA.p.adjust(combination.Pvals, adjusted.method = "bonferroni")

  # data.table::data.table(adjusted.Pvals)

  minPvals = Pvals[which(adjusted.Pvals<0.05),]
  minPvals[minPvals<1e-10] = 1e-10
  # sum((Pvals[which(adjusted.Pvals<0.1),]==0))
  # print(pheatmap::pheatmap(-log10(minPvals)))
  upReg = minPvals[which(apply(counts[rownames(minPvals), colData == levels(colData)[1]],1,mean)>
                           apply(counts[rownames(minPvals), colData == levels(colData)[2]],1,mean)),]
  downReg = minPvals[which(apply(counts[rownames(minPvals), colData == levels(colData)[1]],1,mean)<
                             apply(counts[rownames(minPvals), colData == levels(colData)[2]],1,mean)),]

  save(file = paste0(datSet,".scDEA.RData"), list = c("minPvals", "adjusted.Pvals", "combination.Pvals", "Pvals", "counts", "colData", "upReg", "downReg"))


}
cp = load("~/Rstudio/UTechSCB-SCHNAPPs/inst/develo/s3.s6.igha.clvsRest.0.scDEA.RData")

paste(rownames(minPvals),collapse = ", ")
paste(rownames(upReg),collapse = ", ")
paste(rownames(downReg),collapse = ", ")
li = printTopGo(upReg, Pvals, paste("upReg-",dat, 0))
DT::datatable(li[[1]])
DT::datatable(li[[2]])
li = printTopGo(downReg, Pvals, paste("downReg-",dat, 0))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

genes = rownames(upReg)
gene1 = strsplit(li[[1]]$genes,",")[[1]][1]
c1 = projections$dbCluster==0
clusterGenes = apply(counts[,c1],1,mean)[genes]
otherGenes   = apply(counts[,!c1],1,mean)[genes]
data.frame(clusterGenes, otherGenes)

cp = load("~/Rstudio/UTechSCB-SCHNAPPs/inst/develo/s3.s6.igha.clvsRest.1.scDEA.RData")

paste(rownames(minPvals),collapse = ", ")
paste(rownames(upReg),collapse = ", ")
paste(rownames(downReg),collapse = ", ")
li = printTopGo(upReg, Pvals, paste("upReg-",dat, 1))
DT::datatable(li[[1]])
DT::datatable(li[[2]])
li = printTopGo(downReg, Pvals, paste("downReg-",dat, 1))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

cp = load("~/Rstudio/UTechSCB-SCHNAPPs/inst/develo/s3.s6.igha.clvsRest.2.scDEA.RData")

paste(rownames(minPvals),collapse = ", ")
li = printTopGo(minPvals, Pvals, paste(dat, 2))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

cp = load("~/Rstudio/UTechSCB-SCHNAPPs/inst/develo/s3.s6.igha.clvsRest.3.scDEA.RData")

paste(rownames(minPvals),collapse = ", ")
li = printTopGo(minPvals, Pvals, paste(dat, 3))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

test.Mon.Mar.21.17.33.16.2022.RData => s3.s6.igha2.clvsRest.X.RData

s_1 s_2 s_3 s_4 s_5 s_6 FALSE - TRUE 0 0 58 0 0 43 TRUE - FALSE 0 0 0 0 0 0

s_1 s_2 s_3 s_4 s_5 s_6 0 0 0 12 0 0 15 1 0 0 14 0 0 3 2 0 0 9 0 0 11 3 0 0 10 0 0 8 4 0 0 4 0 0 4 5 0 0 8 0 0 6 6 0 0 8 0 0 9 7 0 0 2 0 0 0

not enough cells

test.Mon.Mar.21.14.23.50.2022.RData => s3.s6.igha1.clvsRest.X.RData

s_1 s_2 s_3 s_4 s_5 s_6 FALSE - TRUE 0 0 0 0 0 0 TRUE - FALSE 0 0 110 0 0 36

s_1 s_2 s_3 s_4 s_5 s_6 0 0 0 25 0 0 11 1 0 0 21 0 0 9 2 0 0 24 0 0 9 3 0 0 17 0 0 3 4 0 0 6 0 0 3 5 0 0 6 0 0 1 6 0 0 8 0 0 1 7 0 0 11 0 0 3 8 0 0 2 0 0 0

sweep(tb,2,colSums(tb),/)

s_1 s_2 s_3 s_4 s_5 s_6 0 0.20833333 0.27500000 1 0.17500000 0.22500000 2 0.20000000 0.22500000 3 0.14166667 0.07500000 4 0.05000000 0.07500000 5 0.05000000 0.02500000 6 0.06666667 0.02500000 7 0.09166667 0.07500000 8 0.01666667 0.00000000

cp = load("/Volumes/LaCie2022/katja/test.Mon.Mar.21.14.23.50.2022.RData")


whichCells = projections$sampleNames %in% c("s_3", "s_6")
projectionsSml= projections[whichCells,]
table(projectionsSml$x2.x, projectionsSml$sampleNames)
tb = table(projectionsSml$dbCluster, projectionsSml$sampleNames)
tb
sweep(tb,2,colSums(tb),`/`)

dat = "s3.s6.igha1."

cn="dbCluster"
whichCells = projections$sampleNames %in% c("s_3", "s_6")
scExSml = scEx[,whichCells]
counts = as.matrix(assays(scExSml)[["counts"]])

table(projectionsSml$sampleNames )
colData = as.factor(as.character(projectionsSml$sampleNames))

Pvals <- tryCatch(
  scDEA_individual_methods(raw.count = counts, cell.label = colData),
  error = function(e) {
    return(NULL)
  }
)
if (is.null(Pvals)) cat(file = stderr(),"Something is off\n")

combination.Pvals <- lancaster.combination(Pvals, weight = TRUE, trimmed = 0.2)

adjusted.Pvals <- scDEA.p.adjust(combination.Pvals, adjusted.method = "bonferroni")

# data.table::data.table(adjusted.Pvals)

minPvals = Pvals[which(adjusted.Pvals<0.1),]
minPvals[minPvals<1e-10] = 1e-10
upReg = minPvals[which(apply(counts[rownames(minPvals), colData == levels(colData)[1]],1,mean)>
                         apply(counts[rownames(minPvals), colData == levels(colData)[2]],1,mean)),]
downReg = minPvals[which(apply(counts[rownames(minPvals), colData == levels(colData)[1]],1,mean)<
                           apply(counts[rownames(minPvals), colData == levels(colData)[2]],1,mean)),]


paste(rownames(minPvals),collapse = ", ")
save(file = paste0(dat,".all.scDEA.RData"), list = c("minPvals", "adjusted.Pvals", "combination.Pvals", "Pvals", "counts", "colData", "upReg", "downReg"))



for (cl in levels(projections$dbCluster)){
  datSet =paste0(dat,"clvsRest.", cl)
  # if(file.exists( paste0(datSet,".scDEA.RData")))next()
  projections$testCluster = "Rest"
  projections$testCluster[projections$dbCluster == cl] = cl


  cDat = projections
  colData = as.factor(as.character(cDat[whichCells,"testCluster"]))

  Pvals <- tryCatch(
    scDEA_individual_methods(raw.count = counts, cell.label = colData),
    error = function(e) {
      return(NULL)
    }
  )
  if (is.null(Pvals)) next()

  combination.Pvals <- lancaster.combination(Pvals, weight = TRUE, trimmed = 0.2)

  adjusted.Pvals <- scDEA.p.adjust(combination.Pvals, adjusted.method = "bonferroni")

  # data.table::data.table(adjusted.Pvals)

  minPvals = Pvals[which(adjusted.Pvals<0.1),]
  minPvals[minPvals<1e-10] = 1e-10
  # sum((Pvals[which(adjusted.Pvals<0.1),]==0))
  # print(pheatmap::pheatmap(-log10(minPvals)))
  upReg = minPvals[which(apply(counts[rownames(minPvals), colData == levels(colData)[1]],1,mean)>
                           apply(counts[rownames(minPvals), colData == levels(colData)[2]],1,mean)),]
  downReg = minPvals[which(apply(counts[rownames(minPvals), colData == levels(colData)[1]],1,mean)<
                             apply(counts[rownames(minPvals), colData == levels(colData)[2]],1,mean)),]

  save(file = paste0(datSet,".scDEA.RData"), list = c("minPvals", "adjusted.Pvals", "combination.Pvals", "Pvals", "counts", "colData", "upReg", "downReg"))


}

propper clusters from the s3/6-igha1/2 only cells

cp = load("/Volumes/LaCie2022/katja/test.Mon.Mar.21.08.16.31.2022.withDBcluster.RData")
cp
dbCluster

cdat =colData(scEx)
cdat = cdat[colnames(counts),]


colData = cdat$dbCluster

Pvals <- tryCatch(
  scDEA_individual_methods(raw.count = counts, cell.label = colData),
  error = function(e) {
    return(NULL)
  }
)
combination.Pvals <- lancaster.combination(Pvals, weight = TRUE, trimmed = 0.2)

adjusted.Pvals <- scDEA.p.adjust(combination.Pvals, adjusted.method = "bonferroni")

# data.table::data.table(adjusted.Pvals)

minPvals = Pvals[which(adjusted.Pvals<0.05),]
minPvals[minPvals<1e-10] = 1e-10
# sum((Pvals[which(adjusted.Pvals<0.1),]==0))
# print(pheatmap::pheatmap(-log10(minPvals)))
upReg = minPvals[which(apply(counts[rownames(minPvals), colData == levels(colData)[1]],1,mean)>
                         apply(counts[rownames(minPvals), colData == levels(colData)[2]],1,mean)),]
downReg = minPvals[which(apply(counts[rownames(minPvals), colData == levels(colData)[1]],1,mean)<
                           apply(counts[rownames(minPvals), colData == levels(colData)[2]],1,mean)),]

save(file = paste0("withDBcluster",".scDEA.RData"), list = c("minPvals", "adjusted.Pvals", "combination.Pvals", "Pvals", "counts", "colData", "upReg", "downReg"))



paste(rownames(minPvals),collapse = ", ")
paste(rownames(upReg),collapse = ", ")
paste(rownames(downReg),collapse = ", ")
li = printTopGo(upReg, Pvals, paste("upReg-",dat, 0))
DT::datatable(li[[1]])
DT::datatable(li[[2]])
li = printTopGo(downReg, Pvals, paste("downReg-",dat, 0))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

genes = rownames(upReg)
gene1 = strsplit(li[[1]]$genes,",")[[1]][1]
c1 = projections$dbCluster==0
clusterGenes = apply(counts[,c1],1,mean)[genes]
otherGenes   = apply(counts[,!c1],1,mean)[genes]
data.frame(clusterGenes, otherGenes)

128 cells smallest possible data set

cp = load("/Volumes/LaCie2022/katja/katja128cells.RData")
cp
dbCluster

cdat =colData(scEx)
counts = assays(scEx)[["counts"]]


colData = cdat$dbCluster

projections = colData(scEx)
table(projections$igha2, projections$sampleNames)
tb = table(projections$dbCluster, projections$igha2)
tb
sweep(tb,2,colSums(tb),`/`)

table(projections$dbCluster, projections$sampleNames, projections$igha2)
tb = table(projections$dbCluster, projections$sampleNames)
tb = 
tb
sweep(tb,2,colSums(tb),`/`)




Pvals <- tryCatch(
  scDEA_individual_methods(raw.count = counts, cell.label = colData),
  error = function(e) {
    return(NULL)
  }
)
combination.Pvals <- lancaster.combination(Pvals, weight = TRUE, trimmed = 0.2)

adjusted.Pvals <- scDEA.p.adjust(combination.Pvals, adjusted.method = "bonferroni")

# data.table::data.table(adjusted.Pvals)

minPvals = Pvals[which(adjusted.Pvals<0.05),]
minPvals[minPvals<1e-10] = 1e-10
# sum((Pvals[which(adjusted.Pvals<0.1),]==0))
# print(pheatmap::pheatmap(-log10(minPvals)))
upReg = minPvals[which(apply(counts[rownames(minPvals), colData == levels(colData)[1]],1,mean)>
                         apply(counts[rownames(minPvals), colData == levels(colData)[2]],1,mean)),]
downReg = minPvals[which(apply(counts[rownames(minPvals), colData == levels(colData)[1]],1,mean)<
                           apply(counts[rownames(minPvals), colData == levels(colData)[2]],1,mean)),]

save(file = paste0("katja128cell",".scDEA.RData"), list = c("minPvals", "adjusted.Pvals", "combination.Pvals", "Pvals", "counts", "colData", "upReg", "downReg"))



paste(rownames(minPvals),collapse = ", ")
paste(rownames(upReg),collapse = ", ")
paste(rownames(downReg),collapse = ", ")
li = printTopGo(upReg, Pvals, paste("upReg-",dat, 0))
DT::datatable(li[[1]])
DT::datatable(li[[2]])
li = printTopGo(downReg, Pvals, paste("downReg-",dat, 0))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

genes = rownames(upReg)
gene1 = strsplit(li[[1]]$genes,",")[[1]][1]
c1 = colData==0
clusterGenes = apply(counts[,c1],1,mean)[genes]
otherGenes   = apply(counts[,!c1],1,mean)[genes]
data.frame(clusterGenes, otherGenes)

128 cells smallest possible data set katja128cells-4clusters.RData

cp = load("/Volumes/LaCie2022/katja/katja128cells-4clusters.RData")
cp
dbCluster

cdat =colData(scEx)
counts = assays(scEx)[["counts"]]


colData = cdat$dbCluster

projections = colData(scEx)
table(projections$igha2, projections$sampleNames)
tb = table(projections$dbCluster, projections$igha2)
tb
sweep(tb,2,colSums(tb),`/`)

table(projections$dbCluster, projections$sampleNames, projections$igha2)
tb = table(projections$dbCluster, projections$sampleNames)
tb
sweep(tb,2,colSums(tb),`/`)

projections$colData = "other"
projections$colData[cdat$dbCluster == 2] = "2"
colData = factor(projections$colData)
Pvals <- tryCatch(
  scDEA_individual_methods(raw.count = counts, cell.label = colData),
  error = function(e) {
    return(NULL)
  }
)
combination.Pvals <- lancaster.combination(Pvals, weight = TRUE, trimmed = 0.2)

adjusted.Pvals <- scDEA.p.adjust(combination.Pvals, adjusted.method = "bonferroni")

# data.table::data.table(adjusted.Pvals)

minPvals = Pvals[which(adjusted.Pvals<0.05),]
minPvals[minPvals<1e-10] = 1e-10
# sum((Pvals[which(adjusted.Pvals<0.1),]==0))
# print(pheatmap::pheatmap(-log10(minPvals)))
upReg = minPvals[which(apply(counts[rownames(minPvals), colData == levels(colData)[1]],1,mean)>
                         apply(counts[rownames(minPvals), colData == levels(colData)[2]],1,mean)),]
downReg = minPvals[which(apply(counts[rownames(minPvals), colData == levels(colData)[1]],1,mean)<
                           apply(counts[rownames(minPvals), colData == levels(colData)[2]],1,mean)),]

save(file = paste0("katja128cell-4clusters",".scDEA.RData"), list = c("minPvals", "adjusted.Pvals", "combination.Pvals", "Pvals", "counts", "colData", "upReg", "downReg"))



paste(rownames(minPvals),collapse = ", ")
paste(rownames(upReg),collapse = ", ")
paste(rownames(downReg),collapse = ", ")
li = printTopGo(upReg, Pvals, paste("upReg-",dat, 0))
DT::datatable(li[[1]])
DT::datatable(li[[2]])
li = printTopGo(downReg, Pvals, paste("downReg-",dat, 0))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

genes = rownames(upReg)
gene1 = strsplit(li[[1]]$genes,",")[[1]][1]
c1 = colData=="2"
clusterGenes = apply(counts[,c1],1,mean)[genes]
otherGenes   = apply(counts[,!c1],1,mean)[genes]
data.frame(clusterGenes, otherGenes)
cp = load("~/Rstudio/UTechSCB-SCHNAPPs/inst/develo/s3.s6.igha1.clvsRest.0.scDEA.RData")

paste(rownames(minPvals),collapse = ", ")
li = printTopGo(minPvals, Pvals, paste(dat, 0))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

cp = load("~/Rstudio/UTechSCB-SCHNAPPs/inst/develo/s3.s6.igha1.clvsRest.1.scDEA.RData")

paste(rownames(minPvals),collapse = ", ")
li = printTopGo(minPvals, Pvals, paste(dat, 1))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

cp = load("~/Rstudio/UTechSCB-SCHNAPPs/inst/develo/s3.s6.igha1.clvsRest.2.scDEA.RData")

paste(rownames(minPvals),collapse = ", ")
li = printTopGo(minPvals, Pvals, paste(dat, 2))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

cp = load("~/Rstudio/UTechSCB-SCHNAPPs/inst/develo/s3.s6.igha1.clvsRest.3.scDEA.RData")

paste(rownames(minPvals),collapse = ", ")
li = printTopGo(minPvals, Pvals, paste(dat, 3))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

cp = load("~/Rstudio/UTechSCB-SCHNAPPs/inst/develo/s3.s6.igha1.clvsRest.4.scDEA.RData")

paste(rownames(minPvals),collapse = ", ")
li = printTopGo(minPvals, Pvals, paste(dat, 4))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

cp = load("~/Rstudio/UTechSCB-SCHNAPPs/inst/develo/s3.s6.igha1.clvsRest.5.scDEA.RData")

paste(rownames(minPvals),collapse = ", ")
li = printTopGo(minPvals, Pvals, paste(dat, 5))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

cp = load("~/Rstudio/UTechSCB-SCHNAPPs/inst/develo/s3.s6.igha1.clvsRest.6.scDEA.RData")

paste(rownames(minPvals),collapse = ", ")
li = printTopGo(minPvals, Pvals, paste(dat, 6))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

cp = load("~/Rstudio/UTechSCB-SCHNAPPs/inst/develo/s3.s6.igha1.clvsRest.7.scDEA.RData")

paste(rownames(minPvals),collapse = ", ")
li = printTopGo(minPvals, Pvals, paste(dat, 7))
DT::datatable(li[[1]])
DT::datatable(li[[2]])

"/Volumes/LaCie2022/katja/test.Mon.Mar.21.11.49.25.2022.RData"

contains lables for igha1/2 and a combination of only s3/s6

table(projectionsSml$igha, projectionsSml$sampleNames)

s_1 s_2 s_3 s_4 s_5 s_6 FALSE 0 0 180 0 0 171 TRUE 0 0 146 0 0 68

table(projectionsSml$igha1, projectionsSml$sampleNames)

s_1 s_2 s_3 s_4 s_5 s_6 FALSE 0 0 234 0 0 205 TRUE 0 0 92 0 0 34

table(projectionsSml$igha2, projectionsSml$sampleNames)

s_1 s_2 s_3 s_4 s_5 s_6 FALSE 0 0 271 0 0 182 TRUE 0 0 55 0 0 57



C3BI-pasteur-fr/UTechSCB-SCHNAPPs documentation built on April 28, 2024, 10:51 a.m.