history files

intro to history file organization and it procedure.

TODO: addin to remove files not needed anymore for a given active document (check load statements vs. files available and either list files or remove them, or move them.)

TODO: optional integration of shiny elements / modules

Basic information is already provided in the history. Here we just show how to create specific plots/tables.

not yet in implemented but should be

sampNames <- levels(colData(scEx)$sampleNames)
sampleCols=list()
projectionColors$sampleNames <- allowedColors[seq_along(sampNames)]
source("../../inst/app/contributions/sCA_subClusterAnalysis/reactives.R")

individual plots

histogram raw counts

dat <- data.frame(counts = Matrix::colSums(assays(scEx)[["counts"]]))
dat$sample <- colData(scEx)$sampleNames
scols <- projectionColors$sampleNames
retVal <- ggplot(data = dat, aes(counts, fill = sample)) +
  geom_histogram(bins = 50) +
  labs(title = "Histogram for raw counts", x = "count", y = "Frequency") +
  scale_fill_manual(values = scols, aesthetics = "fill")
retVal

sample info

samples = colData(scEx)$sampleNames
counts <- table(samples)
df <- as.data.frame(counts)
ggplot(data = df,aes(x=samples, y=Freq, fill=samples)) + geom_bar(stat = "identity")  + 
  scale_color_manual(values=scols) 

Violin Plot

source("../../inst/app/contributions/coE_coExpression//reactives.R")
  geneListStr <- c("PDCD1")
  projectionVar <- "dbCluster"
  minExpr <- 1
  coE_showPermutations <- FALSE
  # colPal = coE_geneGrp_vioFunc # TODO must be wrong
  # sampCol <- projectionColors$sampleNames
ccols <- allowedColors[1:length(levels(projections$dbCluster))]
  upI <- coE_updateInputXviolinPlot() # no need to check because this is done in projections
  if (is.null(projections) | is.null(scEx_log)) {
    if (DEBUG) cat(file = stderr(), "output$coE_geneGrp_vio_plot:NULL\n")
    return(NULL)
  }
  if (.schnappsEnv$DEBUGSAVE) {
    save(file = "~/SCHNAPPsDebug/coE_geneGrp_vio_plot.RData", list = c(ls()))
  }
  # load(file="~/SCHNAPPsDebug/coE_geneGrp_vio_plot.RData")

  featureData <- rowData(scEx_log)
  retVal <- coE_geneGrp_vioFunc(
    genesin = geneListStr,
    projections = projections,
    scEx = scEx_log,
    featureData = featureData,
    minExpr = minExpr,
    dbCluster = projectionVar,
    coE_showPermutations = coE_showPermutations,
    sampCol = scols,
    ccols = ccols
  )
retVal

panel plot

  .schnappsEnv <- new.env(parent=emptyenv())

source("~/Rstudio/Schnapps/inst/app/contributions/DE_DataExploration/reactives.R")
# panelplotFunc ----
# scEx_log singlecell Experiment object
# projections as used in schnapps
# genesin gene names to be plotted
# dimx4, dimy4 dimensions to be plotted on
# sameScale True/False
# nCol number of columns for final plot
# sampdes header for plot
# cellNs cell names to be used


retVal = panelPlotFunc(scEx_log, projections, genesin, "sampleNames", "UMI.count", sameScale = FALSE, nCol = 4, sampdesc = "all samples", cellNs = rownames(projections)) 

retVal

2D plot (module)

# g_id should be a valid rowname
# geneNames needed for umicounts
# geneNames2 needed for umicounts2
# clId clId <- levels(projections$dbCluster) # might not be used anymore
# grpN, grpNs
  # if (length(grpN) > 0) {
  #   if (length(grpNs[rownames(subsetData), grpN] == "TRUE") > 0 & sum(grpNs[rownames(subsetData), grpN] == "TRUE", na.rm = TRUE) > 0) {
  #     grpNSub <- grpNs[rownames(subsetData), ]
  #     selectedCells <- rownames(grpNSub[grpNSub[, grpN] == "TRUE", ])
  #   }
  # }
# legend.position not used
# logx, logy TRUE/FALSE
# dimY can be "histogram"
# dimX 
# dimCol can be "cellDensity"

source("~/Rstudio/Schnapps/inst/app/defaultValues.R")
source("~/Rstudio/Schnapps/inst/app/serverFunctions.R")
DEBUG = FALSE
.schnappsEnv$DEBUGSAVE =  FALSE
clId <- levels(projections$dbCluster)
colnames(projections)
p1 <- plot2Dprojection(scEx_log,
      projections = projections, g_id = rowData(scEx_log)[1,"symbol"], featureData = rowData(scEx_log), geneNames = NULL,
      geneNames2 = NULL, dimX = "UMI.count", dimY = "before.filter", clId, grpN = NULL, legend.position = NULL,
      grpNs = "", logx = FALSE, logy = FALSE, divXBy = "before.filter", divYBy = "None", dimCol = "cellDensity", colors = NULL
    )

p1

SOM

The som as implemented in SCHNAPPs will cluster the genes and output genes that cluster with a target gene. This makes sense when looking a given dbCluster and try to identify co-expressed genes. Here, we go through all the clusters and print out those groups of genes

suppressMessages(require(kohonen))
suppressMessages(require(Rsomoclu))
iData = as.matrix(assays(scEx_log)[[1]])
geneName = "ENSG00000073861"
nSom = 20
featureData = rowData(scEx_log)
for (dbCl in levels(projections$dbCluster)) {
cols2use <- which(projections$dbCluster == dbCl)
  res2 <- Rsomoclu.train(
    input_data = iData[, cols2use],
    nSomX = nSom, nSomY = nSom,
    nEpoch = 10,
    radius0 = 0,
    radiusN = 0,
    radiusCooling = "linear",
    mapType = "planar",
    gridType = "rectangular",
    scale0 = 1,
    scaleN = 0.01,
    scaleCooling = "linear"
  )
  colnames(res2$codebook) <- colnames(iData)[cols2use]
  rownames(res2$globalBmus) <- make.unique(as.character(rownames(iData)), sep = "_#_")
  simGenes <- rownames(res2$globalBmus)[which(res2$globalBmus[, 1] == res2$globalBmus[geneName, 1] &
                                                res2$globalBmus[, 2] == res2$globalBmus[geneName, 2])]
  print(paste("dbCluster:", dbCl))
  print(featureData[simGenes,"symbol"])
}

custom DGE

myDiffExpFunctions <- list(
  c("Chi-square test of an estimated binomial distribution", "sCA_dge_CellViewfunc"),
  c("t-test", "sCA_dge_ttest"),
  c("DESeq2", "sCA_dge_deseq2"),
  c("seurat:wilcox", "sCA_dge_s_wilcox"),
  c("seurat:bimod", "sCA_dge_s_bimod"),
  c("seurat:t-test", "sCA_dge_s_t"),
  c("seurat:LR", "sCA_dge_s_LR"),
  c("seurat:neg-binomial", "sCA_dge_s_negbinom"),
  c("seurat:Poisson", "sCA_dge_s_poisson")
)

dgeFunc = myDiffExpFunctions[[1]][2]
if (dgeFunc %in% c("sCA_dge_deseq2", "sCA_dge_s_negbinom", "sCA_dge_s_poisson")) {
  scEx_log = scEx
}

# add projections
# c1 = c("AAAGATGTCTACCTGC-1", "AAAGCAATCTGCGACG-1", "AACACGTTCGTACCGG-1", "AACCGCGAGCAGGCTA-1")
projections$lung = projections$sampleNames %in% c("d3_lung", "d4_lung", "d8_lung", "d16_lung")

clusterNr = 12
c1 = rownames(projections[projections$dbCluster == clusterNr & projections$lung,])
c2 = rownames(projections[projections$dbCluster == clusterNr & ! projections$lung,])

top.genes <- do.call(dgeFunc, args = list(
  scEx_log = scEx_log,
  cells.1 = c1, cells.2 = c2
))
featureData <- rowData(scEx)

top.genes$symbol <-
  featureData[rownames(top.genes), "symbol"]
if ("Description" %in% colnames(featureData)) {
  top.genes$Description <- featureData[rownames(top.genes), "Description"]
}
rownames(top.genes) <- make.unique(as.character(top.genes$symbol), sep = "_#_")

DT::datatable(top.genes)

specific example of a custom child report

requires specific sample names and projections (lung) data cannot be shared but this can be used as a template nonetheless

#load scEx
load(file = "~/Rstudio/Schnapps/history/hist_2020-Feb-19.13.03/scEx.360f308aaa23.RData")
#load scEx_log
load(file = "~/Rstudio/Schnapps/history/hist_2020-Feb-19.13.03/scEx_log.360f5fa7f8ba.RData")
#load projections with umap
load(file = "~/Rstudio/Schnapps/history/hist_2020-Feb-19.13.03/projections.360f2ba33a1b.RData")
scEx <- scEx$scEx
featureData <- rowData(scEx)
scEx_log <- scEx_log$scEx_log
projections <- projections$projections
projections$lung = FALSE
projections$lung[grep("lung",projections$sampleNames)] = TRUE
projections$lung = factor(projections$lung, levels = c(TRUE,FALSE))

scExAll = scEx
scEx_logAll = scEx_log
projectionsAll = projections
cellTypeMarkers = list(
  t.cells = c("CD3G", "CD3D", "CD3E", "CD2"),
  cd8 = c("CD8A", "GZMA"), # CD8+ T cells
  treg = c("CD4", "FOXP3"), # CD4+- Treg
  tbet = c("TBX21"), # t-bet
  b.cells = c("CD19", "CD79A"), # B cells
  nk.cells = c("KLRC1", "KLRC3"), # NK cells
  pc = c("SLAMF7", "IGKC"), # plasma cells
  macrophages = c("FCGR2A", "CSF1R"), # macrophages
  dc = c("FLT3"), # Dendritic cells
  plasmacytoid.dc = c("CLEC4C"), # plasmacytoid dendritic cells
  fibroblasts = c("COL1A2"), # fibroblasts
  myfibroblasts = c("MCAM", "MYLK"), # myfibroblasts
  ca.fibroblasts = c("FAP", "PDPN"), # cancer-associated fibroblasts
  malignant = c("EPCAM", "TP63"), # malignant cells
  endothelial = c("PECAM1", "VWF"), # endothelial cells
  melanocytes = c("PMEL", "MLANA"), # melanocytes
  cd103 = c("ITGAE"),
  cytotoxic = c("GZMB"), 
  exhaution = c("PDCD1", "CTLA4", "LAG3", "HAVCR2","CD244", "CD160", "TIGIT" )
)
# which(featureData$symbol %in% "TIGIT")
cellTypeCount = data.frame(row.names = rownames(projections))
for (ct in names(cellTypeMarkers)) {
  print(ct)
  print(cellTypeMarkers[[ct]])
  # print(all(cellTypeMarkers[[ct]] %in% featureData$symbol))
  geneIdx = which(featureData$symbol %in% cellTypeMarkers[[ct]])
  if (length(geneIdx) > 1) {
    print(paste("sum: ", sum(colSums(assays(scEx)[[1]][geneIdx,]) > 0)))
    cellTypeCount[ct] = colSums(assays(scEx)[[1]][geneIdx,]) > 0
  } else {
    print(paste("sum: ", sum(assays(scEx)[[1]][geneIdx,] > 0)))
    cellTypeCount[ct] = assays(scEx)[[1]][geneIdx,] > 0
  }
}
# cellTypeCount[cellTypeCount$cd8 & cellTypeCount$b.cells,]
ctcTab = table(cellTypeCount)
ctcTab = as.data.frame(ctcTab,stringsAsFactors =FALSE)[as.data.frame(ctcTab,stringsAsFactors =FALSE)$Freq>0,]
ctcTab = cbind(Freq = ctcTab$Freq, SumTrue = rowSums(ctcTab[,-ncol(ctcTab)] == "TRUE"), ctcTab[,-ncol(ctcTab)])
DT::datatable(ctcTab, )
  inputData <- scEx_logAll
anovaName = "CD4+cleaned"



C3BI-pasteur-fr/UTechSCB-SCHNAPPs documentation built on April 23, 2024, 11:54 a.m.