library(sp.scRNAseq) library(tidyverse) #ideally you would only run this if the operating system is windows #there is some info here: https://stackoverflow.com/questions/4747715/how-to-check-the-os-within-r #but I can't check it without a windows computer #You should be able to write something like: if(Sys.info()['sysname'] == "Windows") {extrafont::loadfonts(device="win")} #extrafont::loadfonts(device="win")
Load data
#load("counts_raw_and_log_180112.rda") #load("counts_ercc.rda") #this is better because it will work for anyone that has your package installed #and therefore is more reproduciable. load(system.file('rawData/counts_raw_and_log_180112.rda', package = "sp.scRNAseqAnalysis")) load(system.file('rawData/counts_ercc.rda', package = "sp.scRNAseqAnalysis")) #remove .htseq suffix colnames(counts) <- str_replace(colnames(counts), "(.*)\\.htseq$", "\\1") colnames(counts.ercc) <- str_replace(colnames(counts.ercc), "(.*)\\.htseq$", "\\1") #Select Singlets s <- grepl("SRR", colnames(counts)) | grepl("Singlet", colnames(counts)) | grepl("NJA00102", colnames(counts)) | grepl("NJA00103", colnames(counts)) | grepl("NJA00104", colnames(counts)) | grepl("NJA00109", colnames(counts)) | grepl("NJA00204", colnames(counts)) | grepl("NJA00205", colnames(counts)) | grepl("NJA00206", colnames(counts))
spCounts
#Create Objects with multiplets and Singlets #cObjSng <- spCounts(counts[, s], counts.ercc[, s]) #cObjMul <- spCounts(counts[, !s], counts.ercc[, !s]) #Plot Fraction of ERCC in Multiplets and Singlets (Works poorly due to no ERCC in Regev data) #plotCountsERCC(cObjSng, cObjMul) #I fixed this now in devel version 0.0.1.5. Samples that have no ERCC reads should have them set to NA instead of 0. Setting them to NA will give a warning when building the spCounts objects but that is ok. counts.ercc[, grepl("SRR", colnames(counts.ercc))] <- NA cObjSng <- spCounts(counts[, s], counts.ercc[, s]) cObjMul <- spCounts(counts[, !s], counts.ercc[, !s]) plotCountsERCC(cObjSng, cObjMul) # Plot Number of Cells expressing both of selected markers plotCountsMarkers(cObjSng, cObjMul, markers = c("Alpi", "Reg4"))
spUnsupervised
#Create tSNE and plot with Mlcust clusters as well as gene expression uObj <- spUnsupervised(cObjSng) plotUnsupervisedClass(uObj, cObjSng) #even though the seed is set by default I would: plotUnsupervisedClass(uObj, cObjSng, seed = 438923) plotUnsupervisedMarkers(uObj, cObjSng, markers = "Lgr5") plotUnsupervisedMarkers(uObj, cObjSng, markers = "Olfm4") plotUnsupervisedMarkers(uObj, cObjSng, markers = "Alpi") plotUnsupervisedMarkers(uObj, cObjSng, markers = "Muc2") plotUnsupervisedMarkers(uObj, cObjSng, markers = "Lyz1") plotUnsupervisedMarkers(uObj, cObjSng, markers = "Reg4") plotUnsupervisedMarkers(uObj, cObjSng, markers = "Ptprc") plotUnsupervisedMarkers(uObj, cObjSng, markers = "Dclk1") plotUnsupervisedMarkers(uObj, cObjSng, markers = "Mboat1") plotUnsupervisedMarkers(uObj, cObjSng, markers = "Chga") #would be good to write what cell type these markers correspond to
New classification
#Manually Create New Clusters by deducing cell types based on gene expression in clusters classification <- getData(uObj, "classification") names(classification) <- getData(uObj, "tsne") %>% rownames() SI.Stem <- names(classification)[classification %in% c("A1", "J1", "K1", "N1", "T1")] SI.TA.Cells <- names(classification)[classification %in% c("R1", "C1", "B1")] SI.Enterocyte.Progenitor <- names(classification)[classification %in% c("E1", "Q1")] SI.Early.Enterocyte <- names(classification)[classification == "G1"] SI.Late.Enterocyte <- names(classification)[classification == "M1"] SI.Paneth <- names(classification)[classification %in% c("P1", "O1")] SI.Goblet <- names(classification)[classification %in% c("F1", "L1")] Blood.Cells <- names(classification)[classification == "D1"] SI.Tuft.Cells <- names(classification)[classification == "H1"] SI.Enteroendocrine <- names(classification)[classification == "S1"] Colon <- names(classification)[classification == "I1"] #tSNE for colon cells cs <- colnames(counts) %in% Colon #Colon object colcObjSng <- spCounts(counts[, cs], counts.ercc[, cs]) #tSNE for Colon cell and expression of markers C.uObj <- spUnsupervised(colcObjSng) plotUnsupervisedClass(C.uObj, colcObjSng) plotUnsupervisedMarkers(C.uObj, colcObjSng, markers = c("Lgr5")) plotUnsupervisedMarkers(C.uObj, colcObjSng, markers = c("Muc2")) plotUnsupervisedMarkers(C.uObj, colcObjSng, markers = c("Car1")) #Select and rename clusters according to cell types in colon C.classification <- getData(C.uObj, "classification") names(C.classification) <- getData(C.uObj, "tsne") %>% rownames() C.Stem <- names(C.classification)[C.classification == "B1"] C.Goblet <- names(C.classification)[C.classification == "C1"] C.Colonocytes <- names(C.classification)[C.classification == "A1"] #Organize new classifications and insert them into uObj which contains all cells celltypes <- data.frame( Samples = c( SI.Early.Enterocyte, SI.Enterocyte.Progenitor, SI.Enteroendocrine, SI.Goblet, SI.Late.Enterocyte, SI.Paneth, SI.Stem, SI.TA.Cells, C.Colonocytes, C.Goblet, C.Stem, SI.Tuft.Cells, Blood.Cells ), class = c( rep("SI.Early.Enterocyte", length(SI.Early.Enterocyte)), rep("SI.Enterocyte.Progenitor", length(SI.Enterocyte.Progenitor)), rep("SI.Enteroendocrine", length(SI.Enteroendocrine)), rep("SI.Goblet", length(SI.Goblet)), rep("SI.Late.Enterocyte", length(SI.Late.Enterocyte)), rep("SI.Paneth", length(SI.Paneth)), rep("SI.Stem", length(SI.Stem)), rep("SI.TA.Cells", length(SI.TA.Cells)), rep("C.Colonocytes", length(C.Colonocytes)), rep("C.Goblet", length(C.Goblet)), rep("C.Stem", length(C.Stem)), rep("SI.Tuft.Cells", length(SI.Tuft.Cells)), rep("Blood.Cells", length(Blood.Cells)) ) ) newclass <- celltypes[match(rownames(getData(uObj, "tsne")), celltypes$Samples),] newclass <- as.vector(newclass$class) devtools::use_data(celltypes, overwrite = TRUE) #!!!It would be much better to save the celltypes variable instead of the newclass variable since the sample names are not included in newclass!!! #Can you also save the uObj? We may remove it in the future but this might save us some time for now. #devtools::use_data(newclass, overwrite = TRUE) uObj@classification <- newclass #Plot tsne with new clusters plotUnsupervisedClass(uObj, cObjSng) #Calculate new means for groups tsneMeans(uObj) <- tsneGroupMeans(getData(uObj, "tsne"), getData(uObj, "classification")) groupMeans(uObj) <- averageGroupExpression(cObjSng, getData(uObj, "classification"), weighted=FALSE) #Set Uncertainty to 0 and attempt to identify cell composition of multiplets uncertainty(uObj) <- rep(0, length(getData(uObj, "uncertainty"))) uObj_countsMgfpRegev <- uObj devtools::use_data(uObj_countsMgfpRegev, overwrite = TRUE)
spSwarm I set eval = FALSE in the following "chunk" so that the spSwarm method is not run when knitting. I.e. the code is only there to show what was previously done.
sObj <- spSwarm(cObjMul, uObj, distFun = "dtsnCellNum", e = 0.0025, cellNumbers = estimateCells(cObjSng, cObjMul), swarmsize = 500, maxiter = 1000) plotSwarmGraph(sObj, uObj) #Check false positives by seeing how many of multiplets contain cells from both SI and Colon fp <- getMultipletsForEdge(sObj, edges = data.frame(to = "C.Stem", from = "SI.Stem"), edge.cutoff = 0) fp sObj_countsMgfpRegev <- sObj devtools::use_data(sObj_countsMgfpRegev, overwrite = TRUE) devtools::use_data(fp, overwrite = TRUE) #sObj <- spSwarm(cObjMul, uObj, distFun = "distToSliceNorm") #plotSwarm(sObj, uObj, cObjSng, cObjMul, type = "tsne") #plotSwarm(sObj, uObj, cObjSng, cObjMul, type = "heat") #tsne <- getData(uObj, "tsne") #plot.nice(tsne, gene.vals=counts.log, genes=c("Lyz1", "Alpi", "Lgr5", "Reg4", "Dll4"), cex=2)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.