###############################################################################
## Add to Seurat metadata ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
setGeneric(
name="addDf2seuratMetaData",
def=function(obj, dfAdd) {
print(paste0("Dims before addition: ", dim(obj@meta.data)))
for (i in 1:ncol(dfAdd)){
addVec <- as.vector(dfAdd[,i])
names(addVec) <- row.names(dfAdd)
colName <- as.vector(names(dfAdd)[i])
obj <- Seurat::AddMetaData(
object = obj,
metadata = addVec,
colName
)
}
print(paste0("Dims after addition: ", dim(obj@meta.data)))
print(paste0("Meta data column names: ", paste(names(obj@meta.data), collapse = ", ")))
return(obj)
}
)
## Done adding to Seurat metadata ##
###############################################################################
###############################################################################
## Write table to Excel File ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
#'
#'
createXLSXoutput <- function(
dfTable = "dfTable",
outPutFN = "path/to/output/FN.xlsx",
tableName = "Table1"
){
library(openxlsx)
wb <- createWorkbook()
addWorksheet(wb, tableName)
freezePane(wb, tableName , firstActiveRow = 2)
hs1 <- createStyle(
fontColour = "#ffffff",
fgFill = "#000000",
halign = "CENTER",
textDecoration = "Bold"
)
writeData(wb, 1, dfTable, startRow = 1, startCol = 1, headerStyle = hs1)
saveWorkbook(
wb,
outPutFN ,
overwrite = TRUE
)
print(paste0("Table saved as ", outPutFN, "."))
}
## ##
###############################################################################
###############################################################################
## DoCRplots ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @import Matrix
#' @export
#'
setGeneric(
name="doCRplots",
def=function(
obj,
figureCount = 1,
VersionPdfExt = ".pdf",
tocSubLevel = 4,
dotsize = 0.5
) {
library(Matrix)
figureCreated <- FALSE
sampleNames <- names(obj@sampleDetailList)
rawSampleList <- list()
filtSampleList <- list()
for (i in 1:length(sampleNames)){
if (obj@sampleDetailList[[sampleNames[i]]]$type == "TenX"){
pos <- grep("gene.column", names(obj@sampleDetailList[[sampleNames[i]]]))
if (length(pos) == 0){
gene.column = 2
} else {
gene.column <- obj@sampleDetailList[[i]]$gene.column
}
baseFN <- obj@sampleDetailList[[sampleNames[i]]]$path
rawFN <- gsub("filtered_feature_bc_matrix", "raw_feature_bc_matrix", baseFN)
if (file.exists(rawFN)){
rawSampleList[[sampleNames[i]]] <- Seurat::Read10X(data.dir = rawFN, gene.column = gene.column)
filtSampleList[[sampleNames[i]]] <- Seurat::Read10X(data.dir = baseFN, gene.column = gene.column)
cellID <- colnames(rawSampleList[[sampleNames[i]]])
CellRanger <- rep("Excl", length(cellID))
CellRanger[cellID %in% colnames(filtSampleList[[sampleNames[i]]])] <- "Incl"
UMI_count <- colSums(rawSampleList[[sampleNames[i]]])
sampleID <- rep(sampleNames[i], length(cellID))
###################################################################
## Calculate nFeatures ##
UMI_filt <- UMI_count[UMI_count > 0]
rawM <- rawSampleList[[sampleNames[i]]]
rawM <- rawM[,names(UMI_filt)]
#rawM[rawM > 0] <- 1
## Done calculate nFeatures ##
###################################################################
dfTemp <- data.frame(sampleID, cellID, CellRanger,UMI_count)
dfTemp <- dfTemp[dfTemp$cellID %in% names(UMI_filt), ]
###################################################################
## Count features ##
# increment <- 10000
# iter <- floor(nrow(dfTemp)/increment)
# resVec <- as.vector(NULL, mode="character")
#
# for (k in 0:(iter)){
# uL <- ((k+1)*increment )
# if (uL > ncol(rawM)){
# uL = ncol(rawM)
# }
#
# h <- rawM[,(k*increment + 1):uL]
# h2 <- apply(h, 2, function(x) length(x[x>0]))
# names(h2) <- colnames(h)
# resVec <- c(
# resVec,
# h2
# )
# print(k)
# }
## Done count features ##
###################################################################
dfTemp <- dfTemp[order(dfTemp$UMI_count, decreasing = T),]
dfTemp[["sampleOrder"]] <- 1:nrow(dfTemp)
dfTemp[dfTemp$sampleOrder < 10, "sampleOrder"] <- 10
dfTemp$sampleOrder <- log10(dfTemp$sampleOrder)
dfTemp[["lg10_UMI_count"]] <- dfTemp$UMI_count
#dfTemp$lg10_UMI_count[dfTemp$lg10_UMI_count < 10] <- 1
dfTemp$lg10_UMI_count <- log10(dfTemp$lg10_UMI_count)
###################################################################
## Plot Selection ##
selVecF <- as.vector(dfTemp[dfTemp$CellRanger == "Incl", "cellID"])
selVecR <- as.vector(dfTemp[dfTemp$CellRanger != "Incl", "cellID"])
if (length(selVecR) > length(selVecF)){
selVecR <- selVecR[sample(1:length(selVecR), size = length(selVecF), replace = F)]
}
selVec <- c(
selVecF,
selVecR
)
dfTemp[["plot_selection"]] <- "Excl_CR"
dfTemp[dfTemp$cellID %in% selVec, "plot_selection"] <- "Incl_CR"
## Done Plot Selection ##
###################################################################
if (!figureCreated){
figureCreated = TRUE
dfRes <- dfTemp
} else {
dfRes <- rbind(
dfRes,
dfTemp
)
}
}
}
}
## Done load raw data ##
###############################################################################
###############################################################################
## Make plots ##
plotListQC1 <- list()
chnkVec <- as.vector(NULL, mode = "character")
dfPlot <- dfRes[dfRes$plot_selection == "Incl_CR", ]
sampleNames <- as.vector(unique(dfPlot$sampleID))
for (i in 1:length(sampleNames)){
tag <- sampleNames[i]
greenLine <- min(dfPlot[dfPlot$CellRanger == "Incl" & dfPlot$sampleID == sampleNames[i], "lg10_UMI_count"])
blackLine <- max(dfPlot[dfPlot$CellRanger != "Incl" & dfPlot$sampleID == sampleNames[i], "lg10_UMI_count"])
plotListQC1[[tag]] <- ggplot2::ggplot(dfPlot[dfPlot$sampleID == sampleNames[i],], ggplot2::aes(sampleOrder, lg10_UMI_count, color=CellRanger)
) + ggplot2::geom_hline(yintercept = blackLine, color = "black", size=0.5
) + ggplot2::geom_hline(yintercept = greenLine, color = "#009900", size=0.5
) + ggplot2::geom_point(
shape = 16,
size = as.numeric(dotsize)
) + ggplot2::xlab("log10(N Droplets)") + ggplot2::ylab("lg10(UMI Count Per Cell)"
) + ggplot2::theme_bw(
) + ggplot2::theme(
axis.text.y = ggplot2::element_text(size=8),
axis.text.x = ggplot2::element_text(size=8),
axis.title.y = ggplot2::element_text(size=8),
axis.title.x = ggplot2::element_text(size=8),
axis.line = ggplot2::element_line(colour = "black"),
panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
plot.title = ggplot2::element_text(hjust = 0.5, size = 12)
) + ggplot2::ggtitle(paste0("QC Sample ", tag)
) + ggplot2::scale_color_manual(values= ggplot2::alpha(c("#000000","#009900"), 0.5)
) + ggplot2::coord_fixed(ratio=1
)
FNbase <- paste0("cellranger.result.", tag, VersionPdfExt)
FN <- paste0(obj@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
plotListQC1[[tag]]
dev.off()
figLegend <- paste0(
"**Figure ",
figureCount,
":** ",
" CellRanger quality assessment. Green cells are considered for further analysis. Download a pdf of this figure [here](", FNrel,")."
)
figureCount <- figureCount + 1
NewChnk <- paste0(
paste(rep("#", tocSubLevel), collapse=""), " ", tag,
"\n```{r CellRangerResult_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotListQC1[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
tag <- "All"
plotListQC1[[tag]] <- ggplot2::ggplot(dfPlot, ggplot2::aes(sampleOrder, lg10_UMI_count, color=CellRanger)
)+ ggplot2::geom_point(
shape = 16,
size = as.numeric(dotsize)
) + ggplot2::xlab("log10(N Droplets)") + ggplot2::ylab("lg10(UMI Count Per Cell)") + ggplot2::theme(
axis.text.y = ggplot2::element_text(size=8),
axis.text.x = ggplot2::element_text(size=8),
axis.title.y = ggplot2::element_text(size=8),
axis.title.x = ggplot2::element_text(size=8),
axis.line = ggplot2::element_line(colour = "black"),
panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
plot.title = ggplot2::element_text(hjust = 0.5, size = 12),
panel.background = ggplot2::element_rect(fill = "lightgrey")
) + ggplot2::ggtitle(paste0("QC Sample ", tag)
) + ggplot2::scale_color_manual(values= ggplot2::alpha(c("#000000","#009900"), 0.5)
) + ggplot2::theme_bw()
## Save to file ##
FNbase <- paste0("cellranger.result.", tag, VersionPdfExt)
FN <- paste0(obj@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
plotListQC1[[tag]]
dev.off()
figLegend <- paste0(
"**Figure ",
figureCount,
":** ",
" CellRanger quality assessment. Green cells are considered for further analysis. Download a pdf of this figure [here](", FNrel,")."
)
figureCount <- figureCount + 1
NewChnk <- paste0(
paste(rep("#", tocSubLevel), collapse = ""), " ", tag,
"\n```{r CellRangerResult_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotListQC1[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
returnList <- list(
"plotListQC1" = plotListQC1,
"chnkVec" = chnkVec,
"dfPlot" = dfPlot,
"figureCount" = figureCount
)
})
## Done doing CR plots ##
###############################################################################
###############################################################################
## RNA feature plot ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
#'
setGeneric(
name="doUMAP_plot_percMT",
def=function(
SampleList,
obj = "Obio",
figureCount = 1,
VersionPdfExt = ".pdf",
tocSubLevel = 4,
dotsize = 0.5
) {
###############################################################################
## Make plots ##
plotListUMT <- list()
chnkVec <- as.vector(NULL, mode = "character")
sampleNames <- as.vector(names(obj@sampleDetailList))
## Determine min/max for all plots ##
for (i in 1:length(sampleNames)){
dfT <- data.frame(SampleList[[sampleNames[i]]]@reductions$umap@cell.embeddings)
if (i ==1){
dfR <- dfT
} else {
dfR <- rbind(
dfT,
dfR
)
}
}
maxX <- 1.1*max(dfR$UMAP_1, na.rm = T)
minX <- 1.1*min(dfR$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfR$UMAP_2, na.rm = T)
minY <- 1.1*min(dfR$UMAP_2, na.rm = T)
for (i in 1:length(sampleNames)){
tag <- paste0("UMT_",sampleNames[i])
dfPlot <- SampleList[[sampleNames[i]]]@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
## Get UMAP coordinates ##
coord <- data.frame(SampleList[[sampleNames[i]]]@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
## Add cluster colors ##
# dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
#clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))
#library(scales)
#clusterCols = hue_pal()(length(clusterVec))
dfPlot$percent_mt <- as.numeric(dfPlot$percent_mt)
plotListUMT[[tag]] <- ggplot2::ggplot(data=dfPlot[dfPlot$included == "+",], ggplot2::aes(UMAP_1, UMAP_2, color=percent_mt)
) + ggplot2::geom_point( shape=16, size = as.numeric(dotsize)
) + ggplot2::xlab("UMAP1") + ggplot2::ylab("UMAP2") + ggplot2::theme(
axis.text.y = ggplot2::element_text(size=8),
axis.text.x = ggplot2::element_text(size=8),
axis.title.y = ggplot2::element_text(size=8),
axis.title.x = ggplot2::element_text(size=8),
axis.line = ggplot2::element_line(colour = "black"),
panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
plot.title = ggplot2::element_text(hjust = 0.5, size = 12),
legend.title = ggplot2::element_blank()
) + ggplot2::ggtitle(paste0("Sample: ", tag)
) + ggplot2::xlim(minX, maxX) + ggplot2::ylim(minY, maxY
) + ggplot2::coord_fixed(ratio=1
) + ggplot2::scale_colour_gradient2(high = "red", mid = "black"
) + ggplot2::theme_bw()
FNbase <- paste0("Sample.level.UMAP.perMT", tag, VersionPdfExt)
FN <- paste0(obj@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotListSQCUMAP[[tag]])
dev.off()
figLegend <- paste0(
"**Figure ",
figureCount,
":** ",
" Sample-level UMAP plot for QC purposes. Colored by the percent of mitochondrial gene expression per cell. Download a pdf of this figure [here](", FNrel,")."
)
figureCount <- figureCount + 1
NewChnk <- paste0(
paste(rep("#", tocSubLevel), collapse=""), " ", tag,
"\n```{r UMT_UMAP_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotListUMT[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
returnList <- list(
"plotListUMT" = plotListUMT,
"chnkVec" = chnkVec,
"figureCount" = figureCount
)
})
## Done SL UMAP ##
###############################################################################
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
setGeneric(
name="doUMAP_plot_nFeatRNA",
def=function(
SampleList,
obj = "Obio",
figureCount = 1,
VersionPdfExt = ".pdf",
tocSubLevel = 4,
dotsize = 0.5
) {
###############################################################################
## Make plots ##
plotListNC <- list()
chnkVec <- as.vector(NULL, mode = "character")
sampleNames <- as.vector(names(obj@sampleDetailList))
## Determine min/max for all plots ##
for (i in 1:length(sampleNames)){
dfT <- data.frame(SampleList[[sampleNames[i]]]@reductions$umap@cell.embeddings)
if (i ==1){
dfR <- dfT
} else {
dfR <- rbind(
dfT,
dfR
)
}
}
maxX <- 1.1*max(dfR$UMAP_1, na.rm = T)
minX <- 1.1*min(dfR$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfR$UMAP_2, na.rm = T)
minY <- 1.1*min(dfR$UMAP_2, na.rm = T)
for (i in 1:length(sampleNames)){
tag <- paste0("NC_",sampleNames[i])
dfPlot <- SampleList[[sampleNames[i]]]@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
## Get UMAP coordinates ##
coord <- data.frame(SampleList[[sampleNames[i]]]@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
## Add cluster colors ##
# dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
#clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))
#library(scales)
#clusterCols = hue_pal()(length(clusterVec))
dfPlot$nFeature_RNA <- as.numeric(dfPlot$nFeature_RNA)
plotListNC[[tag]] <- ggplot2::ggplot(data=dfPlot[dfPlot$included == "+",], ggplot2::aes(UMAP_1, UMAP_2, color=nFeature_RNA)
) + ggplot2::geom_point( shape=16, size = as.numeric(dotsize)
) + ggplot2::xlab("UMAP1") + ggplot2::ylab("UMAP2") + ggplot2::theme(
axis.text.y = ggplot2::element_text(size=8),
axis.text.x = ggplot2::element_text(size=8),
axis.title.y = ggplot2::element_text(size=8),
axis.title.x = ggplot2::element_text(size=8),
axis.line = ggplot2::element_line(colour = "black"),
panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
plot.title = ggplot2::element_text(hjust = 0.5, size = 12),
#legend.title = ggplot2::element_blank()
) + ggplot2::ggtitle(paste0("Sample: ", tag)
) + ggplot2::xlim(minX, maxX) + ggplot2::ylim(minY, maxY
) + ggplot2::coord_fixed(ratio=1
) + ggplot2::scale_colour_gradient2(high = "black", low = "red"
) + ggplot2::theme_bw()
FNbase <- paste0("Sample.level.UMAP.nFeatRNA", tag, VersionPdfExt)
FN <- paste0(obj@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotListNC[[tag]])
dev.off()
figLegend <- paste0(
"**Figure ",
figureCount,
":** ",
" Sample-level UMAP plot for QC purposes. Colored by the nFeatureRNA number. Download a pdf of this figure [here](", FNrel,")."
)
figureCount <- figureCount + 1
NewChnk <- paste0(
paste(rep("#", tocSubLevel), collapse=""), " ", tag,
"\n```{r NC_UMAP_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotListNC[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
returnList <- list(
"plotListNC" = plotListNC,
"chnkVec" = chnkVec,
"figureCount" = figureCount
)
})
## Done SL UMAP ##
###############################################################################
###############################################################################
## RNA feature plot ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
setGeneric(
name="doUMAP_plotSL",
def=function(
SampleList,
obj = "Obio",
figureCount = 1,
VersionPdfExt = ".pdf",
tocSubLevel = 4,
dotsize = 0.5
) {
###############################################################################
## Make plots ##
plotListSQCUMAP <- list()
chnkVec <- as.vector(NULL, mode = "character")
sampleNames <- as.vector(names(obj@sampleDetailList))
## Determine min/max for all plots ##
for (i in 1:length(sampleNames)){
dfT <- data.frame(SampleList[[sampleNames[i]]]@reductions$umap@cell.embeddings)
if (i ==1){
dfR <- dfT
} else {
dfR <- rbind(
dfT,
dfR
)
}
}
maxX <- 1.1*max(dfR$UMAP_1, na.rm = T)
minX <- 1.1*min(dfR$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfR$UMAP_2, na.rm = T)
minY <- 1.1*min(dfR$UMAP_2, na.rm = T)
for (i in 1:length(sampleNames)){
tag <- paste0("U_",sampleNames[i])
dfPlot <- SampleList[[sampleNames[i]]]@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
## Get UMAP coordinates ##
coord <- data.frame(SampleList[[sampleNames[i]]]@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
## Add cluster colors ##
dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))
library(scales)
clusterCols = hue_pal()(length(clusterVec))
dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)
plotListSQCUMAP[[tag]] <- ggplot2::ggplot(data=dfPlot[dfPlot$included == "+",], ggplot2::aes(UMAP_1, UMAP_2, color=Cluster)
) + ggplot2::geom_point( shape=16, size = as.numeric(dotsize)
) + ggplot2::xlab("UMAP1") + ggplot2::ylab("UMAP2") + ggplot2::theme(
axis.text.y = ggplot2::element_text(size=8),
axis.text.x = ggplot2::element_text(size=8),
axis.title.y = ggplot2::element_text(size=8),
axis.title.x = ggplot2::element_text(size=8),
axis.line = ggplot2::element_line(colour = "black"),
panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
plot.title = ggplot2::element_text(hjust = 0.5, size = 12),
legend.title = ggplot2::element_blank()
) + ggplot2::ggtitle(paste0("Sample: ", tag)
) + ggplot2::xlim(minX, maxX) + ggplot2::ylim(minY, maxY
) + ggplot2::coord_fixed(ratio=1
) + ggplot2::theme_bw()
FNbase <- paste0("Sample.level.UMAP.", tag, VersionPdfExt)
FN <- paste0(obj@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotListSQCUMAP[[tag]])
dev.off()
figLegend <- paste0(
"**Figure ",
figureCount,
":** ",
" Sample-level UMAP plot for QC purposes. Download a pdf of this figure [here](", FNrel,")."
)
figureCount <- figureCount + 1
NewChnk <- paste0(
paste(rep("#", tocSubLevel), collapse=""), " ", tag,
"\n```{r SLC_UMAP_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotListSQCUMAP[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
returnList <- list(
"plotListSQCUMAP" = plotListSQCUMAP,
"chnkVec" = chnkVec,
"figureCount" = figureCount
)
})
## Done SL UMAP ##
###############################################################################
###############################################################################
## Do size exclustion ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
setGeneric(
name="doRNAfeat_plotSL",
def=function(
SampleList,
obj = "Obio",
figureCount = 1,
VersionPdfExt = ".pdf",
tocSubLevel = 4
) {
###############################################################################
## Make plots ##
plotListRF <- list()
chnkVec <- as.vector(NULL, mode = "character")
sampleNames <- as.vector(names(obj@sampleDetailList))
for (i in 1:length(sampleNames)){
tag <- sampleNames[i]
SampleList[[sampleNames[i]]]@meta.data[["included"]] <- "+"
SampleList[[sampleNames[i]]]@meta.data[((SampleList[[sampleNames[i]]]@meta.data$nFeature_RNA < obj@sampleDetailList[[sampleNames[i]]]$SeuratNrnaMinFeatures) | (SampleList[[sampleNames[i]]]@meta.data$nFeature_RNA > obj@sampleDetailList[[sampleNames[i]]]$SeuratNrnaMaxFeatures)), "included"] <- "ex_N_Feat_RNA"
SampleList[[sampleNames[i]]]@meta.data[(SampleList[[sampleNames[i]]]@meta.data$percent_mt > obj@sampleDetailList[[sampleNames[i]]]$singleCellSeuratMtCutoff ), "included"] <- "ex_MT_Perc"
dfHist <- SampleList[[sampleNames[i]]]@meta.data
## Fit GMM
library(mixtools)
x <- as.vector( dfHist$nFeature_RNA)
dfHist[["x"]] <- x
fit <- normalmixEM(x, k = 2) #try to fit two Gaussians
dfHist[["temp1"]] <- fit$lambda[1]*dnorm(x,fit$mu[1],fit$sigma[1])
dfHist[["temp2"]] <- fit$lambda[2]*dnorm(x,fit$mu[2],fit$sigma[2])
# https://labrtorian.com/tag/mixture-model/
## Calculate Mean for distribution 1
x1meanLine <- fit$mu[1]
x2meanLine <- fit$mu[2]
## Find histogram max count ##
pTest <- ggplot2::ggplot(data=dfHist, ggplot2::aes(x=nFeature_RNA, fill = included)
) + ggplot2::geom_histogram(binwidth=50
)
dfT <- ggplot2::ggplot_build(pTest)$data[[1]]
yMax <- max(dfT$count)
dMax1 <- max( fit$lambda[1]*dnorm(x,fit$mu[1],fit$sigma[1]))
dMax2 <- max( fit$lambda[2]*dnorm(x,fit$mu[2],fit$sigma[2]))
if (dMax1 > dMax2){
dMax <- dMax1
sF <- yMax/dMax
dfHist[["fitVec1"]] <- sF *fit$lambda[1]*dnorm(x,fit$mu[1],fit$sigma[1])
dfHist[["fitVec2"]] <- sF*fit$lambda[2]*dnorm(x,fit$mu[2],fit$sigma[2])
dfHist[["x"]] <- fit$x
} else {
dMax <- dMax2
sF <- yMax/dMax
dfHist[["fitVec2"]] <- sF *fit$lambda[1]*dnorm(x,fit$mu[1],fit$sigma[1])
dfHist[["fitVec1"]] <- sF*fit$lambda[2]*dnorm(x,fit$mu[2],fit$sigma[2])
dfHist[["x"]] <- fit$x
}
dfHist <- dfHist[order(dfHist$x, decreasing = F),]
colVec <- unique(sort(SampleList[[sampleNames[i]]]@meta.data$included))
library(RColorBrewer)
reds <- c("#FF0000", "#ffa500", "#A30000")
colVec <- c("#000000", reds[1:(length(colVec)-1)])
plotListRF[[paste0("Hist_GL_", tag)]] <- ggplot2::ggplot(data=dfHist, ggplot2::aes(x=nFeature_RNA, fill = included)
) + ggplot2::geom_vline( xintercept = c(x1meanLine, x2meanLine), col="grey", linetype = "dashed"
) + ggplot2::geom_histogram(binwidth=50, alpha = 0.5
) + ggplot2::scale_fill_manual(values=colVec) + ggplot2::geom_point( ggplot2::aes(x=x, y=fitVec1), color = "#009900", size = 0.1
) + ggplot2::geom_point( ggplot2::aes(x=x, y=fitVec2), color = "#FF0000", size = 0.1
) + ggplot2::theme_bw(
) + ggplot2::theme(
axis.text.y = ggplot2::element_text(size=8),
axis.text.x = ggplot2::element_text(size=8),
axis.title.y = ggplot2::element_text(size=8),
axis.title.x = ggplot2::element_text(size=8),
axis.line = ggplot2::element_line(colour = "black"),
panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
plot.title = ggplot2::element_text(hjust = 0.5, size = 12)
) + ggplot2::geom_vline(xintercept = obj@sampleDetailList[[i]]$SeuratNrnaMinFeatures, col="red"
) + ggplot2::geom_hline(yintercept = 0, col="black"
) + ggplot2::labs(title = paste0("Histogram nFeatures RNA per cell ", names(SampleList)[i], " (SD1: ", round(sd(x),2),")") ,y = "Count", x = "nFeatures RNA"
) + ggplot2::xlim(0, max(dfHist$x))
###########################################################################
## Save plot to file ##
FNbase <- paste0("Historgram.GL",names(SampleList)[i], VersionPdfExt)
FN <- paste0(obj@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotListRF[[paste0("Hist_GL_", tag)]])
dev.off()
## ##
###########################################################################
figCap <- paste0(
"**Figure ",
figureCount,
"C:** Histogram depicting genes found per cell/nuclei for sample ",
names(SampleList)[i],
". ",
"Download a pdf of this figure [here](", FNrel, "). "
)
NewChnk <- paste0(
paste(rep("#", tocSubLevel), collapse=""), " ", tag,
"\n```{r Gene_plot_chunk_Histogram-",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
"\n",
"\n print(plotListRF[['",paste0("Hist_GL_", tag),"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
## Histogram Part C done ##
###########################################################################
chnkVec <- c(
chnkVec,
NewChnk
)
figureCount <- figureCount + 1
}
returnList <- list(
"plotListRF" = plotListRF,
"chnkVec" = chnkVec,
"figureCount" = figureCount
)
})
## Done sizing plots ##
###############################################################################
###############################################################################
## RNA feature plot ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
setGeneric(
name="doDF_plotSL",
def=function(
SampleList,
obj = "Obio",
figureCount = 1,
VersionPdfExt = ".pdf",
tocSubLevel = 4,
dotsize = 0.5
) {
###############################################################################
## Make plots ##
library(DoubletFinder)
SCTvar <- TRUE
plotListDF <- list()
chnkVec <- as.vector(NULL, mode = "character")
addList <- list()
sampleNames <- as.vector(names(obj@sampleDetailList))
pKlist <- list()
bcmvnList <- list
## Determine min/max for all plots ##
for (i in 1:length(sampleNames)){
## pK Identification (no ground-truth) ---------------------------------------------------------------------------------------
# sweep.res.list <- paramSweep_v3(SampleList[[sampleNames[i]]], PCs = 1:10, sct = TRUE, num.cores =1)
# sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
# bcmvn_sample <- find.pK(sweep.stats)
# pK <- grep(max(bcmvm_sample))
# pKlist[[sampleNames[i]]] <- pK
# bcmvnList[[sampleNames[i]]] <- bcmvn_sample
#
# ## pK Identification (ground-truth) ------------------------------------------------------------------------------------------
# sweep.res.list_OsC <- paramSweep_v3(OsC, PCs = 1:10, sct = TRUE)
# gt.calls <- seu_kidney@meta.data[rownames(sweep.res.list_OsC[[1]]), "GT"]
# sweep.stats_kidney <- summarizeSweep(sweep.res.list_kidney, GT = TRUE, GT.calls = gt.calls)
# bcmvn_kidney <- find.pK(sweep.stats_kidney)
## Homotypic Doublet Proportion Estimate -------------------------------------------------------------------------------------
annotations <- SampleList[[sampleNames[i]]]@meta.data[,"seurat_clusters"]
homotypic.prop <- modelHomotypic(annotations) ## ex: annotations <- seu_kidney@meta.data$ClusteringResults
nExp_poi <- round(0.075*nrow(SampleList[[sampleNames[i]]]@meta.data)) ## Assuming 7.5% doublet formation rate - tailor for your dataset
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
## Run DoubletFinder with varying classification stringencies ----------------------------------------------------------------
#OsC_DF <- SampleList[[sampleNames[i]]]
SampleList[[sampleNames[i]]] <- doubletFinder_v3(SampleList[[sampleNames[i]]], PCs = 1:10, pN = 0.25, pK = 0.09, nExp = nExp_poi, reuse.pANN = FALSE, sct = SCTvar)
## Adjust names ##
names(SampleList[[sampleNames[i]]]@meta.data)[grep("pANN_",names(SampleList[[sampleNames[i]]]@meta.data))] <- "DF_pANN"
names(SampleList[[sampleNames[i]]]@meta.data)[grep("DF.classifications",names(SampleList[[sampleNames[i]]]@meta.data))] <- "DF_Classification"
dfAdd <- SampleList[[sampleNames[i]]]@meta.data[,c("DF_Classification", "DF_pANN")]
addList[[sampleNames[i]]] <- dfAdd
tempDir <- paste0(obj@parameterList$localWorkDir,"temp")
if(!dir.exists(tempDir)){
dir.create(tempDir)
}
write.table(
dfAdd,
paste0(obj@parameterList$localWorkDir,"temp/DF_", sampleNames[i],".txt"),
row.names = F,
sep = "\t"
)
## end new
## begin old
dfT <- data.frame(SampleList[[sampleNames[i]]]@reductions$umap@cell.embeddings)
if (i ==1){
dfR <- dfT
} else {
dfR <- rbind(
dfT,
dfR
)
}
}
maxX <- 1.1*max(dfR$UMAP_1, na.rm = T)
minX <- 1.1*min(dfR$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfR$UMAP_2, na.rm = T)
minY <- 1.1*min(dfR$UMAP_2, na.rm = T)
for (i in 1:length(sampleNames)){
tag <- sampleNames[i]
dfPlot <- SampleList[[sampleNames[i]]]@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
## Get UMAP coordinates ##
coord <- data.frame(SampleList[[sampleNames[i]]]@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
## Add cluster colors ##
dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))
library(scales)
clusterCols = hue_pal()(length(clusterVec))
dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)
plotListDF[[tag]] <- ggplot2::ggplot(data=dfPlot[dfPlot$included == "+",], ggplot2::aes(UMAP_1, UMAP_2, color=DF_Classification)
) + ggplot2::geom_point( shape=16, size = as.numeric(dotsize)
) + ggplot2::xlab("UMAP1") + ggplot2::ylab("UMAP2") + ggplot2::theme(
axis.text.y = ggplot2::element_text(size=8),
axis.text.x = ggplot2::element_text(size=8),
axis.title.y = ggplot2::element_text(size=8),
axis.title.x = ggplot2::element_text(size=8),
axis.line = ggplot2::element_line(colour = "black"),
panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
plot.title = ggplot2::element_text(hjust = 0.5, size = 12),
legend.title = ggplot2::element_blank()
) + ggplot2::ggtitle(paste0("Sample: ", tag)
) + ggplot2::xlim(minX, maxX) + ggplot2::ylim(minY, maxY
) + ggplot2::scale_color_manual(values=c("#FF0000","#000000")
) + ggplot2::coord_fixed(ratio=1
) + ggplot2::theme_bw()
FNbase <- paste0("Sample.level.UMAP.", tag, VersionPdfExt)
FN <- paste0(obj@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
figLegend <- paste0(
"**Figure ",
figureCount,
":** ",
" Sample-level UMAP plot for QC purposes. Download a pdf of this figure [here](", FNrel,")."
)
figureCount <- figureCount + 1
NewChnk <- paste0(
paste(rep("#", tocSubLevel), collapse=""), " ", tag,
"\n```{r SL2_UMAP_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotListDF[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
}
returnList <- list(
"plotListDF" = plotListDF,
"chnkVec" = chnkVec,
"figureCount" = figureCount,
#"pKlist" = pKlist,
#"bcmvnList" = bcmvnList,
"addList" = addList
)
})
## Done SL UMAP ##
###############################################################################
###############################################################################
## Create integration sample list ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
setGeneric(
name="createNormSampleList",
def=function(
obj,
reduce = NULL,
vars.to.regress = NULL,
g2m.genes = NULL,
s.genes = NULL,
annotateCellCyclePhase = TRUE,
integrationReduction = "cca", #options = "rpca" # options: c("cca", "rpca", "rlsi")
feature_type = "Gene Expression"
#vars to regress cell cycle options #c("S_Score", "G2M_Score) or
#"CC_Difference"
#figureCount = 1,
#VersionPdfExt = ".pdf",
#tocSubLevel = 4
) {
## Create Sample List ##
SampleList <- list()
unionVarGenes <- as.vector(NULL, mode = "character")
NtopGenes <- obj@scDetailList$NtopGenes
geneIntersectVec <- as.vector(NULL, mode="character")
for (i in 1:length(obj@sampleDetailList)){
sampleID <- names(obj@sampleDetailList)[i]
# type must be in c("TenX", "matrixFiles", "loomFiles", "hdf5Files")
if ( obj@sampleDetailList[[sampleID]]$type == "loomFiles" ){
library(loomR)
loomFN <- obj@sampleDetailList[[sampleID]]$path
lfile <- connect(filename = loomFN, mode = "r+")
fullMat <- lfile$matrix[, ]
geneNames <- lfile[["row_attrs/Gene"]][]
colnames(fullMat) <- geneNames
cellIDs <- lfile[["col_attrs/CellID"]][]
row.names(fullMat) <- cellIDs
fullMat <- t(fullMat)
} else if (obj@sampleDetailList[[sampleID]]$type == "matrixFiles") {
mFN <- obj@sampleDetailList[[sampleID]]$path
fullMat <- read.delim(
mFN,
sep="\t",
stringsAsFactors = F
)
} else if (obj@sampleDetailList[[sampleID]]$type == "hdf5Files") {
library(hdf5r)
dataDir <- obj@sampleDetailList[[sampleID]]$path
#print(paste0("Reading ", dataDir, "..."))
assign(
"fullMat", #names(obj@parameterList[[obj@parameterList$inputMode]])[i],
Seurat::Read10X_h5(filename = dataDir, use.names = TRUE, unique.features = TRUE)
)
} else {
pos <- grep("gene.column", names(obj@sampleDetailList[[sampleID]]))
if (length(pos) == 0){
gene.column = 2
} else {
gene.column <- obj@sampleDetailList[[sampleID]]$gene.column
}
dataDir <- obj@sampleDetailList[[sampleID]]$path
#print(paste0("Reading ", dataDir, "..."))
assign(
"fullMat", #names(obj@parameterList[[obj@parameterList$inputMode]])[i],
Seurat::Read10X(data.dir = dataDir, gene.column = gene.column)
)
## Cellranger multi will create a list, rather than a matrix
if (is.list(fullMat)){
pos <- grep(feature_type, names(fullMat))
if ( length( pos ) == 1){
fullMat <- fullMat[[pos]]
}
}
}
## Remove -1 cells ##
pos <- grep("-", colnames(fullMat))
if (length(pos) > 0){
repCols <- sapply(colnames(fullMat), function(x) unlist(strsplit(x, "-"))[1])
if (length(unique(colnames(fullMat))) == length(unique(repCols)) ){
colnames(fullMat) <- repCols
}
}
SampleList[[sampleID]] = Seurat::CreateSeuratObject(
counts = fullMat,
project = sampleID,
min.cells = 0,
min.features = obj@sampleDetailList[[i]]$SeuratNrnaMinFeatures
)
SampleList[[sampleID]]@meta.data[["sampleID"]] <-
sampleID
if (!is.null(reduce)){
set.seed(127)
n.cells <- round(reduce * nrow(SampleList[[sampleID]]@meta.data))
cellVec <- row.names(SampleList[[sampleID]]@meta.data)
cellVec <- cellVec[sample(1:length(cellVec), n.cells)]
SampleList[[sampleID]] <- subset(
x = SampleList[[sampleID]],
cells = cellVec
)
}
## Label mitochondrial cells ##
if (obj@parameterList$species == "mus_musculus"){
mtSel <- "^mt-"
} else if (obj@parameterList$species == "homo_sapiens") {
mtSel <- "^MT-"
} else if (obj@parameterList$species == "danio_rerio") {
mtSel <- "^mt-"
} else if (obj@parameterList$species == "gallus_gallus") {
mtSel <- "^MT-"
} else if (obj@parameterList$species == "drosophila_melanogaster") {
mtSel <- "^mt:"
} else {
stop("Mitochondrial gene identifier not specified for this species in function createNormSampleList().")
}
SampleList[[i]][["percent_mt"]] <- Seurat::PercentageFeatureSet(object =SampleList[[i]], pattern = mtSel)
## Remove contaminating cells ##
SampleList[[i]] <- subset(
x = SampleList[[i]],
subset = nFeature_RNA > obj@sampleDetailList[[i]]$SeuratNrnaMinFeatures
& nFeature_RNA < obj@sampleDetailList[[i]]$SeuratNrnaMaxFeatures
& percent_mt < obj@sampleDetailList[[i]]$singleCellSeuratMtCutoff
)
## Normalization
if (length(grep("scIntegrationMethod", names(obj@parameterList))) == 0){
obj@parameterList$scIntegrationMethod <- "standard"
}
if (obj@parameterList$scIntegrationMethod == "SCT"){
SampleList[[i]] <- Seurat::SCTransform(SampleList[[i]], verbose = FALSE)
SampleList[[i]] <- Seurat::NormalizeData(
SampleList[[i]],
verbose = FALSE,
assay = "RNA"
)
} else {
SampleList[[i]] <- Seurat::NormalizeData(
SampleList[[i]],
verbose = FALSE
)
}
SampleList[[i]] <- Seurat::FindVariableFeatures(
SampleList[[i]],
selection.method = "vst",
nfeatures = NtopGenes,
verbose = FALSE
)
unionVarGenes <- unique(
c(
unionVarGenes,
Seurat::VariableFeatures(SampleList[[i]])
)
)
geneIntersectVec <- unique(
c(
geneIntersectVec,
rownames(x = SampleList[[i]]@assays$RNA)
)
)
## Assign cell cycle scores ##
if (annotateCellCyclePhase){
if (is.null(s.genes)){
stop("S.genes required")
}
if (is.null(g2m.genes)){
stop("G2M Genes required")
}
#Idents(SampleList[[sampleID]]) <- "sampleID"
SampleList[[sampleID]] <- Seurat::CellCycleScoring(
SampleList[[sampleID]],
s.features = s.genes,
g2m.features = g2m.genes,
set.ident = TRUE
)
## Done adding cell cycle scores ##
## Remove dots from column names ##
names(SampleList[[sampleID]]@meta.data) <- gsub("\\.", "_",names(SampleList[[sampleID]]@meta.data))
## Add G1 to G2M-S differences
SampleList[[sampleID]]$CC_Difference <- SampleList[[sampleID]]$S_Score - SampleList[[sampleID]]$G2M_Score
}
## Regress out irrelevant items, if specified ##
###################################################
## old: retired 2021-07-29 ##
# SampleList[[i]] <- Seurat::ScaleData(
# SampleList[[i]],
# verbose = FALSE,
# vars.to.regress = vars.to.regress,
# features = row.names(SampleList[[i]])
# )
##
###################################################
SampleList[[i]] <- Seurat::ScaleData(
SampleList[[i]],
verbose = FALSE,
vars.to.regress = vars.to.regress
)
## Change SB 29/07/21. Scale data ran very slow when regressing out cell cycle genes for larger datasets. As scale data is only used to create UMAP and PCA plots, the most variable features should be sufficient.
}
## Do rpca integration. Link to vignette: https://satijalab.org/seurat/articles/integration_rpca.html
if (integrationReduction == "rpca"){
features <- Seurat::SelectIntegrationFeatures(object.list = SampleList)
SampleList <- lapply(X = SampleList, FUN = function(x) {
x <- Seurat::ScaleData(x, features = features, verbose = FALSE)
x <- Seurat::RunPCA(x, features = features, verbose = FALSE)
})
}
return(SampleList)
})
## ##
###############################################################################
###############################################################################
## Create and Process sample List ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
setGeneric(
name="createSampleListQC",
def=function(
obj,
reduce = NULL,
vars.to.regress = NULL,
s.genes = NULL,
g2m.genes = NULL,
annotateCellCyclePhase = TRUE,
feature_type = "Gene Expression"
#figureCount = 1,
#VersionPdfExt = ".pdf",
#tocSubLevel = 4
) {
## Create Sample List ##
#library(Seurat)
SampleList <- list()
for (i in 1:length(obj@sampleDetailList)){
sampleID <- names(obj@sampleDetailList)[i]
# type must be in c("TenX", "matrixFiles", "loomFiles", "hdf5Files")
if ( obj@sampleDetailList[[sampleID]]$type == "loomFiles" ){
#library(loomR)
loomFN <- obj@sampleDetailList[[sampleID]]$path
lfile <- loomR::connect(filename = loomFN, mode = "r+")
fullMat <- lfile$matrix[, ]
geneNames <- lfile[["row_attrs/Gene"]][]
colnames(fullMat) <- geneNames
cellIDs <- lfile[["col_attrs/CellID"]][]
row.names(fullMat) <- cellIDs
fullMat <- t(fullMat)
} else if (obj@sampleDetailList[[sampleID]]$type == "matrixFiles") {
mFN <- obj@sampleDetailList[[sampleID]]$path
fullMat <- read.delim(
mFN,
sep="\t",
stringsAsFactors = F
)
} else if (obj@sampleDetailList[[sampleID]]$type == "hdf5Files") {
library(hdf5r)
dataDir <- obj@sampleDetailList[[sampleID]]$path
#print(paste0("Reading ", dataDir, "..."))
assign(
"fullMat", #names(obj@parameterList[[obj@parameterList$inputMode]])[i],
Seurat::Read10X_h5(filename = dataDir, use.names = TRUE, unique.features = TRUE)
)
} else {
pos <- grep("gene.column", names(obj@sampleDetailList[[sampleID]]))
if (length(pos) == 0){
gene.column = 2
} else {
gene.column <- obj@sampleDetailList[[sampleID]]$gene.column
}
dataDir <- obj@sampleDetailList[[sampleID]]$path
#print(paste0("Reading ", dataDir, "..."))
assign(
"fullMat", #names(obj@parameterList[[obj@parameterList$inputMode]])[i],
Seurat::Read10X(data.dir = dataDir, gene.column = gene.column)
)
## Cellranger multi will create a list, rather than a matrix
if (is.list(fullMat)){
pos <- grep(feature_type, names(fullMat))
if ( length( pos ) == 1){
fullMat <- fullMat[[pos]]
}
}
}
## Remove -1 cells ##
pos <- grep("-", colnames(fullMat))
if (length(pos) > 0){
repCols <- sapply(colnames(fullMat), function(x) unlist(strsplit(x, "-"))[1])
if (length(unique(colnames(fullMat))) == length(unique(repCols)) ){
colnames(fullMat) <- repCols
}
}
SampleList[[sampleID]] = Seurat::CreateSeuratObject(
counts = fullMat,
project = sampleID,
min.cells = 0,
min.features = 0 #obj@parameterList$SeuratNrnaMinFeatures
)
SampleList[[sampleID]]@meta.data[["sampleID"]] <-
sampleID
if (!is.null(reduce)){
set.seed(127)
n.cells <- round(reduce * nrow(SampleList[[sampleID]]@meta.data))
cellVec <- row.names(SampleList[[sampleID]]@meta.data)
cellVec <- cellVec[sample(1:length(cellVec), n.cells)]
SampleList[[sampleID]] <- subset(
x = SampleList[[sampleID]],
cells = cellVec
)
}
## Label mitochondrial cells ##
if (obj@parameterList$species == "mus_musculus"){
mtSel <- "^mt-"
} else if (obj@parameterList$species == "homo_sapiens") {
mtSel <- "^MT-"
} else if (obj@parameterList$species == "danio_rerio") {
mtSel <- "^mt-"
} else if (obj@parameterList$species == "gallus_gallus") {
mtSel <- "^MT-"
} else if (obj@parameterList$species == "drosophila_melanogaster") {
mtSel <- "^mt:"
} else {
stop("mtSel not defined for this species in createSampleListQC")
mtSel <- "^MitoGene-"
}
SampleList[[i]][["percent_mt"]] <- Seurat::PercentageFeatureSet(object =SampleList[[i]], pattern = mtSel)
names(SampleList[[i]]@meta.data) <- gsub("percent.mt","percent_mt",names(SampleList[[i]]@meta.data))
## Normalise ##
# SampleList[[i]] <- Seurat::SCTransform(SampleList[[i]], verbose = FALSE)
SampleList[[i]] <- Seurat::NormalizeData(
SampleList[[i]],
verbose = FALSE,
assay = "RNA"
)
## Find variable features ##
SampleList[[i]] <- Seurat::FindVariableFeatures(
object = SampleList[[i]],
selection.method = 'vst',
nfeatures = obj@parameterList$NtopGenes
)
if (annotateCellCyclePhase) {
if (is.null(s.genes)){
exit("Please provide S-phase gene list")
}
if (is.null(g2m.genes)){
exit("Please provide G2M-phase gene list")
}
SampleList[[sampleID]] <- Seurat::CellCycleScoring(
SampleList[[sampleID]],
s.features = s.genes,
g2m.features = g2m.genes,
set.ident = TRUE
)
names(SampleList[[sampleID]]@meta.data) <- gsub("\\.", "_",names(SampleList[[sampleID]]@meta.data))
## Add G1 to G2M-S differences
SampleList[[sampleID]]$CC_Difference <- SampleList[[sampleID]]$S_Score - SampleList[[sampleID]]$G2M_Score
}
#Idents(SampleList[[sampleID]]) <- "orig.ident"
#print(sampleID)
#print(names(SampleList[[sampleID]]@meta.data))
## Remove dots from column names ##
## 20201221 - added features = row.names(SampleList[[i]]) and vars to regress
SampleList[[i]] <- Seurat::ScaleData(
SampleList[[i]],
verbose = FALSE,
vars.to.regress = vars.to.regress #,
#features = row.names(SampleList[[i]])
)
nPCs <- obj@sampleDetailList[[i]]$singleCellSeuratNpcs4PCA
if (ncol(SampleList[[i]]) < 50){
nPCs <- 2
} else if (ncol(SampleList[[i]]) < 250){
nPCs <- 10
}
SampleList[[i]] <- Seurat::RunPCA(
SampleList[[i]],
npcs = nPCs,
verbose = FALSE
)
## Do tSNE ##
SampleList[[i]] <- Seurat::RunTSNE(SampleList[[i]], reduction = "pca", dims = 1:nPCs, perplexity=nPCs, check_duplicates = FALSE)
## Do UMAP ##
if (ncol(SampleList[[i]]) >= 50){
SampleList[[i]] <- Seurat::RunUMAP(SampleList[[i]], reduction = "pca", dims = 1:nPCs)
}
## Do clustering ##
SampleList[[i]] <- Seurat::FindNeighbors(SampleList[[i]], reduction = "pca", dims = 1:nPCs)
SampleList[[i]] <- Seurat::FindClusters(SampleList[[i]], resolution = obj@sampleDetailList[[i]]$singleCellClusterParameter)
## Annotated included/excluded cells ##
SampleList[[i]]@meta.data[["selected"]] <- "+"
SampleList[[i]]@meta.data[SampleList[[i]]@meta.data$percent_mt > obj@sampleDetailList[[i]]$singleCellSeuratMtCutoff ,"selected"] <- ""
SampleList[[i]]@meta.data[SampleList[[i]]@meta.data$nFeature_RNA > obj@sampleDetailList[[i]]$SeuratNrnaMaxFeatures ,"selected"] <- ""
SampleList[[i]]@meta.data[SampleList[[i]]@meta.data$nFeature_RNA < obj@sampleDetailList[[i]]$SeuratNrnaMinFeatures ,"selected"] <- ""
print(paste0(i, "th sample, ", sampleID, " done."))
}
return(SampleList)
})
## Done Create and Process sample list ##
###############################################################################
###############################################################################
## Do mt and feature selection plots ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
setGeneric(
name="doPercMT_plotSL",
def=function(
SampleList,
obj,
figureCount = 1,
VersionPdfExt = ".pdf",
tocSubLevel = 4
) {
if (obj@parameterList$species == "mus_musculus"){
mtSel <- "^mt-"
} else if (obj@parameterList$species == "homo_sapiens") {
mtSel <- "^MT-"
} else if (obj@parameterList$species == "danio_rerio") {
mtSel <- "^mt-"
} else if (obj@parameterList$species == "drosophila_melanogaster") {
mtSel <- "^mt:"
} else {
mtSel <- "^MitoGene-"
}
plotListRF <- list()
chnkVec <- as.vector(NULL, mode = "character")
sampleNames <- as.vector(names(obj@sampleDetailList))
for (i in 1:length(sampleNames)){
tag <- paste0("Hist_MT_", sampleNames[i])
SampleList[[sampleNames[i]]]@meta.data[["included"]] <- "+"
SampleList[[sampleNames[i]]]@meta.data[((SampleList[[sampleNames[i]]]@meta.data$nFeature_RNA < obj@sampleDetailList[[sampleNames[i]]]$SeuratNrnaMinFeatures) | (SampleList[[sampleNames[i]]]@meta.data$nFeature_RNA > obj@sampleDetailList[[sampleNames[i]]]$SeuratNrnaMaxFeatures)), "included"] <- "ex_N_Feat_RNA"
SampleList[[sampleNames[i]]]@meta.data[(SampleList[[sampleNames[i]]]@meta.data$percent_mt > obj@sampleDetailList[[sampleNames[i]]]$singleCellSeuratMtCutoff ), "included"] <- "ex_MT_Perc"
dfHist <- SampleList[[sampleNames[i]]]@meta.data
## Fit GMM
library(mixtools)
x <- as.vector( dfHist$percent_mt)
dfHist[["x"]] <- x
fitPossible <- tryCatch({fit <- normalmixEM(x, k = 2); fitPossible = T},
error=function(cond) {
message(paste("No fit possible"))
# Choose a return value in case of error
fitPossible <- FALSE
return(fitPossible)
})
#try to fit two Gaussians
if (fitPossible){
dfHist[["temp1"]] <- fit$lambda[1]*dnorm(x,fit$mu[1],fit$sigma[1])
dfHist[["temp2"]] <- fit$lambda[2]*dnorm(x,fit$mu[2],fit$sigma[2])
# https://labrtorian.com/tag/mixture-model/
## Calculate Mean for distribution 1
x1meanLine <- fit$mu[1]
x2meanLine <- fit$mu[2]
}
## Find histogram max count ##
pTest <- ggplot2::ggplot(data=dfHist, ggplot2::aes(x=percent_mt, fill = included)
) + ggplot2::geom_histogram(binwidth=0.3
)
dfT <- ggplot2::ggplot_build(pTest)$data[[1]]
yMax <- max(dfT$count)
if (fitPossible){
dMax1 <- max( fit$lambda[1]*dnorm(x,fit$mu[1],fit$sigma[1]))
dMax2 <- max( fit$lambda[2]*dnorm(x,fit$mu[2],fit$sigma[2]))
if (dMax1 > dMax2){
dMax <- dMax1
sF <- yMax/dMax
dfHist[["fitVec1"]] <- sF *fit$lambda[1]*dnorm(x,fit$mu[1],fit$sigma[1])
dfHist[["fitVec2"]] <- sF*fit$lambda[2]*dnorm(x,fit$mu[2],fit$sigma[2])
dfHist[["x"]] <- fit$x
} else {
dMax <- dMax2
sF <- yMax/dMax
dfHist[["fitVec2"]] <- sF *fit$lambda[1]*dnorm(x,fit$mu[1],fit$sigma[1])
dfHist[["fitVec1"]] <- sF*fit$lambda[2]*dnorm(x,fit$mu[2],fit$sigma[2])
dfHist[["x"]] <- fit$x
}
dfHist <- dfHist[order(dfHist$x, decreasing = F),]
}
colVec <- unique(sort(SampleList[[sampleNames[i]]]@meta.data$included))
library(RColorBrewer)
reds <- c("#FF0000", "#ffa500", "#A30000")
colVec <- c("#000000", reds[1:(length(colVec)-1)])
plotListRF[[tag]] <- ggplot2::ggplot(data=dfHist, ggplot2::aes(x=percent_mt, fill = included)
) + ggplot2::geom_histogram(binwidth=0.3, alpha = 0.5
) + ggplot2::geom_vline( xintercept = obj@sampleDetailList[[sampleNames[i]]]$singleCellSeuratMtCutoff, col="red", linetype = "dashed"
)
if (fitPossible){
plotListRF[[tag]] <- plotListRF[[tag]] + ggplot2::geom_point( ggplot2::aes(x=x, y=fitVec2), color = "#FF0000", size = 0.1
) + ggplot2::geom_vline( xintercept = c(x1meanLine, x2meanLine), col="grey", linetype = "dashed"
)
}
plotListRF[[tag]] <- plotListRF[[tag]] + ggplot2::theme(
axis.text.y = ggplot2::element_text(size=8),
axis.text.x = ggplot2::element_text(size=8),
axis.title.y = ggplot2::element_text(size=8),
axis.title.x = ggplot2::element_text(size=8),
axis.line = ggplot2::element_line(colour = "black"),
panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
plot.title = ggplot2::element_text(hjust = 0.5, size = 12)
) + ggplot2::geom_vline(xintercept = obj@sampleDetailList[[i]]$singleCellSeuratMtCutoff, col="red"
) + ggplot2::geom_hline(yintercept = 0, col="black"
) + ggplot2::labs(title = paste0("Histogram Percent Mitochondrial Genes per Cell ", names(SampleList)[i], " \n (SD1: ", round(sd(x),2),")") ,y = "Count", x = "Percent Mitochondrial Genes"
) + ggplot2::xlim(0, max(dfHist$x)
) + ggplot2::theme_bw()
###########################################################################
## Save plot to file ##
FNbase <- paste0("Historgram.MT",names(SampleList)[i], VersionPdfExt)
FN <- paste0(obj@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotListRF[[tag]])
dev.off()
## ##
###########################################################################
figCap <- paste0(
"**Figure ",
figureCount,
"C:** Histogram depicting percent mitochondrial genes for each sample ",
names(SampleList)[i],
". ",
"Download a pdf of this figure [here](", FNrel, "). "
)
NewChnk <- paste0(
paste(rep("#", tocSubLevel), collapse=""), " ", tag,
"\n```{r Gene_plot_chunk_Histogram-",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
"\n",
"\n print(plotListRF[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
## Histogram Part C done ##
###########################################################################
chnkVec <- c(
chnkVec,
NewChnk
)
figureCount <- figureCount + 1
}
returnList <- list(
"plotListRF" = plotListRF,
"chnkVec" = chnkVec,
"figureCount" = figureCount
)
})
## Done mt and feature selection plots ##
###############################################################################
###############################################################################
## Do DGEsc ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
setGeneric(
name="doDGEsc",
def=function(
obj,
DGEselCol = "sub_clusters_ExNeurons",
colName = "DGE_name",
contrastTag = "contrast_1_",
DGEsampleList = list(
"M1" = c(5),
"M2" = c(1)
)
) {
myPaths <- .libPaths()
myNewPaths <- c("/camp/stp/babs/working/boeings/Projects/pachnisv/song.chng/330_10xscRNAseq_DIV4_DIV11_DIV20_SC19069/basedata", myPaths)
.libPaths(myNewPaths)
library(glmGamPoi)
.libPaths(myPaths)
library(Seurat)
obj@meta.data[[colName]] <- ""
## Add DGEsamples to meta.data
for (i in 1:length(DGEsampleList)){
obj@meta.data[obj@meta.data[,DGEselCol] %in% DGEsampleList[[i]], colName] <- names(DGEsampleList)[i]
}
dfMeta <- obj@meta.data
dfMeta <- dfMeta[dfMeta[,colName] != "",]
cellVec <- dfMeta$cellID
group <- as.vector(dfMeta[,colName])
dfMatrix <- OsC[["RNA"]]@counts
dfMatrix <- data.matrix(dfMatrix[,cellVec])
dfMatrix <- dfMatrix[rowSums(dfMatrix) != 0, ]
## LRT ##
# fit <- glm_gp(dfMatrix, design = group)
# res <- test_de(fit, reduced_design = ~ 1)
## Pseudobulk ##
#sample_labels <- rep(paste0("sample_", 1:6), length = ncol(pbmcs_subset))
#cell_type_labels <- sample(c("T-cells", "B-cells", "Macrophages"), ncol(pbmcs_subset), replace = TRUE)
#group <- as.vector(dfMeta$Glia_vs_Neuro_all)
sample_labels <- group
sample_labels[sample_labels == names(DGEsampleList)[1]] <- paste0(sample_labels[sample_labels == names(DGEsampleList[1])], "_", 1:length(sample_labels[sample_labels == names(DGEsampleList)[1]]))
sample_labels[sample_labels == names(DGEsampleList)[2]] <- paste0(sample_labels[sample_labels == names(DGEsampleList[2])], "_", 1:length(sample_labels[sample_labels == names(DGEsampleList)[2]]))
cell_type_lables <- group
unique(group)
fit <- glm_gp(dfMatrix, design = group)
comparison <- names(DGEsampleList)
contrastString <- (paste0(comparison[1], " - ", comparison[2]))
res <- test_de(fit, contrast = contrastString,
pseudobulk_by = sample_labels,
#subset_to = cell_type_labels == "T-cells",
#n_max = 4,
sort_by = pval,
decreasing = FALSE
)
dfRes <- res[order(res$lfc),]
dfRes <- dfRes[dfRes$lfc > -100 & dfRes$lfc < 100, ]
dfRes[["padj"]] <- dfRes$adj_pval
minP <- min(dfRes$padj[dfRes$padj != 0])
dfRes[dfRes$padj == 0, "padj"] <- minP
dfRes[["lg10p"]] <- -1 * log10(dfRes$padj)
comp_1 <- dfRes
comp_1[["gene"]] <- dfRes$name
names(comp_1) <- gsub("lfc", paste0(contrastTag, "logFC_" ,colName), names(comp_1))
names(comp_1) <- gsub("padj", paste0(contrastTag, "padj_",colName), names(comp_1))
names(comp_1) <- gsub("lg10p", paste0(contrastTag, "lg10p_",colName), names(comp_1))
selVec <- c(
"gene",
names(comp_1)[grep(contrastTag, names(comp_1))]
)
comp_1 <- unique(comp_1[,selVec])
pos <- grep(paste0("DGE_", DGEselCol), names(obj@dataTableList))
returnList <- list(
"OsC" = obj,
DGEtable = comp_1
)
return(returnList)
})
## End doDGEsc Function ##
###############################################################################
###############################################################################
## Get Marker Gene Table ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
setGeneric(
name="addClusterMarkers2CatDisplay",
def=function(
obj,
addCorCatsToLabDb = TRUE
) {
if (obj@parameterList$geneIDcolumn != "mgi_symbol" & obj@parameterList$geneIDcolumn != "hgnc_symbol") {
queryGS <- "hgnc_symbol"
} else {
queryGS <- obj@parameterList$geneIDcolumn
}
dfGeneralMarkers <- obj@dataTableList$dfGeneralMarkers
dfGeneralMarkers <- dfGeneralMarkers[dfGeneralMarkers$direction == "positive",]
clusterVec <- sort(as.vector(unique(dfGeneralMarkers$cluster)))
gmt.list <- list()
max.length <- 0
for (i in 1:length(clusterVec)){
dfTemp <- dfGeneralMarkers[dfGeneralMarkers$cluster == clusterVec[i], ]
geneVec <- sort(unique(dfTemp$gene))
## Translate to human in non-mouse non-human species ##
if (obj@parameterList$geneIDcolumn != "mgi_symbol" & obj@parameterList$geneIDcolumn != "hgnc_symbol") {
dfTranslate <- unique(obj@dfGeneAnnotation[,c(obj@parameterList$geneIDcolumn, "hgnc_symbol")])
dfTranslate <- dfTranslate[dfTranslate$hgnc_symbol != "", ]
dfTranslate <- dfTranslate[dfTranslate[,obj@parameterList$geneIDcolumn] %in% geneVec, ]
geneVec <- unique(dfTranslate$hgnc_symbol)
}
if (length(geneVec) > 0){
head <- paste0(obj@parameterList$project_id, "_Markers_Cluster_", clusterVec[i])
cat.id <- paste0("temp_",obj@parameterList$project_id, "_cluster_marker_", clusterVec[i])
head <- append(head, paste0("https:\\/\\/biologic.crick.ac.uk\\/", obj@parameterList$project_id, "\\/category-view/", cat.id))
gmt.vec <- append(head, geneVec)
gmt.list[[cat.id]] <- gmt.vec
if (max.length < length(gmt.vec)){
max.length <- length(gmt.vec)
}
}
}
## Add empty spaces to gmt.list to fill up to max.length
for (i in 1:length(gmt.list)){
n.add <- max.length - length(gmt.list[[i]])
gmt.list[[i]] <- append(
gmt.list[[i]], rep("", n.add)
)
}
## Transform list into gmt data frame ##
for (i in 1:length(gmt.list)){
if (i ==1){
df.gmt <- t(gmt.list[[i]])
} else {
df.gmt <- rbind(df.gmt, t(gmt.list[[i]]))
}
}
## Ensure df.gmt is a data frame
df.gmt <- data.frame(df.gmt)
## Write to file #3
#setwd(obj@parameterList$localWorkDir)
FNc <- paste0(obj@parameterList$localWorkDir, obj@parameterList$projectID, ".cluster.marker.genes.gmt")
write.table(df.gmt, FNc, row.names = FALSE, col.names = FALSE, sep="\t")
## Remove old entries for this project
if (addCorCatsToLabDb){
library(RMySQL)
dbDB = RMySQL::dbConnect(
drv = RMySQL::MySQL(),
user = obj@parameterList$db.user,
password = db.pwd,
dbname = obj@dbDetailList$ref.cat.db,
host = obj@dbDetailList$host
)
dbGetQuery(dbDB, paste0("DELETE FROM ", obj@parameterList$lab.categories.table, " WHERE cat_type='temp_cluster_marker_", obj@parameterList$project_id,"'"))
RMySQL::dbDisconnect(dbDB)
## Done removing older categories for this project
msigdb.gmt2refDB(
df.gmt = df.gmt, #gmt.file = "cor.categories.gmt",
host = obj@dbDetailList$host,
db.user = obj@dbDetailList$db.user,
pwd = db.pwd,
ref.db = obj@dbDetailList$ref.cat.db,
ref.db.table = obj@parameterList$lab.categories.table,
cat_type = paste0("temp_cluster_marker_", obj@parameterList$project_id),
data_source = "DGE Cluster Marker Genes",
keep.gene.values = FALSE,
gene.id = queryGS,
create.new.table = FALSE,
mm.hs.conversion.file = paste0(hpc.mount, "Projects/reference_data/20160303.homologene.data.txt")
)
}
})
## Done ##
###############################################################################
###############################################################################
## Cell Cycle Barchart per sample ##
# This function will display cell cycle phase estimates per sample ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
#'
setGeneric(
name="doCellCycleBarchart",
def=function(
SampleList,
obj = "Obio",
figureCount = 1,
VersionPdfExt = ".pdf",
tocSubLevel = 4,
s.genes = NULL,
g2m.genes = NULL,
cellCycleRefFile = NULL #'paste0(hpc.mount, "Projects/reference_data/cell_cycle_vignette_files/nestorawa_forcellcycle_expressionMatrix.txt")'
) {
###############################################################################
## Make plots ##
#library(Seurat)
if (is.null(s.genes) | is.null(g2m.genes)){
exp.mat <- read.table(file = cellCycleRefFile, header = TRUE,
as.is = TRUE, row.names = 1)
# A list of cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat. We can
# segregate this list into markers of G2/M phase and markers of S phase
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
}
print(paste0("Used as S-phase marker genes: ", sort(unique(paste(s.genes, collapse = ", ")))))
print(paste0("Used as G2M-phase marker genes: ", sort(unique(paste(g2m.genes, collapse = ", ")))))
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
sampleNames <- as.vector(names(obj@sampleDetailList))
## Calculate cell cycle scores for each individual sample ##
for (i in 1:length(sampleNames)){
Idents(SampleList[[sampleNames[i]]]) <- "sampleID"
# Create our Seurat object and complete the initalization steps
SampleList[[sampleNames[i]]] <- Seurat::CellCycleScoring(
SampleList[[sampleNames[i]]],
s.features = s.genes,
g2m.features = g2m.genes,
set.ident = TRUE
)
NcellsS <- nrow(SampleList[[sampleNames[i]]]@meta.data[SampleList[[sampleNames[i]]]@meta.data$Phase == "S",])
NcellsG2M <- nrow(SampleList[[sampleNames[i]]]@meta.data[SampleList[[sampleNames[i]]]@meta.data$Phase == "G2M",])
NcellsG1 <- nrow(SampleList[[sampleNames[i]]]@meta.data[SampleList[[sampleNames[i]]]@meta.data$Phase == "G1",])
Nsum <- sum(c(NcellsS, NcellsG2M, NcellsG1))
PercS <- NcellsS/Nsum
PercG2M <- NcellsG2M/Nsum
PercG1 <- NcellsG1/Nsum
dfTempRes <- data.frame(
sampleName = rep(sampleNames[i], 3),
Phase = c("G1", "S", "G2M"),
Ncells = c(NcellsG1, NcellsS, NcellsG2M),
PercCells = c(PercG1, PercS, PercG2M)
)
if (i ==1){
dfRes <- dfTempRes
} else {
dfRes <- rbind(dfRes, dfTempRes)
}
}
tag <- "CellCycle_Barchart_Ncells"
plotList[[tag]] <- ggplot2::ggplot(
data=dfRes, ggplot2::aes(x= sampleName, y=Ncells, fill=Phase)
) + ggplot2::geom_bar(stat="identity", colour="black"
) + ggplot2::coord_flip() + ggplot2::theme_bw() + ggplot2::theme(
axis.text.y = ggplot2::element_text(size=8),
axis.text.x = ggplot2::element_text(size=8),
axis.title.y = ggplot2::element_text(size=8),
axis.title.x = ggplot2::element_text(size=8),
axis.line = ggplot2::element_line(colour = "black"),
panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
plot.title = ggplot2::element_text(hjust = 0.5, size = 12)
) + ggplot2::labs(title = paste0(gsub("_", " ", tag)," enriched genes") ,y = "N Cells", x = ""
)
FNbase <- paste0("Sample.", tag, VersionPdfExt)
FN <- paste0(obj@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
figLegend <- paste0(
"**Figure ",
figureCount,
":** ",
" Sample-level cell cycle estimates. Download a pdf of this figure [here](", FNrel,")."
)
figureCount <- figureCount + 1
NewChnk <- paste0(
paste(rep("#", tocSubLevel), collapse=""), " ", tag,
"\n```{r sample_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
tag <- "CellCycle_Barchart_Percent_Cells"
plotList[[tag]] <- ggplot2::ggplot(
data=dfRes, ggplot2::aes(x= sampleName, y=PercCells, fill=Phase)
) + ggplot2::geom_bar(stat="identity", colour="black"
) + ggplot2::coord_flip() + ggplot2::theme_bw() + ggplot2::theme(
axis.text.y = ggplot2::element_text(size=8),
axis.text.x = ggplot2::element_text(size=8),
axis.title.y = ggplot2::element_text(size=8),
axis.title.x = ggplot2::element_text(size=8),
axis.line = ggplot2::element_line(colour = "black"),
panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
plot.title = ggplot2::element_text(hjust = 0.5, size = 12)
) + ggplot2::labs(title = paste0(gsub("_", " ", tag)," enriched genes") ,y = "Percent Cells", x = ""
)
FNbase <- paste0("Sample.", tag, VersionPdfExt)
FN <- paste0(obj@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
figLegend <- paste0(
"**Figure ",
figureCount,
":** ",
" Sample-level cell cycle estimates. Download a pdf of this figure [here](", FNrel,")."
)
figureCount <- figureCount + 1
NewChnk <- paste0(
paste(rep("#", tocSubLevel), collapse=""), " ", tag,
"\n```{r sample_",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
returnList <- list(
"plotList" = plotList,
"chnkVec" = chnkVec,
"figureCount" = figureCount
)
})
## Done Cell Cycle Barchart ##
###############################################################################
###############################################################################
## Cell Cycle UMAP per sample ##
#' @title A method
#'
#' @description Method description
#' @param agree TBD
#' @keywords TBD
#' @export
# This function will display cell cycle phase estimates per sample ##
setGeneric(
name="doUMAP_cellCyle",
def=function(
SampleList,
obj = Obio,
figureCount = figureCount,
VersionPdfExt = ".pdf",
tocSubLevel = 4,
dotsize = 0.5,
s.genes = NULL,
g2m.genes = NULL,
cellCycleRefFile = NULL #paste0(hpc.mount, "Projects/reference_data/cell_cycle_vignette_files/nestorawa_forcellcycle_expressionMatrix.txt")
) {
###############################################################################
## Make plots ##
library(Seurat)
if (is.null(s.genes) | is.null(g2m.genes)){
exp.mat <- read.table(file = cellCycleRefFile, header = TRUE,
as.is = TRUE, row.names = 1)
# A list of cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat. We can
# segregate this list into markers of G2/M phase and markers of S phase
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
}
print(paste0("Used as S-phase marker genes: ", sort(unique(paste(s.genes, collapse = ", ")))))
print(paste0("Used as G2M-phase marker genes: ", sort(unique(paste(g2m.genes, collapse = ", ")))))
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
sampleNames <- as.vector(names(obj@sampleDetailList))
## Calculate cell cycle scores for each individual sample ##
for (i in 1:length(sampleNames)){
# Create our Seurat object and complete the initalization steps
Idents(SampleList[[sampleNames[i]]]) <- "sampleID"
SampleList[[sampleNames[i]]] <- CellCycleScoring(
SampleList[[sampleNames[i]]],
s.features = s.genes,
g2m.features = g2m.genes,
set.ident = TRUE
)
###############################################################################
## Make one UMAP plot per sample ##
sampleVec <- sampleNames[i]
dfPlot <- SampleList[[sampleNames[i]]]@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
## Get UMAP coordinates ##
coord <- data.frame(SampleList[[sampleNames[i]]]@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot$UMAP_1 <- NULL
dfPlot$UMAP_2 <- NULL
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
## Add cluster colors ##
#dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
#clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))
#library(scales)
#clusterCols = hue_pal()(length(clusterVec))
#dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)
maxX <- 1.1*max(dfPlot$UMAP_1, na.rm = T)
minX <- 1.1*min(dfPlot$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfPlot$UMAP_2, na.rm = T)
minY <- 1.1*min(dfPlot$UMAP_2, na.rm = T)
tag <- paste0("UMAP_Cell_Cycle_Plot_", sampleNames[i])
plotList[[tag]] <- ggplot2::ggplot(data=dfPlot[dfPlot$included == "+",], ggplot2::aes(UMAP_1, UMAP_2, color=Phase)
) + ggplot2::geom_point( shape=16, size = as.numeric(dotsize)
) + ggplot2::xlab("UMAP1") + ggplot2::ylab("UMAP2"
) + ggplot2::theme_bw(
) + ggplot2::theme(
axis.text.y = ggplot2::element_text(size=8),
axis.text.x = ggplot2::element_text(size=8),
axis.title.y = ggplot2::element_text(size=8),
axis.title.x = ggplot2::element_text(size=8),
axis.line = ggplot2::element_line(colour = "black"),
panel.border = ggplot2::element_rect(colour = "black", fill=NA, size=1),
plot.title = ggplot2::element_text(hjust = 0.5, size = 12),
legend.title = ggplot2::element_blank()
) + ggplot2::guides(col = ggplot2::guide_legend(override.aes = list(shape = 16, size = 5))
) + ggplot2::ggtitle(paste0(gsub("_", " ",tag))
) + ggplot2::xlim(minX, maxX) + ggplot2::ylim(minY, maxY
) + ggplot2::coord_fixed(ratio=1
)
if (length(unique(dfPlot$Cluster)) > 15){
plotList[[tag]] <- plotList[[tag]] + ggplot2::theme(legend.position = "none")
}
FNbase <- paste0(tag, VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
pdf(FN)
print(plotList[[tag]])
dev.off()
figLegend <- paste0(
'**Figure ',
figureCount,
':** ',
' Sample-level UMAPs. Estimated cell-cylce phase color-coded. Download a pdf of this figure <a href="',FNrel,'" target="_blank">here</a>.'
)
figureCount <- figureCount + 1
NewChnk <- paste0(
paste("#### ", tag),
"\n```{r ",
tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
figLegend,"'}\n",
"\n",
"\n print(plotList[['",tag,"']])",
"\n cat( '\n')",
"\n\n\n```\n"
)
chnkVec <- c(
chnkVec,
NewChnk
)
## Done making one umap plot per sample ##
###############################################################################
} ## End looping through samples
returnList <- list(
"plotList" = plotList,
"chnkVec" = chnkVec,
"figureCount" = figureCount
)
})
## Done Cell Cycle UMAP per sample ##
###############################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.