# Reading 10x cellranger data using the output directory
library(tidyverse)
get.10x.data <- function(path, mtx_name = "matrix.mtx", genes_name = "genes.tsv", barcodes_name = "barcodes.tsv"){
barcodes <- read.table(file.path(path, barcodes_name), comment.char = "%")
genes <- read.table(file.path(path, genes_name), comment.char = "%", sep = "\t")
mtx <- read.table(file.path(path, mtx_name), comment.char = "%", sep = " ")[-1,]
A <- Matrix::sparseMatrix(i = mtx[,1], j = mtx[,2], x = mtx[,3])
row.names(A) <- genes[1:range(mtx[,1])[2],1]
colnames(A) <- barcodes[1:range(mtx[,2])[2],1]
return(A)
}
# Print the left top corner of a given dataframe or matrix
#' Corner
#'
#' Getting the top left corner of a dataframe, tibble or matrix.
#'
#' @param df Dataframe, matrix or tibble to be shown
corner <- function(df) df[1:min(c(5,nrow(df))),
1:min(c(5,ncol(df)))]
#' Generates a seurat object of 10x raw data
#'
#' Using the 10x output path with the genes, barcodes and matrix files.
#'
#' @param path Path to the 10x output
#' @param cell.name.addition Prefix for each cell barcode
#' @param min.genes Minimal number of genes which need to be detected per barcode to keep the barcode
#' @return Seurat object
make.seurat.object <- function(path,
cell.name.addition = NULL, min.genes = 100, project = "SeuratProject"){
A <- get.10x.data(path)
gene.annot <- read.table(file.path(path, "genes.tsv"), comment.char = "%", sep = "\t")
colnames(gene.annot) <- c("GeneID", "GeneName")
A <- Seurat::CreateSeuratObject(raw.data = A, min.genes = min.genes, project = project)
A@misc <- list(geneAnnotation = gene.annot)
return(A)
}
# Produces a meta data table for each cell
# Can be used for the novel nuclei droplet identification
generate.frac.intron.meta <- function(exon, fullTranscript){
if(!is.matrix(fullTranscript) & !is.matrix(exon)){
ft <- fullTranscript@raw.data
ex <- exon@raw.data
}
keep.cells <- intersect(colnames(ft), colnames(ex))
cat("Number of cells in both sets:",length(keep.cells), "\n")
ex <- ex[,keep.cells]
ft <- ft[,keep.cells]
ex.sum <- as.data.frame(Matrix::colSums(ex))
colnames(ex.sum) = "exonic"
ft.sum <- as.data.frame(Matrix::colSums(ft))
colnames(ft.sum) <- "fullTranscript"
cells.meta <- merge(ft.sum, ex.sum, by = 0)
colnames(cells.meta)[1] <- "CellID"
cells.meta <- dplyr::filter(cells.meta, fullTranscript > 0)
cells.meta <- cells.meta %>% arrange(desc(fullTranscript)) %>%
mutate(CellRank = 1:nrow(cells.meta)) %>%
mutate(fracIntronic = 1 - (exonic / fullTranscript))
return(cells.meta)
}
# Plot the novel nuclei droplet identification plot
frac.intron.plot <- function(exon=NULL, fulltranscript=NULL, meta.data = NULL,
frac.cut = NULL, rank.cut = NULL,
frac.cut.max = NULL, rank.cut.min = NULL){
if(is.null(meta.data) & !is.null(exon) & !is.null(fulltranscript)){
meta.data <- generate.frac.intron.meta(exon, fulltranscript)
}else if(!is.null(meta.data) & !is.null(exon) & !is.null(fulltranscript)){
stop("Please provide either a meta.data dataframe, generated by generate.frac.intron.meta,\n or provide exon and intron matrices / seurat objects.")
}
p <- ggplot(meta.data, aes(x = CellRank, y = fracIntronic)) +
geom_point(size = 0.1) +
theme_gray() +
ylab("Fraction intronic UMI") +
xlab("Full transcript UMI cell rank")
if(!is.null(frac.cut)){
p <- p + geom_hline(yintercept = frac.cut)
}
if(!is.null(frac.cut.max)){
p <- p + geom_hline(yintercept = frac.cut.max, color = "red")
}
if(!is.null(rank.cut)){
p <- p + geom_vline(xintercept = rank.cut)
}
if(!is.null(rank.cut.min)){
p <- p + geom_vline(xintercept = rank.cut.min, color = "red")
}
return(p)
}
#' Filtering droplets
#'
#' Filters droplets based on their properties, calculated using generate.frac.intron.meta
#'
#' @param sobj.exon Seurat object, pretreated with generate.frac.intron.meta
filter.nuc.droplets <- function(sobj.exon, min.frac.intronic = 0.5, max.rank = NULL,
max.frac.intronic = NULL, min.rank = NULL){
meta.cell <- sobj.exon@misc$cells.meta
meta.cell <- meta.cell %>% filter(fracIntronic >= min.frac.intronic)
if(!is.null(max.rank)){
meta.cell <- meta.cell %>% filter(CellRank <= max.rank)
}
if(!is.null(max.frac.intronic)){
meta.cell <- meta.cell %>% filter(fracIntronic <= max.frac.intronic)
}
if(!is.null(min.rank)){
meta.cell <- meta.cell %>% filter(CellRank >= min.rank)
}
meta.cell <- filter(meta.cell, CellID %in% rownames(sobj.exon@meta.data))
return(SubsetData(sobj.exon, cells.use = meta.cell$CellID))
}
#' Classical kneepoint plot
#'
#' Generates a kneepoint plot of the seurat object.
make.knee.plot <- function(sobj, max_rank = 2e4){
meta <- sobj@meta.data
meta <- meta %>% arrange(desc(nUMI)) %>%
mutate(cumSum = cumsum(nUMI)) %>%
mutate(UMIrank = 1:nrow(meta)) %>%
filter(UMIrank < max_rank)
ggplot(meta, aes(x = UMIrank, y = cumSum)) +
geom_point() +
theme_gray()
}
#' Filters out lowly expressed genes
#'
#' Remove genes from the dataset which are lowly expressed.
#'
#' @param sobj Seurat object
#' @param min_cells_expressed In how many cells needs a gene being expressed to be accepted
filter.expressed.genes <- function(sobj, min_cells_expressed = 25){
exp <- sobj@data
exp <- exp[Matrix::rowSums(exp > 0) >= min_cells_expressed, ]
sobj@data <- exp
return(sobj)
}
#' Exports a seurat object as CSV files
#'
#' This function creates a CSV file for:
#'
#' - the data field
#' - the raw.data field
#' - the UMAP embedding
#'
#' @param obj Seurat object
#' @param folderpath Folder to export to. If not existent, it will be created.
#' @param sample.name Prefix for each exported CSV
#' @param alter.gene.names Dataframe to convert the names of the Genes.
#' @param alter.gene.names.from Column name of the alter.gene.names dataframe which corresponds to the row.names in the data and raw.data slot of the seurat object
#' @param alter.gene.names.to Column name which contains the new gene names in the alter.gene.names dataframe
#'
export.csvs <- function(obj, folderpath, sample.name = "seurat",
alter.gene.names.df = NULL,
alter.gene.names.from = "Gene.stable.ID",
alter.gene.names.to = "Gene.name",
min.cells = 10){
exprs <- obj@data
umi <- obj@raw.data
emb <- obj@dr$umap@cell.embeddings
dir.create(folderpath, recursive = T)
if(!is.null(alter.gene.names.df)){
cat("Renaming the genes\n")
exprs <- exprs[row.names(exprs) %in% alter.gene.names.df[[alter.gene.names.from]], ]
row.names(exprs) <- alter.gene.names.df[[alter.gene.names.to]][match(row.names(exprs), alter.gene.names.df[[alter.gene.names.from]])]
umi <- umi[row.names(umi) %in% alter.gene.names.df[[alter.gene.names.from]], ]
row.names(umi) <- alter.gene.names.df[[alter.gene.names.to]][match(row.names(umi), alter.gene.names.df[[alter.gene.names.from]])]
}
exprs <- exprs[,match(colnames(exprs), row.names(emb))[!is.na(match(colnames(exprs), row.names(emb)))]]
umi <- umi[,match(colnames(umi), row.names(emb))[!is.na(match(colnames(umi), row.names(emb)))]]
exprs.gz <- gzfile(file.path(folderpath, paste0(sample.name,"_exprs.csv.gz")), "w" )
umi.gz <- gzfile(file.path(folderpath, paste0(sample.name,"_umi.csv.gz")), "w")
cat("Writing the embedding\n")
write.table(as.matrix(emb), file = file.path(folderpath, paste0(sample.name, "_embedding.csv")),
quote = F, sep = ",", col.names = F)
cat("Writing the expression matrix\n")
write.table(as.matrix(exprs), file = exprs.gz, quote = F, sep = ",")
close(exprs.gz)
cat("Writing the UMI counts\n")
write.table(as.matrix(umi[Matrix::rowSums(umi > 0) > min.cells, ]), file = umi.gz, quote = F, sep = ",")
close(umi.gz)
}
MergeSeuratMulti <- function(sobj.list){
out.merged <- NULL
for(n in names(sobj.list)){
tmp = sobj.list[[n]]
cat("Processing", n, "\n")
if(is.null(out.merged)){
out.merged <- tmp
}else{
out.merged <- MergeSeurat(out.merged, tmp, do.normalize = F, do.scale = F, do.center = F)
}
}
return(out.merged)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.