#' @importFrom utils read.table
#' @importFrom Matrix readMM
NULL
#' @title Read 10x matrix
#' @description Reads a count matrix generated by the 10x processing pipeline
#' from the specified directory and returns it as a sparse matrix. It aborts if
#' one of the required files in the specified directory does not exist.
#' @details For version 'V2' the files \code{matrix.mtx}, \code{genes.tsv} and
#' \code{barcodes.tsv} are read. For version 'V3' the gzip-compressed files
#' \code{matrix.mtx.gz}, \code{features.tsv.gz} and \code{barcodes.tsv.gz} are
#' read. The gene/feature and barcode tables are parsed strictly as
#' tab-separated text with quoting and comment handling disabled, so fields
#' that contain spaces, quotes or '#' are kept intact.
#' @param path location of 10x output
#' @param version version of 10x output to read, must be one of 'V2' or 'V3'
#' @param transcript.id transcript identifier to use, can be SYMBOL (second
#' column of the gene/feature table) or ENSEMBL (first column)
#' @return the matrix as a 'dgCMatrix' (returned invisibly); row names are taken
#' from the gene/feature table and column names from the barcode table
#' @import Matrix
#' @import methods
#' @export read10xMatrix
read10xMatrix <- function(path, version='V2', transcript.id = 'SYMBOL') {
  if (version == 'V2') {
    compressed <- FALSE
    genesName <- 'genes.tsv'
  } else if (version == 'V3') {
    compressed <- TRUE
    genesName <- 'features.tsv'
  } else {
    stop('Unknown file version!')
  }

  if (transcript.id == 'SYMBOL') {
    transcript.id.col.idx <- 2
  } else if (transcript.id == 'ENSEMBL') {
    transcript.id.col.idx <- 1
  } else {
    stop('Unknown transcript identifier')
  }

  suffix <- if (compressed) '.gz' else ''
  matrixFile <- paste0(path, '/matrix.mtx', suffix)
  genesFile <- paste0(path, '/', genesName, suffix)
  barcodesFile <- paste0(path, '/barcodes.tsv', suffix)

  if (!file.exists(matrixFile)) { stop('Matrix file does not exist') }
  if (!file.exists(genesFile)) { stop('Genes file does not exist') }
  if (!file.exists(barcodesFile)) { stop('Barcodes file does not exist') }

  # V3 files are gzip-compressed and need a decompressing connection; V2 files
  # are handed to the readers as plain paths.
  openInput <- function(f) if (compressed) gzfile(f) else f

  # Split on tabs only: the third column of a V3 features file holds
  # multi-word values, which whitespace splitting would break into a varying
  # number of fields. Quotes and '#' are ordinary characters in identifiers,
  # and every column is kept as character so names are never factors/numbers.
  readAnnotation <- function(f) {
    read.table(openInput(f), header = FALSE, sep = '\t', quote = '',
               comment.char = '', colClasses = 'character',
               stringsAsFactors = FALSE)
  }

  x <- as(Matrix::readMM(openInput(matrixFile)), 'dgCMatrix')

  genes <- readAnnotation(genesFile)
  if (ncol(genes) < transcript.id.col.idx) {
    stop('Genes file does not contain the column required for transcript.id = ',
         transcript.id)
  }
  rownames(x) <- genes[, transcript.id.col.idx]

  barcodes <- readAnnotation(barcodesFile)
  colnames(x) <- barcodes[, 1]

  invisible(x)
}
#' @title read multiple 10x matrices into a single sparse array
#' @description given a named list of paths of 10X matrices, read every matrix
#' and optionally prefix the cell names with the sample name, subset all
#' matrices to their common genes, and merge them into one large matrix
#' @param paths named vector of location of the data (readable by read10xMatrix())
#' @param min.common.genes minimum number of common genes to allow; an error is
#' raised if fewer genes are shared by all matrices
#' @param common.genes logical, subset all matrices to common genes, required for merge
#' @param merge logical, merge all the matrices to one, requires common.genes and prefix.cells
#' @param prefix.cells prefix all cells with the name of the respective path in paths
#' @param prefix.sep separator for prefix of cells
#' @param ... further arguments passed on to read10xMatrix() (e.g. version,
#' transcript.id)
#' @return if merge is TRUE a single matrix with the cells of all samples bound
#' column-wise, otherwise a list with one matrix per entry of paths
#' @export read10Xmatrices
read10Xmatrices <- function(paths, min.common.genes = 1000, common.genes = FALSE, merge =FALSE, prefix.cells=FALSE,
                            prefix.sep = '_', ...) {
  if (merge && !common.genes) stop("Can't merge matrices if common.genes is not set. Aborting.")
  if (merge && !prefix.cells) stop("Can't merge matrices if prefix.cells is not set. Aborting.")

  # Read the matrices one by one. simplify = FALSE guarantees a list, whatever
  # the dimensions of the individual matrices are.
  matrices <- sapply(paths, read10xMatrix, ..., simplify = FALSE, USE.NAMES = TRUE)

  ## Prefix the cell names with the sample name
  if (prefix.cells) {
    if (length(matrices) > 0 && is.null(names(matrices))) {
      stop("Can't prefix cells if paths is not named. Aborting.")
    }
    matrices <- Map(
      function(m, name) {
        colnames(m) <- paste(name, colnames(m), sep = prefix.sep)
        m
      },
      matrices,
      names(matrices)
    )
  }

  ## Subset to the genes present in every matrix
  if (common.genes) {
    commongenes <- Reduce(intersect, lapply(matrices, rownames))
    ## Stop if common genes too low
    if (length(commongenes) < min.common.genes) stop('The number of common genes is too low!')
    # drop = FALSE keeps a matrix even when a sample holds a single cell
    matrices <- lapply(matrices, function(m) m[commongenes, , drop = FALSE])
  }

  ## Merge the matrices (all of them now share identical rows)
  if (merge) {
    matrices <- Reduce(cbind, matrices)
  }

  ## Return
  matrices
}
#' @title read multiple 10x matrices and return as a list
#' @description given a named list of paths of 10X matrices return a list of matrices
#' @param pathList a named list of paths to the matrices (that can be read by read10xMatrix)
#' @return a list with one entry per element of pathList (returned invisibly)
#' @export readMultiple10XmatricesAsList
readMultiple10XmatricesAsList <- function(pathList) {
  # Read the matrices one by one; simplify = FALSE ensures the result is
  # always a list and is never collapsed into a vector or array.
  matrices <- sapply(pathList, read10xMatrix, simplify = FALSE, USE.NAMES = TRUE)
  invisible(matrices)
}
#' Reads in an expression matrix as formatted by the Klein lab
#' pipeline
#'
#' The file is read as a tab-separated table with a header line, using the
#' first column as row names. The prefix is prepended to those row names and
#' the table is transposed, so the rows of the file become the columns of the
#' returned matrix.
#' @param path path of the file
#' @param prefix prefix to add to the cell names
#' @return a sparse matrix
#' @import Matrix
#' @export readKleinMatrix
readKleinMatrix <- function(path, prefix) {
  # NOTE: the header passes through read.table's default check.names = TRUE,
  # so column names that are not syntactically valid R names are altered.
  counts <- read.table(path, header = TRUE, row.names = 1, sep = '\t',
                       as.is = TRUE, stringsAsFactors = FALSE)
  counts <- data.matrix(counts)
  rownames(counts) <- paste0(prefix, '_', rownames(counts))
  # Call Matrix through its namespace instead of attaching the package with
  # require(), which alters the search path and only returns FALSE on failure.
  Matrix::Matrix(t(counts), sparse = TRUE)
}
#' Reads in multiple klein matrices and returns them in a list
#' @param file.names named list of the files to load, the names will become prefixes
#' @return a list of sparse matrices, named like file.names
#' @export readMultipleKleinMatrices
readMultipleKleinMatrices <- function(file.names) {
  if (length(file.names) > 0 && is.null(names(file.names))) {
    stop('file.names must be named, the names are used as prefixes')
  }
  # Map() never simplifies its result, so a list is returned in every case.
  Map(readKleinMatrix, file.names, names(file.names))
}
#' Reads in a data matrix from the in house indrop pipeline
#'
#' Loads the RDS file at path, extracts its cm element and prepends
#' name followed by an underscore to every column name.
#' @param name prefix to give to cells
#' @param path the file path
#' @return a sparse expression matrix
#' @export readInDropMatrix
readInDropMatrix <- function(name, path) {
  # The stored object carries the count matrix in its 'cm' element
  counts <- readRDS(path)$cm
  colnames(counts) <- paste0(name, '_', colnames(counts))
  counts
}
#' Reads in multiple indrop matrices
#' @param file.names named vector of filenames, names will become prefixes
#' @return list of matrices, named like file.names
#' @export readMultipleInDropMatrices
readMultipleInDropMatrices <- function(file.names) {
  if (length(file.names) > 0 && is.null(names(file.names))) {
    stop('file.names must be named, the names are used as prefixes')
  }
  # Map() never simplifies, so equally sized matrices are not collapsed into
  # one array; the result is always a list named after the prefixes.
  Map(readInDropMatrix, names(file.names), file.names)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.