MixedSpeciesSample <- setClass(Class = "MixedSpeciesSample",
slots = c(species2 = "character"),
contains = "SingleSpeciesSample"
)
setMethod("initialize",
"MixedSpeciesSample",
function (.Object, species1="", species2="", cells=c(), genes=c(),
dge=data.frame()) {
.Object@species1 = species1
.Object@species2 = species2
.Object@cells = names(dge)
.Object@genes = rownames(dge)
.Object@dge = dge
.Object
})
#' Split DGE by genes of species
#'
#' Splits the DGE of a \code{MixedSpeciesSample} into two \code{SingleSpeciesSample} DGEs.
#' The splitting is done by the prefixes of the gene names. If no prefixes are provided
#' the defaults are \code{hg_} for human and \code{mm_} for mouse respectively.
#'
#' @param object A \code{MixedSpeciesSample} object.
#' @param prefix1 The prefix of the gene names for species1.
#' @param prefix2 The prefix of the gene names for species2.
#' @return A list of \code{data.frames} corresponding to the genes of the two species.
setGeneric(name = "splitDgeByGenesOfSpecies",
def = function(object, prefix1=NULL, prefix2=NULL) {
standardGeneric("splitDgeByGenesOfSpecies")})
setMethod(f = "splitDgeByGenesOfSpecies",
signature = "MixedSpeciesSample",
function(object, prefix1, prefix2) {
if (is.null(prefix1) & is.null(prefix2)) {
if (object@species1 == "human" & object@species2 == "mouse") {
object.species1 <- object@dge[grep("hg_", object@genes), ]
rownames(object.species1) <- gsub("hg_", "", rownames(object.species1))
object.species2 <- object@dge[grep("mm_", object@genes), ]
rownames(object.species2) <- gsub("mm_", "", rownames(object.species2))
}
if (object@species1 == "melanogaster" & object@species2 == "virilis") {
object.species2 <- object@dge[grep("Dvir_", object@genes), ]
object.species1 <- object@dge[setdiff(1:(length(object@genes)),
grep("Dvir_", object@genes)), ]
}
}
else {
object.species1 <- object@dge[grep(prefix1, object@genes), ]
rownames(object.species1) <- gsub(prefix1, "", rownames(object.species1))
object.species2 <- object@dge[grep(prefix2, object@genes), ]
rownames(object.species2) <- gsub(prefix2, "", rownames(object.species2))
}
return (list(object.species1, object.species2))
})
#' Classify cells to species
#'
#'
#' Classify the cells to species according to the transcripts they express.
#' The differentiation of species is performed internally and according to
#' the \code{species1} and \code{species2} labels of the sample. If the
#' classificiation is not confident enough, the cell is characterized as doublet.
#'
#' @param object A \code{MixedSpeciesSample} object.
#' @param threshold The threshold which the ratio of transcripts of one species
#' over the other has to surpass in order to succesfully assign a cell to a species.
#' Below that threshold the cell is characterized as a doublet.
#' @param min.trans The minimum number of transcripts (UMIs) required for a cell.
#' @return A \code{data.frame} containing the cell barcodes, the number of transcripts
#' per species and the characterization of the cell.
setGeneric("classifyCellsAndDoublets",
function(object, threshold=0.9, min.trans=500) {
standardGeneric("classifyCellsAndDoublets")})
setMethod("classifyCellsAndDoublets",
"MixedSpeciesSample",
function(object, threshold, min.trans) {
object.species1 <- splitDgeByGenesOfSpecies(object)[[1]]
object.species2 <- splitDgeByGenesOfSpecies(object)[[2]]
df <- data.frame("cell" = names(object@dge),
"s1" = as.vector(colSums(object.species1)),
"s2" = as.vector(colSums(object.species2)),
"species" = "", stringsAsFactors = F)
for (i in 1:dim(df)[1]) {
if (df$s1[i] + df$s2[i] < min.trans) {
df$species[i] = "undefined"
next
}
if (df$s1[i] > df$s2[i] & df$s1[i]/(df$s1[i] + df$s2[i]) > threshold) {
df$species[i] = object@species1
next
}
if (df$s2[i] > df$s1[i] & df$s2[i]/(df$s1[i] + df$s2[i]) > threshold) {
df$species[i] = object@species2
next
}
df$species[i] = "mixed"
}
names(df) <- c("cell", object@species1, object@species2, "species")
return (df)
})
#' Split DGE by genes and cells of species
#'
#' @param object A \code{MixedSpeciesSample} object.
#' @param threshold The threshold which the ratio of transcripts of one species
#' over the other has to surpass in order to succesfully assign a cell to a species.
#' @return A list \code{data.frames} representing the DGEs for each species.
setGeneric("splitDgeByGenesAndCellsOfSpecies",
function(object, threshold=0.9) {
standardGeneric("splitDgeByGenesAndCellsOfSpecies")})
setMethod("splitDgeByGenesAndCellsOfSpecies",
"MixedSpeciesSample",
function(object, threshold) {
split.dge <- splitDgeByGenesOfSpecies(object)
object.species1 <- split.dge[[1]]
object.species2 <- split.dge[[2]]
df <- classifyCellsAndDoublets(object, threshold)
object.species1 <- object.species1[, df[df$species == object@species1, ]$cell]
object.species2 <- object.species2[, df[df$species == object@species2, ]$cell]
object.species1 <- object.species1[which(rowSums(object.species1) != 0), ]
object.species2 <- object.species2[which(rowSums(object.species2) != 0), ]
return (list(object.species1, object.species2))
})
#' Separate the mixed species sample into two single species samples
#'
#' @param object A \code{MixedSpeciesSample} object.
#' @param threshold The threshold which the ratio of transcripts of one species
#' over the other has to surpass in order to succesfully assign a cell to a species.
#' @return A list of two \code{SingleSpeciesSample} objects.
setGeneric("splitMixedSpeciesSampleToSingleSpecies",
function(object, threshold=0.9) {
standardGeneric("splitMixedSpeciesSampleToSingleSpecies")})
setMethod("splitMixedSpeciesSampleToSingleSpecies",
"MixedSpeciesSample",
function(object, threshold) {
splitted.species <- splitDgeByGenesAndCellsOfSpecies(object, threshold)
s1 <- new("SingleSpeciesSample",
species1 = object@species1,
cells = object@cells,
genes = rownames(splitted.species[[1]]),
dge = splitted.species[[1]])
s2 <- new("SingleSpeciesSample",
species1 = object@species2,
cells = object@cells,
genes = rownames(splitted.species[[2]]),
dge = splitted.species[[2]])
return (list(s1, s2))
})
setMethod("computeGenesPerCell",
"MixedSpeciesSample",
function(object, min.umis, threshold=0.9) {
return(rbind.fill(lapply(splitMixedSpeciesSampleToSingleSpecies(object, threshold),
computeGenesPerCell, min.umis=min.umis)))
})
setMethod("computeTranscriptsPerCell",
"MixedSpeciesSample",
function(object, threshold=0.9) {
return (rbind.fill(lapply(splitMixedSpeciesSampleToSingleSpecies(object, threshold),
computeTranscriptsPerCell)))
})
setMethod("removeCells",
"MixedSpeciesSample",
function(object, cells) {
return (new("MixedSpeciesSample", species1=object@species1, species2=object@species2,
dge=removeCells(object@dge, cells)))
})
setMethod("removeLowQualityCells",
"MixedSpeciesSample",
function(object, min.genes) {
return (new("MixedSpeciesSample", species1=object@species1, species2=object@species2,
dge=removeLowQualityCells(object@dge, min.genes)))
})
setMethod("keepBestCells",
"MixedSpeciesSample",
function(object, num.cells, min.num.trans) {
return (new("MixedSpeciesSample", species1=object@species1, species2=object@species2,
dge=keepBestCells(object@dge, num.cells, min.num.trans)))
})
setMethod("removeLowQualityGenes",
"MixedSpeciesSample",
function(object, min.cells) {
return (new("MixedSpeciesSample", species1=object@species1, species2=object@species2,
dge=removeLowQualityGenes(object@dge, min.cells)))
})
#' List cells that are candidates for collapsing.
#'
#' Identify and list cells which share 11 bases in their barcodes and only the last
#' one is different. The cells are marked as candidates if and only if they're classified
#' as belonging to the same species.
#' @param A \code{MixedSpeciesSample} object.
setMethod("listCellsToCollapse",
"MixedSpeciesSample",
function (object, threshold=0.9) {
return(unlist(lapply(splitMixedSpeciesSampleToSingleSpecies(object, threshold),
listCellsToCollapse), recursive = F))
})
setMethod("collapseCellsByBarcode",
"MixedSpeciesSample",
function(object, threshold=0.9) {
listOfCells <- listCellsToCollapse(object, threshold)
if (length(listOfCells) == 0) {
return(object)
}
for (index in 1:length(listOfCells)) {
object@dge <- cbind(object@dge, rowSums(object@dge[, listOfCells[[index]]]))
}
object@dge <- object@dge[, !names(object@dge) %in% unlist(listOfCells)]
names(object@dge)[(length(names(object@dge)) - length(listOfCells)
+ 1):length(names(object@dge))] <- unlist(listOfCells)[seq(1, length(unlist(listOfCells)), 2)]
object@cells <- names(object@dge)
return (object)
})
#' Compare gene expression levels between two mixed species samples
#'
#' @param object1 A \code{MixedSpeciesSample} object.
#' @param object2 A \code{MixedSpeciesSample} object.
#' @param threshold1 The threshold for the firs sample.
#' @param threshold2 The threshold for the second sample.
#' @return A plot comparing the gene expression levels by species.
setMethod("compareGeneExpressionLevels",
"MixedSpeciesSample",
function(object1, object2, threshold1, threshold2, name1="sample1", name2="sample2") {
object1.species1 <- splitDgeByGenesAndCellsOfSpecies(object1, threshold1)[[1]]
object1.species2 <- splitDgeByGenesAndCellsOfSpecies(object1, threshold1)[[2]]
object2.species1 <- splitDgeByGenesAndCellsOfSpecies(object2, threshold2)[[1]]
object2.species2 <- splitDgeByGenesAndCellsOfSpecies(object2, threshold2)[[2]]
common.genes.species1 <- intersect(row.names(object1.species1), row.names(object2.species1))
common.genes.species2 <- intersect(row.names(object1.species2), row.names(object2.species2))
object1.species1 <- object1.species1[row.names(object1.species1) %in% common.genes.species1, ]
object1.species2 <- object1.species2[row.names(object1.species2) %in% common.genes.species2, ]
object2.species1 <- object2.species1[row.names(object2.species1) %in% common.genes.species1, ]
object2.species2 <- object2.species2[row.names(object2.species2) %in% common.genes.species2, ]
object1.species1 <- max(colSums(object1.species1)) * sweep(object1.species1, MARGIN=2,
FUN='/', colSums(object1.species1))
object1.species2 <- max(colSums(object1.species2)) * sweep(object1.species2, MARGIN=2,
FUN='/', colSums(object1.species2))
object2.species1 <- max(colSums(object2.species1)) * sweep(object2.species1, MARGIN=2,
FUN='/', colSums(object2.species1))
object2.species2 <- max(colSums(object2.species2)) * sweep(object2.species2, MARGIN=2,
FUN='/', colSums(object2.species2))
big.df <- data.frame("genes" = c(common.genes.species1, common.genes.species2),
"sample1" = c(log2(rowSums(object1.species1)+1),
log2(rowSums(object1.species2)+1)),
"sample2" = c(log2(rowSums(object2.species1)+1),
log2(rowSums(object2.species2)+1)),
"species" = c(rep(object1@species1, length(common.genes.species1)),
rep(object1@species2, length(common.genes.species2))))
cor.species1 <- signif(cor(big.df$sample1[big.df$species == object1@species1],
big.df$sample2[big.df$species == object1@species1],
method="pearson"), 2)
cor.species2 <- signif(cor(big.df$sample1[big.df$species == object1@species2],
big.df$sample2[big.df$species == object1@species2],
method="pearson"), 2)
levels(big.df$species) <- c(paste0(object1@species1, " (R=", cor.species1, ")"),
paste0(object1@species2, " (R=", cor.species2, ")"))
comp.plot <- (ggplot(data = big.df, aes(x = sample1, y = sample2))
+ xlab(paste0(expression(log2), " (ATPM+1) [", name1, "]"))
+ ylab(paste0(expression(log2), " (ATPM+1) [", name2, "]"))
+ geom_point(aes(col=species), alpha = 1, size = 2)
+ facet_grid(~ species, labeller = ) + guides(col = F)
+ theme_minimal() + plotCommonGrid + plotCommonTheme
+ theme(axis.ticks.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=1)))
return (comp.plot)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.