library(data.table)
library(dropbead)
# load a DGE to test
d <- data.frame(fread("zcat < /data/rajewsky/home/nkarais/projects/hek3t3/raw/macosko/SpeciesMix_ThousandSTAMPs_50cellspermicroliter/SRR1748411/subset/dge.txt.gz"), row.names = 1)
m <- new("MixedSpeciesSample", species1="human", species2="mouse", dge=d)
sh <- splitMixedSpeciesSampleToSingleSpecies(m, 0.9)[[1]]
sm <- splitMixedSpeciesSampleToSingleSpecies(m, 0.9)[[2]]
do <- data.frame(fread("zcat < /mydaten/projects/hek3t3/data/ds_012_50/dge.txt.gz"), row.names = 1)
mo <- new("MixedSpeciesSample", species1="human", species2="mouse", dge=do)
sho <- splitMixedSpeciesSampleToSingleSpecies(mo, 0.9)[[1]]
smo <- splitMixedSpeciesSampleToSingleSpecies(mo, 0.9)[[2]]
d.hek.50 <- data.frame(fread("zcat < /mydaten/hek3t3fly/Rajewsky/NR_CK_020/ds_009_50/dge.txt.gz"), row.names = 1)
o.50 <- new("MixedSpeciesSample", species1="human", species2="mouse", dge=d.hek.50)
createSamplesForTesting <- function() {
genes <- c(sample(letters, 25), sample(paste0(letters, letters), 25),
sample(LETTERS, 25), sample(paste0(LETTERS, LETTERS), 25))
cells <- c()
for (i in 1:510) {
cells <- c(cells, paste(sample(c("A", "C", "G", "T"), 12, replace=T), collapse=""))
}
for (m in c(57, 172, 311, 489)) {
cells[m] <- paste0(substr(cells[m-1], 1, 11), "N")
}
dge.human.cells <- data.frame(matrix(0, ncol=240, nrow=50))
dge.mouse.cells <- data.frame(matrix(0, ncol=260, nrow=50))
dge.full <- data.frame(matrix(0, ncol=510, nrow=100))
for (i in 1:10) {
dge.full[, i] <- round(runif(100, 0, 2*i))
}
for (i in 11:240) {
dge.full[, i] <- c(sample(4, 50, replace = T, prob = c(0.6, .15, .15, .1))-1, sample(i/2, 50, replace = T)-1)
dge.human.cells[, i-10] <- dge.full[51:100, i]
}
for (i in 241:250) {
dge.full[, i] <- c(sample(4, 50, replace = T, prob = c(0.6, .15, .15, .1))-1, sample(i/2, 50, replace = T)-1)
dge.human.cells[, i-10] <- dge.full[51:100, i]
}
for (i in 251:280) {
dge.full[, i] <- c(sample((i-240)/2, 50, replace = T)-1, sample(4, 50, replace = T, prob = c(0.6, .15, .15, .1))-1)
dge.mouse.cells[, i-250] <- dge.full[1:50, i]
}
for (i in 281:510) {
dge.full[, i] <- c(sample((i-240)/2, 50, replace = T)-1, sample(4, 50, replace = T, prob = c(0.6, .15, .15, .1))-1)
dge.mouse.cells[, i-250] <- dge.full[1:50, i]
}
rownames(dge.full) <- genes
rownames(dge.human.cells) <- genes[51:100]
rownames(dge.mouse.cells) <- genes[1:50]
names(dge.full) <- cells
names(dge.human.cells) <- names(dge.full)[11:250]
names(dge.mouse.cells) <- names(dge.full)[251:510]
single.human <- new("SingleSpeciesSample", species1="human", dge=dge.human.cells)
single.mouse <- new("SingleSpeciesSample", species1="mouse", dge=dge.mouse.cells)
mixed <- new("MixedSpeciesSample", species1="human", species2="mouse", dge=dge.full)
return (list(mixed, single.human, single.mouse))
}
testSplittingMixedToSingleSpecies <- function() {
in.silico.samples <- createSamplesForTesting()
mixed <- in.silico.samples[[1]]
single.human <- in.silico.samples[[2]]
single.mouse <- in.silico.samples[[3]]
# Test number of cells for each species
if (!table(classifyCellsAndDoublets(mixed, 0.6, min.trans=0)$species)[[1]] == dim(single.human@dge)[2]) {
print ("Error! Number of human cells doesn't match.")
} else {
print ("Testing number of human cells... Passed")
}
if (!table(classifyCellsAndDoublets(mixed, 0.6, min.trans=0)$species)[[3]] == dim(single.mouse@dge)[2]) {
print ("Error! Number of mouse cells doesn't match.")
} else {
print ("Testing number of mouse cells... Passed")
}
# Test number of genes per cell per species
if (sum(computeGenesPerCell(mixed, 0.6, min.reads = 0)[1:240, 'counts'] == computeGenesPerCell(single.human, min.reads = 0)$counts) != 240) {
print ("Error! The number of genes in human cells don't match")
} else {
print ("Testing number of genes in human cells... Passed")
}
if (sum(computeGenesPerCell(mixed, 0.6, min.reads=0)[241:500, 'counts'] == computeGenesPerCell(single.mouse, min.reads = 0)$counts) != 260) {
print ("Error! The number of genes in mouse cells don't match")
} else {
print ("Testing number of genes in mouse cells... Passed")
}
# Test number of transcripts per cell per species
if (sum(computeTranscriptsPerCell(mixed, 0.6)[1:240, 'counts'] == computeTranscriptsPerCell(single.human)$counts) != 240) {
print ("Error! The number of transcripts in human cells don't match")
} else {
print ("Testing number of transcripts in human cells... Passed")
}
if (sum(computeTranscriptsPerCell(mixed, 0.6)[241:500, 'counts'] == computeTranscriptsPerCell(single.mouse)$counts) != 260) {
print ("Error! The number of transcripts in mouse cells don't match")
} else {
print ("Testing number of transcripts in mouse cells... Passed")
}
# Testing collapsing of cells by barcodes
collapseDf <- classifyCellsAndDoublets(collapseCellsByBarcode(mixed, 0.6), 0.6)
collapseDfS <- collapseCellsByBarcode(single.human)@cells
if (sum(collapseDf[collapseDf$species == "human", ]$cell == collapseCellsByBarcode(single.human)@cells) == 238) {
print ("Testing collapsing human cells by barcode... Passed")
} else {
print ("Error! Collapsing human cells by barcodes is not the same for single and mixed species.")
}
if (sum(collapseDf[collapseDf$species == "mouse", ]$cell == collapseCellsByBarcode(single.mouse)@cells) == 258) {
print ("Testing collapsing mouse cells by barcode... Passed")
} else {
print ("Error! Collapsing mouse cells by barcodes is not the same for single and mixed species.")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.