In MyersGroup/testisAtlas: testisAtlas: R functions and analysis scripts used in the analysis of testis single cell RNAseq

Merge together all experimental files which are from mice testis to create largest possible data set.

Get files from server

```{bash, eval=FALSE} find . -name "*_out_gene_exon_tagged.dge.txt.gz" -print0 | cpio -o -H ustar -0 > count_matrices.tar

# Load individual DGEs

```r
library(data.table)

files_to_merge <- list.files(path="../data/count_matrices/",pattern="*_out_gene_exon_tagged.dge.txt.gz", recursive = TRUE)

files_to_merge

# load each file
individual_dges <- list()

for (file in files_to_merge){
individual_dges[[length(individual_dges)+1]] <- fread(paste0("zcat < ../data/count_matrices/", file))
names(individual_dges)[length(individual_dges)] <- strsplit(file,"/")[[1]][1] #gsub("_out_gene_exon_tagged.dge.txt.gz","",file)
}

cell_names <- list()

for (i in 1:length(individual_dges)){
setkey(individual_dges[[i]],GENE)  
key(individual_dges[[i]])

colnames(individual_dges[[i]]) <- paste0(colnames(individual_dges[[i]]),".",names(individual_dges)[i])
colnames(individual_dges[[i]])[1] <- "GENE"

cell_names[[i]] <- colnames(individual_dges[[i]])
}

names(cell_names) <- names(individual_dges)

#check duplicates in cell barcode
unlist(cell_names)[duplicated(unlist(cell_names))]

Summarise each DGE

& check for possible duplicates

merged2 <- data.table(GENE = character(), experiment = character(), expression=numeric())
setkey(merged2,GENE)

for (i in 1:length(individual_dges)){
  experiment_summary <- data.table(GENE = individual_dges[[i]]$GENE,
                                   experiment = colnames(individual_dges[[i]])[2],
                                   expression = rowSums(individual_dges[[i]][,-"GENE",with=FALSE]))
  merged2 <- rbind(merged2, experiment_summary)
}

merged_cast <- dcast(merged2, GENE ~ substring(experiment, 14), value.var="expression")

duplicated_genes <- merged_cast[duplicated(tolower(GENE))]$GENE

merged_cast[toupper(GENE) %in% toupper(duplicated_genes)][order(toupper(GENE))]

Merge DGEs

merged <- data.table(GENE = c("NOT_A_GENE_TEMP"))
setkey(merged,GENE)

for (i in 1:length(individual_dges)){
merged <- merge(merged, individual_dges[[i]], all=TRUE)
}

merged <- merged[GENE != "NOT_A_GENE_TEMP"]

individual_dges <- NULL
gc()

recheck for duplicate genes

sum(duplicated(merged$GENE))
merged$GENE[duplicated(toupper(merged$GENE))]
#data.table(t(merged[toupper(GENE)=="ATF7"]), keep.rownames = T)[is.na(V2)]

Convert NA to 0

library(Matrix)
# remove cells with no counts!
zero_cells <- which(colSums(merged[,!"GENE", with=FALSE], na.rm=TRUE)==0)

merged[, (zero_cells+1) := NULL]

# replace NA with 0
f_dowle3 = function(DT) {
  for (j in seq_len(ncol(DT))){
    set(DT,which(is.na(DT[[j]])),j,0)
  }
  return(DT)
}

merged <- f_dowle3(merged)

Save as sparse matrix

merged_M <- Matrix(as.matrix(merged[, -c("GENE"), with=FALSE]), sparse = TRUE)
rownames(merged_M) <- merged$GENE

str(merged_M)
saveRDS(merged_M, "../data/count_matrices/merged_dge_sparse.rds")

digest::digest(merged_M)

MyersGroup/testisAtlas documentation built on Nov. 29, 2020, 9:21 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

MyersGroup/testisAtlas
testisAtlas: R functions and analysis scripts used in the analysis of testis single cell RNAseq

In MyersGroup/testisAtlas: testisAtlas: R functions and analysis scripts used in the analysis of testis single cell RNAseq

Get files from server

Summarise each DGE

Merge DGEs

Convert NA to 0

Save as sparse matrix

R Package Documentation

Browse R Packages

We want your feedback!

MyersGroup/testisAtlas testisAtlas: R functions and analysis scripts used in the analysis of testis single cell RNAseq

In MyersGroup/testisAtlas: testisAtlas: R functions and analysis scripts used in the analysis of testis single cell RNAseq

Get files from server

Summarise each DGE

Merge DGEs

Convert NA to 0

Save as sparse matrix

R Package Documentation

Browse R Packages

We want your feedback!

MyersGroup/testisAtlas
testisAtlas: R functions and analysis scripts used in the analysis of testis single cell RNAseq