#' @title Generate gene/microRNA expression matrix from single sequencing data files downloaded from GDC Data Portal.
#'
#' @description \code{tcgaTableGenerator} generates a gene/microRNA expression
#'   matrix from the per-sample sequencing data files downloaded from GDC Data
#'   Portal. The MANIFEST.txt file should be in the same directory.
#'
#' @details The \code{id} column of \code{MANIFEST.txt} is read and every entry
#'   (except the placeholder id made of a backslash followed by \code{N}) is
#'   treated as the name of a sub-directory of \code{dataDir} holding one data
#'   file:
#'   \itemize{
#'     \item \code{'mRNA'}: the file whose name contains \code{FPKM.txt.gz}; it
#'       is read as a tab-separated table whose first column holds the row
#'       names, and the first remaining column is used as the expression value.
#'     \item \code{'microRNA'}: the file whose name contains
#'       \code{.mirbase21.mirnas.}; it is read as a tab-separated table with a
#'       header, and the column \code{reads_per_million_miRNA_mapped} is used.
#'   }
#'   The row names of the first file define the rows of the result. Every other
#'   file must contain exactly the same set of row names; its values are
#'   aligned to the first file by name, so a different row order is handled
#'   correctly. An error is raised if the manifest has no usable entry, if a
#'   sample directory does not contain exactly one matching file, or if the
#'   files do not share the same row names.
#'
#' @param dataDir The directory of downloaded sequencing data files as well as the MANIFEST.txt file.
#' @param dataType A string, 'microRNA' for microRNA-seq data or 'mRNA' for RNA-seq data.
#'
#' @return A gene/microRNA expression data matrix, with rows referring to
#'   genes/microRNAs and columns to samples (columns are named by manifest id).
#'
#' @export tcgaTableGenerator
#'
#' @examples
#' \dontrun{
#' tcgaTableGenerator(dataDir = './TCGA_RNAseq_LUAD', dataType = 'mRNA')
#' tcgaTableGenerator(dataDir = './TCGA_miRNAseq_LUAD', dataType = 'microRNA')
#' }
tcgaTableGenerator <- function(dataDir, dataType) {
  # Reject unsupported data types up front instead of failing later with an
  # unrelated error while building the matrix.
  dataType <- match.arg(dataType, c("mRNA", "microRNA"))

  manifestFile <- file.path(dataDir, "MANIFEST.txt")
  if (!file.exists(manifestFile)) {
    stop("MANIFEST.txt not found in directory: ", dataDir, call. = FALSE)
  }
  mani.v <- as.character(read.table(manifestFile, header = TRUE)[, "id"])
  # Drop the '\N' placeholder id (and missing ids, which cannot name a folder).
  mani <- mani.v[!is.na(mani.v) & mani.v != "\\N"]
  if (length(mani) == 0L) {
    stop("no usable sample ids found in ", manifestFile, call. = FALSE)
  }

  # Per data type: literal file-name fragment to look for and the column that
  # carries the expression value.
  if (dataType == "mRNA") {
    filePattern <- "FPKM.txt.gz"
    valueCol <- 1L
  } else {
    filePattern <- ".mirbase21.mirnas."
    valueCol <- "reads_per_million_miRNA_mapped"
  }

  # Read the single data file stored in the sub-directory named after `id`.
  readSample <- function(id) {
    sampleDir <- file.path(dataDir, id)
    files <- list.files(sampleDir, full.names = TRUE)
    # Match literally and on the file name only, so that dots are not regex
    # wildcards and the directory part of the path cannot cause a hit.
    hit <- files[grepl(filePattern, basename(files), fixed = TRUE)]
    if (length(hit) != 1L) {
      stop(
        "expected exactly one file matching '", filePattern, "' in ",
        sampleDir, ", found ", length(hit),
        call. = FALSE
      )
    }
    if (dataType == "mRNA") {
      tab <- read.table(gzfile(hit), sep = "\t", row.names = 1)
    } else {
      tab <- read.table(hit, sep = "\t", row.names = 1, header = TRUE)
    }
    hasValue <- if (is.character(valueCol)) {
      valueCol %in% colnames(tab)
    } else {
      ncol(tab) >= valueCol
    }
    if (!hasValue) {
      stop("expression column not found in file: ", hit, call. = FALSE)
    }
    tab
  }

  exp.l <- vector(mode = "list", length = length(mani))
  for (i in seq_along(mani)) {
    exp.l[[i]] <- readSample(mani[i])
    message("read in file ", i)
  }

  # The first file defines the rows; every other file is aligned to it by row
  # name rather than by position.
  features <- row.names(exp.l[[1]])
  exp.m <- matrix(
    NA,
    nrow = length(features),
    ncol = length(exp.l),
    dimnames = list(features, mani)
  )
  for (i in seq_along(exp.l)) {
    tab <- exp.l[[i]]
    rowIdx <- match(features, row.names(tab))
    if (nrow(tab) != length(features) || anyNA(rowIdx)) {
      stop(
        "file for sample '", mani[i],
        "' does not contain the same genes/microRNAs as the first sample",
        call. = FALSE
      )
    }
    exp.m[, i] <- tab[rowIdx, valueCol]
  }
  exp.m
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.