#' @title Filter a count table using a table of DE from wrapperDeseq2
#' @description This function generates counts, FPKM and TPM tables (counts and CPM tables for miRNA) including only the differentially expressed genes present in the set of DE generated with wrapperDeseq2. Mean-centered versions of the log2 expression tables are written as well. All output files are written in data.folder, together with an ExitStatusFile holding 0 on success or a non-zero code on failure.
#' @param data.folder a character string indicating the path of the rnaseqCounts/mirnaCounts output folder
#' @param type character with three options: gene, isoform, mirna. When omitted, the first option (gene) is used.
#' @author Raffaele Calogero
#'
#' @return On failure, a numeric code: 1 or 2 when an expected counts/expression table is missing, 3 when no DE table is found, 4 when more than one DE table is found. On success the path of data.folder is returned invisibly and the filtered tables are written to disk (e.g. DEfiltered_counts.txt, DEfiltered__log2FPKM.txt, DEfiltered__log2TPM.txt and the corresponding DEfiltered-mean-centered files for type="gene").
#' @examples
#'\dontrun{
#' system("wget 130.192.119.59/public/test.analysis.zip")
#' unzip("test.analysis.zip")
#' setwd("test.analysis")
#' library(docker4seq)
#' wrapperDeseq2(output.folder=getwd(), group="docker", experiment.table="_counts.txt", log2fc=1,
#' fdr=0.1, ref.covar="Cov.1", type="gene", batch=FALSE)
#'
#' filterCounts(data.folder=getwd(), type="gene")
#'
#' }
#' @export
filterCounts <- function(data.folder, type=c("gene", "isoform", "mirna")){
  # Work inside data.folder and always go back to the caller's directory,
  # even when reading or writing a table fails half-way through.
  home <- setwd(data.folder)
  on.exit(setwd(home), add = TRUE)

  # ---- local helpers -------------------------------------------------------
  # Record the exit status in ExitStatusFile (one line holding the code).
  write_status <- function(status) {
    writeLines(as.character(status), "ExitStatusFile")
  }
  # Print a message, record `status` on disk and hand back the return `code`.
  # The two numbers are kept separate because some historical failure paths
  # write a status that differs from the value they return.
  fail <- function(msg, status, code) {
    cat(msg)
    write_status(status)
    code
  }
  # Read a tab-delimited table whose first column holds the row names.
  read_tab <- function(path) {
    utils::read.table(path, sep = "\t", header = TRUE, row.names = 1,
                      stringsAsFactors = FALSE)
  }
  # Write a tab-delimited table; col.names = NA adds the empty header cell
  # above the row names so that header and data columns stay aligned.
  write_tab <- function(x, path) {
    utils::write.table(x, path, sep = "\t", col.names = NA)
  }
  # Keep only the rows whose name is present in the DE table.
  keep_de <- function(x, de) {
    x[which(rownames(x) %in% rownames(de)), , drop = FALSE]
  }
  # Subtract from every row its own mean; apply() returns the rows as columns,
  # hence the transposition.
  center_rows <- function(x) {
    t(apply(x, 1, function(v) v - mean(v)))
  }

  # Status stays at 1 (failure) until the whole procedure completes.
  write_status(1)
  # Resolves the default vector to "gene" and rejects unknown values.
  type <- match.arg(type)
  files <- dir()

  if (type == "mirna") {
    counts.file <- files[grep("counts.txt", files)]
    cpm.file <- files[grep("cpm.txt", files)]
    if (length(counts.file) == 0) {
      return(fail("\nIt seems that counts files generated by mirnaCounts function are not present\n", 1, 1))
    }
    if (length(cpm.file) == 0) {
      return(fail("\nIt seems that cpm file generated by mirnaCounts function is not present\n", 1, 2))
    }
    # NOTE(review): files written by a previous run also start with
    # "DEfiltered", so re-running in the same folder ends in code 4 - confirm
    # whether that is the intended behaviour.
    de.file <- files[grep("^DEfiltered", files)]
    if (length(de.file) == 0) {
      return(fail("\nIt seems that DE generated by wrapperDeseq2 function is not present\n", 3, 3))
    }
    if (length(de.file) > 1) {
      return(fail("\nYou have more then one file with prefix Defilter, please remove all except one.\n", 4, 4))
    }

    # The DE table is the same for every counts file: read it once.
    de.df <- read_tab(de.file)
    for (f in counts.file) {
      write_tab(keep_de(read_tab(f), de.df), paste0("DEfiltered_", f))
    }
    cpm.df <- keep_de(read_tab(cpm.file), de.df)
    write_tab(cpm.df, paste0("DEfiltered_", cpm.file))
    # CPM values are on the linear scale: log2(x + 1) before centering.
    write_tab(center_rows(log2(as.matrix(cpm.df) + 1)),
              paste0("DEfiltered-mean-centered_", cpm.file))
  } else {
    # gene and isoform follow the same procedure, only file names differ.
    prefix <- if (type == "gene") "_" else "_isoforms_"
    counts.file <- paste0(prefix, "counts.txt")
    fpkm.file <- paste0(prefix, "log2FPKM.txt")
    tpm.file <- paste0(prefix, "log2TPM.txt")
    # Output names are kept exactly as they have always been written
    # (including the double underscore of e.g. DEfiltered__log2FPKM.txt).
    counts.out <- paste0("DEfiltered", counts.file)
    expr.out <- paste0("DEfiltered_", c(fpkm.file, tpm.file))
    centered.out <- paste0("DEfiltered-mean-centered", c(fpkm.file, tpm.file))

    de.file <- files[grepl("^DEfiltered", files) & grepl(type, files)]
    # Files produced by a previous run of this function are not DE tables.
    de.file <- setdiff(de.file, c(counts.out, expr.out, centered.out))
    if (type == "gene" && length(de.file) == 0) {
      de.file <- files[grep("^filtered_ANOVAlike_", files)]
    }
    if (length(de.file) == 0) {
      return(fail("\nIt seems that DE generated by wrapperDeseq2 function is not present\n", 3, 3))
    }
    if (length(de.file) > 1) {
      return(fail("\nMore than one DE table was found, please remove all except one.\n", 4, 4))
    }
    if (!(counts.file %in% files)) {
      return(fail(paste0("\nIt seems that ", counts.file, " file is not present\n"), 2, 2))
    }
    if (!(fpkm.file %in% files)) {
      return(fail(paste0("\nIt seems that ", fpkm.file, " is not present\n"), 1, 1))
    }
    if (!(tpm.file %in% files)) {
      return(fail(paste0("\nIt seems that ", tpm.file, " is not present\n"), 1, 1))
    }

    de.df <- read_tab(de.file)
    write_tab(keep_de(read_tab(counts.file), de.df), counts.out)
    # FPKM and TPM tables are already log2 transformed: center them as they are.
    expr.in <- c(fpkm.file, tpm.file)
    for (i in seq_along(expr.in)) {
      expr.df <- keep_de(read_tab(expr.in[i]), de.df)
      write_tab(expr.df, expr.out[i])
      write_tab(center_rows(expr.df), centered.out[i])
    }
  }

  write_status(0)
  # Same invisible value the function has always returned on success
  # (the data folder path); the working directory is restored by on.exit().
  invisible(getwd())
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.