# R/filterCounts.R
#
# Defines functions: filterCounts
#
# Documented in: filterCounts

#' @title Filter a count table using a table of DE from wrapperDeseq2
#' @description This function generates counts, FPKM and TPM tables (counts and CPM tables for miRNA) including only the differentially expressed genes present in the set of DE generated with wrapperDeseq2. Mean-centered versions of the FPKM/TPM (log2(CPM+1) for miRNA) tables are written as well. The exit status is also stored in the file ExitStatusFile inside data.folder.
#' @param data.folder a character string indicating the path of the rnaseqCounts/mirnaCounts output folder
#' @param type character with three options: gene, isoform, mirna. When omitted the first option, gene, is used.
#' @author Raffaele Calogero
#'
#' @return Invisibly 0 on success. On failure a numeric code is returned: 3 when the DE table is missing, 4 when more than one DE table is found, 2 or 1 when one of the required counts/cpm/FPKM/TPM tables is missing. The filtered tables are written in data.folder as files prefixed with DEfiltered and DEfiltered-mean-centered.
#' @examples
#'\dontrun{
#'     system("wget 130.192.119.59/public/test.analysis.zip")
#'     unzip("test.analysis.zip")
#'     setwd("test.analysis")
#'     library(docker4seq)
#'     wrapperDeseq2(output.folder=getwd(), group="docker", experiment.table="_counts.txt", log2fc=1,
#'     fdr=0.1, ref.covar="Cov.1", type="gene", batch=FALSE)
#'
#'     filterCounts(data.folder=getwd(), type="gene")
#'
#' }
#' @export
filterCounts <- function(data.folder, type=c("gene", "isoform", "mirna")){

  # collapse the default choice vector to one validated value; comparing the
  # whole vector inside if() is an error in recent R versions
  type <- match.arg(type)

  # remembering actual folder and making sure it is restored on every exit
  # path, including unexpected errors raised while reading/writing tables
  home <- getwd()
  setwd(data.folder)
  on.exit(setwd(home), add = TRUE)

  #---- local helpers ----
  # store the exit status in ExitStatusFile (same content as `echo N > file`)
  set.status <- function(status){
    writeLines(as.character(status), "ExitStatusFile")
  }
  # report a problem, record the status and hand back the code to return
  fail <- function(msg, code, status = code){
    cat(msg)
    set.status(status)
    code
  }
  # tab delimited table with header, first column used as row names
  read.tab <- function(file){
    read.table(file, sep="\t", header=TRUE, row.names=1, stringsAsFactors=FALSE)
  }
  # col.names=NA adds the empty header cell above the row names column
  write.tab <- function(x, file){
    write.table(x, file, sep="\t", col.names=NA)
  }
  # keep only the rows listed in the DE table; drop=FALSE preserves the table
  # structure when there is a single sample column
  keep.de <- function(df, de.ids){
    df[rownames(df) %in% de.ids, , drop=FALSE]
  }
  # subtract from every row its own mean; the vector of row means is recycled
  # down the columns, so element [i, j] is centred with the mean of row i
  center.rows <- function(mat){
    mat <- as.matrix(mat)
    mat - apply(mat, 1, mean)
  }

  msg.no.de <- "\nIt seems that DE generated by wrapperDeseq2 function is not present\n"
  msg.multi.de <- "\nYou have more then one file with prefix Defilter, please remove all except one.\n"

  #initialize status
  set.status(1)

  dir <- dir()

  if(type == "mirna"){

    counts.file <- dir[grep("counts.txt", dir)]
    cpm.file <- dir[grep("cpm.txt", dir)]
    if(length(counts.file) == 0){
      return(fail("\nIt seems that counts files generated by mirnaCounts function are not present\n", code=1))
    }
    if(length(cpm.file) == 0){
      # status file value kept at 1 as in the previous implementation
      return(fail("\nIt seems that cpm file generated by mirnaCounts function is not present\n", code=2, status=1))
    }
    de.file <- dir[grep("^DEfiltered", dir)]
    if(length(de.file) == 0){
      return(fail(msg.no.de, code=3))
    }
    if(length(de.file) > 1){
      return(fail(msg.multi.de, code=4))
    }

    # the DE table is the same for every file, read it only once
    de.ids <- rownames(read.tab(de.file))

    for(i in counts.file){
      write.tab(keep.de(read.tab(i), de.ids), paste0("DEfiltered_", i))
    }

    cpm.df <- keep.de(read.tab(cpm.file), de.ids)
    write.tab(cpm.df, paste0("DEfiltered_", cpm.file))
    # cpm are not log transformed on disk: center log2(cpm + 1)
    write.tab(center.rows(log2(as.matrix(cpm.df) + 1)),
              paste0("DEfiltered-mean-centered_", cpm.file))

  }else{

    # gene and isoform level tables differ only for the tag in the file names
    tag <- if(type == "isoform") "_isoforms" else ""
    keyword <- if(type == "isoform") "isoform" else "gene"

    de.file <- dir[grepl("^DEfiltered", dir) & grepl(keyword, dir)]
    if(type == "gene" && length(de.file) == 0){
      # gene level only: fall back on the ANOVA-like filtered table
      de.file <- dir[grep("^filtered_ANOVAlike_", dir)]
    }
    if(length(de.file) == 0){
      return(fail(msg.no.de, code=3))
    }
    if(length(de.file) > 1){
      # read.table cannot handle more than one file name
      return(fail(msg.multi.de, code=4))
    }

    counts.file <- dir[grep(paste0("^", tag, "_counts.txt$"), dir)]
    if(length(counts.file) == 0){
      return(fail("\nIt seems that _counts.txt file is not present\n", code=2))
    }

    fpkm.file <- dir[grep(paste0("^", tag, "_log2FPKM.txt$"), dir)]
    if(length(fpkm.file) == 0){
      return(fail("\nIt seems that _log2FPKM.txt is not present\n", code=1))
    }

    tpm.file <- dir[grep(paste0("^", tag, "_log2TPM.txt$"), dir)]
    if(length(tpm.file) == 0){
      return(fail("\nIt seems that _log2TPM.txt is not present\n", code=1))
    }

    de.ids <- rownames(read.tab(de.file))

    # counts.file already starts with "_", hence no separator in the prefix
    write.tab(keep.de(read.tab(counts.file), de.ids), paste0("DEfiltered", counts.file))

    # FPKM and TPM tables are already log2 transformed
    for(i in c(fpkm.file, tpm.file)){
      expr.df <- keep.de(read.tab(i), de.ids)
      write.tab(expr.df, paste0("DEfiltered_", i))
      write.tab(center.rows(expr.df), paste0("DEfiltered-mean-centered", i))
    }

  }

  set.status(0)
  invisible(0)

}
# kendomaniac/docker4seq documentation built on April 8, 2024, 5:39 p.m.