R/EBseq.R

Defines functions ebseq

Documented in ebseq

#' ebseq
#'
#' @importFrom EBSeq MedianNorm
#' @importFrom EBSeq GetNormalizedMat
#' @importFrom EBSeq EBTest
#' @importFrom EBSeq PostFC
#' @importFrom EBSeq GetPatterns
#' @importFrom EBSeq EBMultiTest
#' @importFrom EBSeq GetMultiPP
#' @importFrom EBSeq GetMultiFC
#' @importFrom EBSeq GetPPMat
#' @importFrom utils read.table
#' @importFrom utils write.table
#' @examples library(rnaseqviewer)
#'
#' #pairwise DEG analysis
#'
#' data(Row_count_data)
#' write.table(Row_count_data, file = "Row_count_data.txt", sep = "\t", quote = FALSE)
#' ebseq("Row_count_data.txt")
#'
#' #three conditions DEG analysis
#'
#' data("Row_count_3conditions")
#' write.table(Row_count_3conditions, file = "Row_count_3conditions.txt", sep = "\t", quote = FALSE)
#' ebseq("Row_count_3conditions.txt")
#'
#' @references Ning Leng and Christina Kendziorski (2020). EBSeq: An R package for gene and isoform differential expression analysis of RNA-seq data.
#' @param Row_count_matrix Row Count matrix txt file (Not normalized count matrix)
#' @export
#'
ebseq <- function(Row_count_matrix){
  DataMat <- data.matrix(read.table(Row_count_matrix, header = T, row.names = 1, sep = "\t"))
  if(length(grep("/", Row_count_matrix) == 1)){
    dir_name <- paste0(gsub("/[^/]+$", "", Row_count_matrix), "/")
    file_name <- gsub(gsub("/[^/]+$", "", Row_count_matrix), "", Row_count_matrix)
    file_name <- gsub("/", "",file_name)
  }else{
    dir_name <- ""
    file_name <- Row_count_matrix
  }
  file_name <- gsub("\\..+$", "", file_name)
  collist <- gsub("\\_.+$", "", colnames(DataMat))
  if (length(unique(collist)) == 2) {
    name <- paste0(paste0(unique(collist)[1], "-vs-"), paste0(unique(collist)[2], "_EBseq"))}
  if (length(unique(collist)) == 3) {
    name <- paste0(paste0(unique(collist)[1], "-vs-"),
                   paste0(paste0(unique(collist)[2], "-vs-"),
                          paste0(unique(collist)[3], "_EBseq")))}

  print(name)
  vec <- c()
  for (i in 1:length(unique(collist))) {
    num <- length(collist[collist == unique(collist)[i]])
    vec <- c(vec, num)
  }
  ngvector <- NULL
  conditions <- as.factor(rep(paste("C", 1:length(unique(collist)), sep=""), times = vec))
  Sizes <- MedianNorm(DataMat)
  NormMat <- GetNormalizedMat(DataMat, Sizes)
  output_file <-paste0(paste0(dir_name, paste0("result_of_", file_name)), paste0("_", name))
  if (length(unique(collist)) == 2) {
    EBOut <- NULL
    EBOut <- EBTest(Data = DataMat, NgVector = ngvector, Conditions = conditions, sizeFactors = Sizes, maxround = 5)
    stopifnot(!is.null(EBOut))

    PP <- as.data.frame(GetPPMat(EBOut))
    fc_res <- PostFC(EBOut)

    results <- cbind(PP, fc_res$PostFC, fc_res$RealFC,unlist(EBOut$C1Mean)[rownames(PP)], unlist(EBOut$C2Mean)[rownames(PP)])
    colnames(results) <- c("PPEE", "PPDE", "PostFC", "RealFC","C1Mean","C2Mean")
    results <- results[order(results[,"PPDE"], decreasing = TRUE),]
    write.table(results, file = paste0(output_file, ".txt"), sep = "\t")

  } else {
    patterns <- GetPatterns(conditions)
    eename <- rownames(patterns)[which(rowSums(patterns) == length(unique(collist)))]
    stopifnot(length(eename) == 1)

    MultiOut <- NULL
    MultiOut <- EBMultiTest(Data = DataMat, NgVector = ngvector, Conditions = conditions, AllParti = patterns, sizeFactors = Sizes, maxround = 5)
    stopifnot(!is.null(MultiOut))

    MultiPP <- GetMultiPP(MultiOut)

    PP <- as.data.frame(MultiPP$PP)
    pos <- which(names(PP) == eename)
    probs <- rowSums(PP[,-pos])

    results <- cbind(PP, MultiPP$MAP[rownames(PP)], probs)
    colnames(results) <- c(colnames(PP), "MAP", "PPDE")
    ord <- order(results[,"PPDE"], decreasing = TRUE)
    results <- results[ord,]
    write.table(results, file = paste0(output_file, ".txt"), sep = "\t")

    write.table(MultiPP$Patterns, file = paste(output_file, ".pattern", sep = ""), sep = "\t")

    MultiFC <- GetMultiFC(MultiOut)
    write.table(MultiFC$CondMeans[ord,], file = paste(output_file, ".condmeans", sep = ""), sep = "\t")
  }
  norm_out_file <- paste0(paste0(dir_name, paste0("Normalized_count_matrix_from_"), file_name),
                          paste0(paste0("_", name), ".txt"))
  write.table(NormMat, file = norm_out_file, sep = "\t")
}
Kan-E/rnaseqviewer documentation built on May 30, 2022, 10:34 a.m.