R/transcriptome.R

Defines functions DESeq2_ysx multipleGroupDEgenes normalizedExpr2DistribBoxplot deseq2normalizedExpr readscount2deseq salmon2deseq

Documented in deseq2normalizedExpr DESeq2_ysx multipleGroupDEgenes normalizedExpr2DistribBoxplot readscount2deseq salmon2deseq

# Some useful keyboard shortcuts for package authoring:
#
#   Build and Reload Package:  'Ctrl + Shift + B'
#   Check Package:             'Ctrl + Shift + E'
#   Test Package:              'Ctrl + Shift + T'
#   Generate DOC:              'Ctrl + Shift + Alt + r'



#' Iniitialize a DESeq2 object from salmon output.
#'
#' @param salmon_file_list A two-column file with the first column as sample names and
#' second column containing the path of quant.sf generated by salmon. Header line
#' is required but column names do not matter.
#'
#' Below is an example (pay attention to the **path** of quant.sf)
#'
#' ```
#' Samp	path_quant.sf
#' untrt_N61311	untrt_N61311/untrt_N61311.salmon.count/quant.sf
#' untrt_N052611	untrt_N052611/untrt_N052611.salmon.count/quant.sf
#' untrt_N080611	untrt_N080611/untrt_N080611.salmon.count/quant.sf
#' untrt_N061011	untrt_N061011/untrt_N061011.salmon.count/quant.sf
#' trt_N61311	trt_N61311/trt_N61311.salmon.count/quant.sf
#' trt_N052611	trt_N052611/trt_N052611.salmon.count/quant.sf
#' trt_N080611	trt_N080611/trt_N080611.salmon.count/quant.sf
#' trt_N061011	trt_N061011/trt_N061011.salmon.count/quant.sf
#' ```
#'
#' @param sampleFile A file containing at least two columns. The first column is sample name just
#' like the first column of `salmon_file_list`. Other columns are sample attributes.
#' Normally one of sample attributes should contain the group information each sample belongs to.
#'
#' One simple example (conditions represent group information)
#'
#' ```
#' Samp	conditions
#' untrt_N61311	untrt
#' untrt_N052611	untrt
#' untrt_N080611	untrt
#' untrt_N061011	untrt
#' trt_N61311	trt
#' trt_N052611	trt
#' trt_N080611	trt
#' trt_N061011	trt
#' ```
#'
#' Another example (3rd column meaning samples from two batches)
#'
#' ```
#' Samp	conditions  batch
#' untrt_N61311	untrt A
#' untrt_N052611	untrt A
#' untrt_N080611	untrt B
#' untrt_N061011	untrt B
#' trt_N61311	trt A
#' trt_N052611	trt A
#' trt_N080611	trt B
#' trt_N061011	trt B
#' ```
#'
#' @param design A column name from "sampleFile" like "conditions" in example.
#' This will be used as group variable for DE tests. Currently only simple
#' design is allowed. If one wants to model multiple variables, construct
#' one representation of super variable as indicated in
#' \url{https://support.bioconductor.org/p/67600/#67612} may be useful.
#'
#' @param covariate Names of columns containing informations maybe covariates
#' like batch effects or other sample info. Multiple covariates should be
#' supplied as a vector.
#'
#'
#' @param tx2gene Optional and only used if one want to get gene expression
#' instead of transcript expression.
#' A two-column file with the first column as transcript names and
#' second column as gene names. Header line is required but column names do not matter.
#'
#' Below is an example of file contents.
#'
#' ```
#' txname	gene
#' ENST00000456328	ENSG00000223972
#' ENST00000450305	ENSG00000223972
#' ENST00000488147	ENSG00000227232
#' ENST00000619216	ENSG00000278267
#' ENST00000473358	ENSG00000243485
#' ```
#'
#' @param filter Filter genes with low read counts. Default genes with total
#' reads count lower than half of number of samples will be filtered out.
#' One can give any number here. Normally default is OK. The DESeq2 will ao
#' auto filter too. Check \url{https://www.bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html}.
#'
#' @param rundeseq Default \code{TRUE}. The function will perfrom deseq analysis
#' using \code{\link[DESeq2]{DESeq}} and return analyzed DESeqDataSet object.
#' If \code{FALSE}, just return a DESeqDataSet object and one can
#' run \code{\link[DESeq2]{DESeq}}on it with more customed parameters.
#'
#' @return A DESeqDataSet object.
#' @export
#'
#' @examples
#'
#' dds <- salmon2deseq(salmon_file_list, sampleFile, "conditions")
#' dds <- salmon2deseq(salmon_file_list, tx2gene=tx2gene,
#'                     sampleFile, "conditions")
#'
#'
salmon2deseq <- function(salmon_file_list, sampleFile, design, covariate=NULL,
                         tx2gene=NULL, filter=NULL, rundeseq=T) {
  salmon_file <- read.table(salmon_file_list, header=T,  row.names=1, sep="\t")
  salmon_file_rowname <- rownames(salmon_file)
  salmon_file <- as.vector(salmon_file[,1])
  names(salmon_file) <- salmon_file_rowname

  exist_or_not <- file.exists(salmon_file)
  if(any(exist_or_not)==FALSE){
    stop(paste("The following files can not be found:",
               paste(salmon_file[!exist_or_not], collapse="\n"), sep="\n"))
  }

  if(!is.null(covariate)){
    covariate <- paste(covariate, collapse="+")
    formula <- as.formula(paste("~", covariate,"+", design))
  } else {
    formula <- as.formula(paste("~", design))
  }

  if (!is.null(tx2gene)){
    tx2gene <- read.table(tx2gene, header=T, row.names=NULL, sep="\t")
  }
  # 整合读入的salmon文件和transcript2gene文件
  txi <- tximport::tximport(salmon_file, type = "salmon", tx2gene = tx2gene)

  sample <- read.table(sampleFile, header=T, row.names=1, com='',
                       quote='', check.names=F, sep="\t")
  sample <- sample[match(colnames(txi$counts), rownames(sample)),, drop=F]

  dds <- DESeq2::DESeqDataSetFromTximport(txi, colData=sample, design=formula)
  print(paste("Read in", nrow(dds),"genes"))

  if(is.null(filter)){
    filter = nrow(sample)/2
  }

  keep <- rowSums(DESeq2::counts(dds))>filter
  dds <- dds[keep,]
  print(paste(nrow(dds),"genes remained after filtering of genes with all counts less than", nrow(sample)/2, "in all samples."))

  if(rundeseq){
    print("Perform DESeq on given datasets.")
    dds <- DESeq(dds)
  }
  return(dds)
}



#' Iniitialize a DESeq2 object from raw reads count matrix.
#'
#' @param count_matrix_file A multiple column file with the first column as gene
#' names (must be unique) and other columns as gene expression reads count in
#' related samples.
#'
#' ```
#' Gene untrt_N61311  untrt_N052611 ... trt_N61311  trt_N052611 ...
#' GeneA  2 3 ... 10  20  ...
#' GeneB  2 3 ... 100  220  ...
#' GeneC  12 33 ... 10  20  ...
#' GeneD  222 301 ... 10  20  ...
#' ```
#'
#' @inheritParams salmon2deseq
#'
#' @return A DESeqDataSet object.
#' @export
#'
#' @examples
#'
#'
#' dds <- readscount2deseq(count_matrix_file, sampleFile, "conditions")
#'
#'
readscount2deseq <- function(count_matrix_file, sampleFile, design, covariate=NULL,
                         filter=NULL, rundeseq=T) {

  data <- read.table(count_matrix_file, header=T, row.names=1, com='', quote='',
                     check.names=F, sep="\t")

  if(!is.null(covariate)){
    covariate <- paste(covariate, collapse="+")
    formula <- as.formula(paste("~", covariate,"+", design))
  } else {
    formula <- as.formula(paste("~", design))
  }

  sample <- read.table(sampleFile, header=T, row.names=1, com='',
                       quote='', check.names=F, sep="\t")
  sample <- sample[match(colnames(data), rownames(sample)),, drop=F]

  dds <- DESeqDataSetFromMatrix(countData = data,
                                colData = sample,  design=formula)

  print(paste("Read in", nrow(dds),"genes"))

  if(is.null(filter)){
    filter = nrow(sample)/2
  }

  keep <- rowSums(DESeq2::counts(dds))>filter
  dds <- dds[keep,]
  print(paste(nrow(dds),"genes remained after filtering of genes with all counts less than", nrow(sample)/2, "in all samples."))

  if(rundeseq){
    print("Perform DESeq on given datasets.")
    dds <- DESeq(dds)
  }
  return(dds)
}




#' To output normalized results to files named by given "output_prefix",
#' and also return a list containing normalized counts for downstream analysis.
#'
#' @param dds \code{\link{salmon2deseq}} or \code{\link{readscount2deseq}} or
#' \code{\link[DESeq2]{DESeq}} generated dataset.
#' @param output_prefix A string, will be used as output file name prefix.
#' @param rlog Get "rlog" transformed value for downstream correlation like analysis.
#' @param vst Get "vst" transformed value for downstream correlation like analysis. Normally faster than "rlog".
#' @param savemat Save normalized and rlog/vst matrix to file. Default T.
#' The file would be named like `output_prefix.DESeq2.normalized.xls`,
#' `output_prefix.DESeq2.normalized.rlog.xls`.
#'
#' @return A list containing normalized expression values and/or rlog, vst transformed normalized expression values.
#' @export
#'
#' @examples
#'
#' nomrexpr <- deseq2normalizedExpr(dds)
#'
deseq2normalizedExpr <- function(dds, output_prefix='ehbio', rlog=T, vst=F, savemat=T){
  #标准化后的结果按整体差异大小排序,同时输出对数转换的结果。

  # Get normalized counts
  normalized_counts <- DESeq2::counts(dds, normalized=TRUE)

  # 标准化的结果按整体差异大小排序
  normalized_counts_mad <- apply(normalized_counts, 1, mad)
  normalized_counts <- normalized_counts[order(normalized_counts_mad, decreasing=T), ]

  # 常规R输出忽略左上角(输出文件第一列也就是基因列的列名字)
  # 输出结果为完整矩阵,保留左上角的id。
  normalized_counts_output = data.frame(id=rownames(normalized_counts), normalized_counts)
  if(savemat){
	print("Output normalized counts")
    write.table(normalized_counts_output, file=paste0(output_prefix,".DESeq2.normalized.xls"),
                quote=F, sep="\t", row.names=F, col.names=T)
  }

  normexpr <- list(normalized=normalized_counts, normalizedSave=normalized_counts_output)

  if (rlog) {
    rld <- DESeq2::rlog(dds, blind=FALSE)
    rlogMat <- assay(rld)
    rlogMat_mad <- apply(rlogMat, 1, mad)
    rlogMat <- rlogMat[order(rlogMat_mad, decreasing=T), ]


    rlogMat_output = data.frame(id=rownames(rlogMat), rlogMat)
	if(savemat) {
      print("Output rlog transformed normalized counts")
      write.table(rlogMat_output, file=paste0(output_prefix,".DESeq2.normalized.rlog.xls"),
                 quote=F, sep="\t", row.names=F, col.names=T)
	}
    normexpr$rlog <- rlogMat
    normexpr$rlogSave <- rlogMat_output

  }


  if (vst) {
    rld <- DESEq2::vst(dds, blind=FALSE)
    vstMat <- assay(rld)
    vstMat_mad <- apply(vstMat, 1, mad)
    vstMat <- vstMat[order(vstMat_mad, decreasing=T), ]

    vstMat_output = data.frame(id=rownames(vstMat), vstMat)

    if(savemat){
      print("Output vst transformed normalized counts")
      write.table(vstMat_output, file=paste0(output_prefix,".DESeq2.normalized.vst.xls"),
                quote=F, sep="\t", row.names=F, col.names=T)
	}

    normexpr$vst <- vstMat
    normexpr$vstSave <- vstMat_output
  }

  return(normexpr)
}



#' Plot distribution of normalzied expression to check the normalization effect of DESeq2.
#'
#' @param normexpr A list returned by \code{\link{deseq2normalizedExpr}}.
#' @param saveplot Save plot to given file "a.pdf", "b.png".
#' @param ... Additional parameters given to \code{\link{ggsave}}.
#'
#' @return A ggplot2 object
#' @export
#'
#' @examples
#'
#'
#' normexpr <- deseq2normalizedExpr(dds)
#' normalizedExpr2DistribBoxplo(normexpr)
#'
#'
normalizedExpr2DistribBoxplot <- function(normexpr, saveplot=NULL, ...) {
  if ('rlog' %in% names(normexpr)){
    p <- widedataframe2boxplot(as.data.frame(normexpr$rlog), saveplot=saveplot, ylab="rLog transformed expression value")
  } else if ('vst' %in% names(normexpr)){
    p <- widedataframe2boxplot(as.data.frame(normexpr$vst), saveplot=saveplot, ylab="VST transformed expression value")
  }
  return(p)
}






#' DE genes analysis for two groups.
#'
#' @param dds \code{\link{DESeq}} function returned object.
#' @param groupA Group name 1.
#' @param groupB Group name 2.
#' @param design The group column name. Default "conditions".
#' @param padj Multiple-test corrected p-value. Default 0.05.
#' @param log2FC Log2 transformed fold change. Default 1.
#' @param dropCol Columns to drop in final output. Default \code{c("lfcSE", "stat")}.
#' Other options \code{"ID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj"}.
#' This has no specific usages except make the table clearer.
#' @param output_prefix A string as prefix of output files.
#' @param ... Additional parameters given to \code{\link{ggsave}}.
#'
#' @import ggplot2
#' @import pheatmap
#' @return NULL
#' @export
#'
#' @examples
#'
#' twoGroupDEgenes(dds, "trt", "untrt")
#'
twoGroupDEgenes <- function
(
  dds,
  groupA,
  groupB,
  design="conditions",
  padj=0.05,
  log2FC=1,
  # "ID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj"
  dropCol=c("lfcSE", "stat"),
  output_prefix="ehbio",
  ...
){
  #print(sampleV)
  #groupA <- as.vector(sampleV$sampA)
  #groupB <- as.vector(sampleV$sampB)
  #groupA = 'trt'
  #groupB = 'untrt'
  #design = "conditions"
  #padj = 0.05
  #log2FC = 1
  # "ID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj"
  #dropCol = c("lfcSE", "stat")
  #output_prefix = "ehbio"

  # print(groupA)
  # if(is.list(groupA)){
  #   groupA <- unlist(groupA)
  # }
  # print(groupA)
  # print(groupB)
  # if(is.list(groupB)){
  #   groupB <- unlist(groupB)
  # }
  # print(groupB)
  print(paste("DE genes between", groupA, groupB, sep=" "))
  contrastV <- c(design, groupA, groupB)
  print(contrastV)
  res <- DESeq2::results(dds,  contrast=contrastV)

  normalized_counts <- DESeq2::counts(dds, normalized=TRUE)

  baseA <- normalized_counts[, colData(dds)[[design]] == groupA]
  if (is.vector(baseA)){
    baseMeanA <- as.data.frame(baseA)
  } else {
    baseMeanA <- as.data.frame(rowMeans(baseA))
  }
  baseMeanA <- round(baseMeanA, 3)
  colnames(baseMeanA) <- groupA
  baseB <- normalized_counts[, colData(dds)[[design]] == groupB]
  if (is.vector(baseB)){
    baseMeanB <- as.data.frame(baseB)
  } else {
    baseMeanB <- as.data.frame(rowMeans(baseB))
  }


  baseMeanB <- round(baseMeanB, 3)
  colnames(baseMeanB) <- groupB
  res <- cbind(baseMeanA, baseMeanB, as.data.frame(res))
  res <- data.frame(ID=rownames(res), res)
  res$baseMean <- round(rowMeans(cbind(baseA, baseB)),3)
  res$padj[is.na(res$padj)] <- 1
  res$pvalue[is.na(res$pvalue)] <- 1
  res$log2FoldChange <- round(res$log2FoldChange,3)
  res$padj <- as.numeric(formatC(res$padj))
  res$pvalue <- as.numeric(formatC(res$pvalue))

  res <- res[order(res$padj),]

  comp314 <- paste(groupA, "_vs_", groupB, sep=".")

  file_base <- paste(output_prefix, "DESeq2", comp314, sep=".")
  file_base1 <- paste(file_base, "results.xls", sep=".")

  res_output <- res[, !(names(res) %in% dropCol), drop=F]
  write.table(res_output, file=file_base1, sep="\t", quote=F, row.names=F)

  res_de <- res[res$padj<padj, !(names(res) %in% dropCol), drop=F]
  res_de_up <- subset(res_de, res_de$log2FoldChange>=log2FC)

  file <- paste(output_prefix, "DESeq2",groupA, "_higherThan_", groupB,
                'xls', sep=".")
  write.table(as.data.frame(res_de_up), file=file, sep="\t", quote=F, row.names=F)

  res_de_up_id <- subset(res_de_up, select=c("ID"))
  file <- paste(output_prefix, "DESeq2",groupA, "_higherThan_", groupB,
                'id.xls', sep=".")
  write.table(as.data.frame(res_de_up_id), file=file, sep="\t",
              quote=F, row.names=F, col.names=F)

  if(dim(res_de_up_id)[1]>0) {
    res_de_up_id_l <- cbind(res_de_up_id, paste(groupA, "_higherThan_",groupB, sep="."))
    write.table(as.data.frame(res_de_up_id_l),
                file=paste0(output_prefix,".DESeq2.all.DE"),
                sep="\t",quote=F, row.names=F, col.names=F, append=T)
  }

  res_de_dw <- subset(res_de, res_de$log2FoldChange<=(-1)*log2FC)
  file <- paste(output_prefix, "DESeq2",groupA, "_lowerThan_", groupB,
                'xls', sep=".")
  write.table(as.data.frame(res_de_dw), file=file, sep="\t", quote=F, row.names=F)

  res_de_dw_id <- subset(res_de_dw, select=c("ID"))
  file <- paste(output_prefix, "DESeq2",groupA, "_lowerThan_", groupB,
                'id.xls', sep=".")
  write.table(as.data.frame(res_de_dw_id), file=file, sep="\t",
              quote=F, row.names=F, col.names=F)

  if(dim(res_de_dw_id)[1]>0) {
    res_de_dw_id_l <- cbind(res_de_dw_id, paste(groupA, "_lowerThan_",groupB, sep="."))
    write.table(as.data.frame(res_de_dw_id_l),
                file=paste0(output_prefix,".DESeq2.all.DE"),
                sep="\t",quote=F, row.names=F, col.names=F, append=T)
  }

  res_output$level <- ifelse(res_output$padj<=padj,
                             ifelse(res_output$log2FoldChange>=log2FC,
                                    paste(groupA,"UP"),
                             ifelse(res_output$log2FoldChange<=(-1)*(log2FC),
                                    paste(groupB,"UP"), "NoDiff")) , "NoDiff")

  volcanoPlot(res_output, "log2FoldChange", "padj",
              "level", saveplot=paste0(file_base1,".volcano.pdf"), ...)

  rankPlot(res_output, label=10, saveplot=paste0(file_base1,".rankplot.pdf"), width=20, ...)


  res_de_up_top20_id <- as.vector(head(res_de_up$ID,20))
  res_de_dw_top20_id <- as.vector(head(res_de_dw$ID,20))

  res_de_top20 <- c(res_de_up_top20_id, res_de_dw_top20_id)


  res_de_top20_expr <- normalized_counts[res_de_top20,]

  sample = as.data.frame(dds@colData)

  pheatmap::pheatmap(res_de_top20_expr, cluster_row=T, scale="row",
                     annotation_col=sample,
                     filename=paste0(file_base1,".top20DEgenes.heatmap.pdf"))

  res_de_top20_expr2 <- data.frame(Gene=rownames(res_de_top20_expr), res_de_top20_expr)
  res_de_top20_expr2 <- reshape2::melt(res_de_top20_expr, id=c("Gene"))

  colnames(res_de_top20_expr2) <- c("Gene", "Sample", "Expression")

  res_de_top20_expr2$Group <- sample[match(res_de_top20_expr2$Sample, rownames(sample)),design]

  p = ggplot(res_de_top20_expr2, aes(x=Gene, y=Expression)) +
    geom_point(aes(color=Group), alpha=0.5) +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
          axis.title.x = element_blank()) +
    ylab("Normalized xpression value") + scale_y_log10()
  ggsave(p, file=paste0(file_base1,".top20DEgenes.dotplot.pdf"), width=20,
         height=14, units="cm", ...)

}



# geneExprColDotplot <- function(data,x,y,color,ylab){
#   ggplot(res_de_top20_expr2, aes(x=Gene, y=Expression)) +
#     geom_point(aes(color=Group), alpha=0.5) + theme_classic() +
#     theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
#     axis.title.x = element_blank()) + ylab("Normalized xpression value") +
#     scale_y_log10()
# }




#' DE genes analysis for multiple groups.
#'
#' @param dds \code{\link{DESeq}} function returned object.
#' @param comparePairFile A file containing sample groups for comparing. Optional.
#' If not given, the function will use \code{colData} information in \code{dds}
#' and perform group compare for all possible combinations.
#'
#' ```
#' groupA groupB
#' groupA groupC
#' groupC groupB
#' ```
#' @inheritParams twoGroupDEgenes
#'
#' @return NULL
#' @export
#'
#' @examples
#'
#' multipleGroupDEgenes(dds)
#'
multipleGroupDEgenes <- function(
  dds,
  comparePairFile=NULL,
  design="conditions",
  padj=0.05,
  log2FC=1,
  # "ID", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj"
  dropCol=c("lfcSE", "stat"),
  output_prefix="ehbio",
  ...
){

  if (file.exists(paste0(output_prefix,".DESeq2.all.DE"))) {
    file.remove(paste0(output_prefix,".DESeq2.all.DE"))
  }

  if(!is.null(comparePairFile)){
    compare_data <- read.table(comparePairFile, sep="\t",
                               check.names=F, quote='', com='')
    colnames(compare_data) <- c("sampA", "sampB")
  } else {
    sampleGroup <- as.data.frame(dds@colData)
    compare_data <- as.vector(unique(sampleGroup[[design]]))
    #compare_data <- letters[1:3]
    len_compare_data <- length(compare_data)
    compareL <- list()

    count = 1
    for(i in 1:(len_compare_data-1)) {
      for(j in (i+1):len_compare_data) {
        tmp_compare <- list(sampA=compare_data[i],sampB=compare_data[j])
        compareL[[paste(i,j)]] <- tmp_compare
        count = count + 1
      }
    }
    compare_data <- as.data.frame(do.call(rbind, compareL))
  }

  unused <- by(compare_data, 1:nrow(compare_data), function (x)
    twoGroupDEgenes(dds, groupA=unlist(x[1,1]), groupB=unlist(x[1,2]), design=design, padj=padj,
                    log2FC=log2FC, dropCol=dropCol,
                    output_prefix=output_prefix, ...))

  #twoGroupDEgenes(dds, tmp_compare, design=design, padj=padj, log2FC=log2FC,
  #                dropCol=dropCol, output_prefix=output_prefix, ...)
}



#' One step DEseq2 DE genes analysis for salmon output.
#'
#' @param file A file containing salmon output file lists if "type=salmon"
#' with format described in \code{\link{salmon2deseq}}. Or
#' reads count matrix file if "type=readscount" with format described in
#' \code{\link{readscount2deseq}}.
#'
#' @param type Specify input file type, either "salmon" or "readscount".
#' "tx2gene" currently has no effects for "type=readscount".
#'
#' @inheritParams salmon2deseq
#' @inheritParams deseq2normalizedExpr
#' @inheritParams multipleGroupDEgenes
#'
#' @return NULL
#' @export
#'
#' @examples
#'
#' DESeq2_ysx(salmon_file_list, sampleFIle, conditions, type="salmon")
#' DESeq2_ysx(count_matrix_file, sampleFIle, conditions, type="readscount")
#'
DESeq2_ysx <- function(file, sampleFile, design, type,
                       covariate = NULL,
                       tx2gene=NULL, filter=NULL, output_prefix='ehbio',
                       rlog=T, vst=F, comparePairFile=NULL, padj=0.05,
                       log2FC=1, dropCol=c("lfcSE", "stat")){
  if(type == "salmon"){
    dds <- salmon2deseq(file, sampleFile, design, covariate=covariate,
                      tx2gene=tx2gene, filter=filter, rundeseq=T)

  } else if(type == "readscount"){
    dds <- readscount2deseq(file, sampleFile, design, covariate=covariate,
                      filter=filter, rundeseq=T)
  }

  normexpr <- deseq2normalizedExpr(dds, output_prefix, rlog=rlog, vst=vst)

  normalizedExpr2DistribBoxplot(normexpr,
                                saveplot=paste(output_prefix, "DESeq2.normalizedExprDistrib.pdf", sep="."))

  clusterSampleHeatmap2(normexpr$rlog,
                        cor_file=paste(output_prefix, "DESeq2.sampleCorrelation.txt", sep="."),
                        saveplot=paste(output_prefix, "DESeq2.sampleCorrelation.pdf", sep="."))


  multipleGroupDEgenes(dds, comparePairFile=comparePairFile, design=design,
                       padj=padj, log2FC=log2FC, dropCol=dropCol,
                       output_prefix=output_prefix)

}
Tong-Chen/YSX documentation built on Jan. 25, 2021, 2:49 a.m.