R/DEGs.R

Defines functions seekDE_cite seekViolin seekDEtransformations seekDEcluster seekDEma seekDEvolcano seekDEseq seekCPM

Documented in seekDE_cite seekDEcluster seekDEma seekDEseq seekDEtransformations seekDEvolcano

# Install Package: Ctrl + Shift + B
# devtools::document()

seekCPM <- function(countmatrix){
  cpm <- apply(countmatrix,2,function(x) x/sum(x, na.rm=T)*(10^6) )
}

#' differential expression, using the DESeq2
#'
#' Imports:
#' DESeq2,
#' AnnotationDbi,
#' org.Hs.eg.db,
#' org.Mm.eg.db,
#' biomaRt
#' apeglm
#'
#' @param countTable table with raw counts, gene IDs as rownames (defined by id_in), and sample names as colnames
#' @param conditions vector of integers (1,2), indicating which column belongs to which condition. Must be in the same order as the columns. Note that the calculation is condition2/condition1, so a negative log2FC means that the gene is downregulated in condition2.
#' @param cond0 integer or character same as on of the conditions. Indicates that all samples in the condition are of the control condition (e.g. wt, untreated, etc.) by which the 2nd condition will be divided to get the log2FC. Mixing up condition 1 and 2 will result in opposite signs.
#' @param cond1 integer or character same as on of the conditions. Indicates that all samples in the condition are of the test condition  (e.g. ko, treated, etc.) which will be divided by the control condition.
#' @param species string, either "human" or "mouse". Needed to generate gene symbols from Entrez IDs.
#' @param batches vector of integers (1,2,3,etc.), indicating which column belongs to which batch. Must be in the same order as the columns. Is used to remove batch effects. Note that two batches must have at least one shared condition.
#' @param id_in gene ID provided as rownames in the countTable
#' @param id_out gene ID for the output (will create an additional column)
#' @param exclude vector of integers (1,2,3,etc.), indicating which samples should be excluded from the analysis
#' @param getBiotype boolean, gets the biotype (miRNA, lncRNA, etc.) for all significantly regulated genes
#' @param shrink either NA or a string specifying the shrink method: "apeglm" (default), "ashr" or "normal")
#' @return A list with 3 items: 1) A data.frame where each row is a gene, and each column is a parameter of information about it (log2FC, symbol, etc.). 2) A DESeq results object from which the data.frame was calculated. 3) A DESeq object from which the DESeq results object calculated.
#' @details indirect batch effect removal: if cond1 and cond2 have differnt batches (all samples of cond1 are from batch1 and all samples of cond2 are from batch2) the batch effect can still be removed if there is a third condition that has samples from batch 1 & 2. These samples have to be in the count matrix and their condition and batches have to be given. DESeq2 will remove the batch effect based on their counts.
#' @examples
#' myCounts <- sqCounts
#' myCondition <- c(0,0,0,2,2,2,2,3,3,3,3)
#' myBatch <- c(1,1,1,1,1,2,2,2,2,2,2)
#' myDE <- seekDEseq(myCounts, conditions=myCondition, batches=myBatch, species="mouse", cond0=0, cond1=3, shrink=F)
seekDEseq <- function(countTable, conditions, batches=NA, cond0=NA, cond1=NA, exclude=NA, species="NA",id_in="symbol",id_out="entrezid",
                      batchCorrection=T, getBiotype=F, shrink="apeglm"){
  #==Input=Check==#
  checks <- c(
    length(conditions) == ncol(countTable),
    all(is.na(batches)) | length(batches)==length(conditions),
    species %in% c(NA,"NA","human","mouse"),
    if(is.na(exclude)){T}else{max(exclude) <= ncol(countTable)}
  )

  if(!checks[1]) stop("!!!Input error: wrong length of conditions")
  if(!checks[2]) stop("!!!Input error: wrong length of batches")
  if(!checks[3]) stop("!!!Input error: unknown species selected. Select 'human' or 'mouse'")
  if(!checks[4]) stop("!!!Input error: position of at least one excluded sample is not available")
  if(species %in% c(NA, "NA")) message("Input information: No species selected, Symbols cannot be generated.")
  ################################################################
  #==Data=preparation==#
  included <- seq(ncol(countTable))
  if(!is.na(exclude)){included <- included[!included==exclude]} # vector that determines which samples columns will be included

  ColData <- data.frame(condition=factor(as.character(conditions)),batch=factor(as.character(batches)),row.names=colnames(countTable)) #design matrix
  ColData <- ColData[included,] #keep only included samples in the design matrix
  countTable <- countTable[rowSums(countTable)>(length(conditions)/length(unique(conditions))) & #count matrix only with included samples and genes that are above a certain threshold
                     apply(countTable,1,function(x) sum(x>=1)>=length(unique(conditions))),
                   included]
  ################################################################
  #==DESeq2==#
  if(is.na(batches[1])){batchCorrection <- FALSE}
  if(batchCorrection==F){message("Input information: No batch correction!")}
  dds0 <- switch(as.character(batchCorrection),
                 "FALSE" = DESeq2::DESeqDataSetFromMatrix(countTable, colData=ColData, design=~condition),
                 "TRUE"  = DESeq2::DESeqDataSetFromMatrix(countTable, colData=ColData, design=~batch+condition)
                 )

  dds <- DESeq2::DESeq(dds0)
  #if the comparisons are not correct by default, they have to ve releveled
  if(!grepl(paste0(cond0,"$"), DESeq2::resultsNames(dds)[2])){
    print(paste("releveling to achieve comparisons against:",cond0))
    dds$condition <- relevel(dds$condition, ref=cond0)
    dds <- DESeq2::DESeq(dds)
  }
  print(DESeq2::resultsNames(dds))

  if(!is.na(shrink)){
    res <- DESeq2::lfcShrink(dds, coef=paste0("condition_",cond1,"_vs_",cond0), type=shrink)
  }
  else{
    res <- DESeq2::results(dds, contrast=c("condition", cond1, cond0))
  }
  ################################################################

  #==Matrix=refinement==#
  DE <- as.data.frame(res)
  colnames(DE)[2] <- "log2FC"

  if(!id_in %in% id_out){
    DE <- cbind(oldID     =rownames(DE),
                newID     = switch(species,
                                   "NA"=NA,
                                   "human"=as.character(AnnotationDbi::mapIds(org.Hs.eg.db::org.Hs.eg.db,keys=rownames(DE),column=toupper(id_out),keytype=toupper(id_in),multiVals="first")),
                                   "mouse"=as.character(AnnotationDbi::mapIds(org.Mm.eg.db::org.Mm.eg.db,keys=rownames(DE),column=toupper(id_out),keytype=toupper(id_in),multiVals="first"))),
                DE)
    colnames(DE)[1] <- tolower(id_in)
    colnames(DE)[2] <- tolower(id_out)
  }else{
    DE <- cbind(oldID=rownames(DE),DE)
    colnames(DE)[1] <- tolower(id_in)
  }
  DE <- cbind(DE,
              log2FC.abs= abs(DE$log2FC),
              log10padj = (-log10(DE$padj))
              )

  ################################################################

  #==biotype==#
  if(getBiotype & !is.na(species)){
    message("#==Adding biotype==#")
    ensembl <- switch(species,
                      "human"=biomaRt::useMart("ensembl", dataset = "hsapiens_gene_ensembl"),
                      "mouse"=biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl"))
    biotype <- biomaRt::getBM(attributes = c('entrezgene_id','gene_biotype'),
                              filters = 'entrezgene_id',
                              values = DE$entrezid,
                              mart = ensembl)
    DE <- merge(DE, biotype, by.x="entrezid", by.y="entrezgene_id", all.x=T)
  }
  ################################################################

  #==output==#
  if(sum(ifelse(res$pvalue>res$padj, 1, 0), na.rm=T)){message("!!!Warning, at least one padj<pvalue")}

  DE <- DE[order(DE$padj),]
  list(DE=DE, res=res, dds=dds)
}


#' creates a volcano plot from diff.expr. data
#'
#' Imports:
#' ggplot2,
#' ggrepel,
#' scales
#'
#' @param DE data.frame with at least 4 columns: Symbol, log2FC, log10padj, color
#' @param genes character vector, genes of interest that will have their label shown
#' @param cutP number, p value cutoff, will result in a dashed line
#' @param cutFC number, fold change cutoff will result in 2 dashed lines (positive and negative cutoff)
#' @param ggplot logical, if FALSE, output will be plotly, if TRUE, output will be ggplot2 with labels
#' @param tophits either an integer, or the string "custom". If integer, genes with the n-th most positive and n-th most negative log2FC values and/or lowest padj value will be labeled. If "custom", there must already be a column named "tophitsR" and one named "topgitsL".
#' @param Title string, for ggTitle
#' @param textSize integer, textsize of title and axis labels
#' @param labelSize integer, textsize of individual gene labels
#' @param annoSize integer, textsize of the annotation showing the amount of genes for each group
#' @param xLim vector of two numbers, limits to the x axis (log2FC). Defaults to the maximum absolute of log2FC expanded by 3 (same value on on each side)
#' @param colors vector of strings, determines colors for significant genes 1) up and 2) down, and for 3) unsignificant genes
#' @param lineSize number, regulates width of both axis lines
#' @param dotSize number, regulates the size of the dots
#' @param dotOpacity number between 0 and 1, regulates opacity of dots
#' @param tickSize number, regulates width of ticks
#' @param annoX vector of numbers, regulates where the annotation for point number is situated on the x axis
#' @param annoY vector of numbers, regulates where the annotation for point number is situated on the y axis
#' @return a plotly object
#' @examples
#' myDE <- sqDE
#' seekDEvolcano(myDE[[1]])
seekDEvolcano <- function(DE, genes=NA, cutP=0.05, cutFC=1.5, tophitsL=5, tophitsR=5, Title="", textSize=25, labelSize=7, annoSize=8,
                        xLim=c(min(subset(DE, !is.na(padj))$log2FC)-0.5, max(subset(DE, !is.na(padj))$log2FC))+0.5, yLim=c(min(DE$log10padj), max(DE$log10padj)), colors=c("red","blue","gray20"),
                        lineSize=1, line2Size=1, dotSize=1, dotOpacity=0.6, tickSize=1, annoX=c(2,-2,0), annoY=c(0.1,0.1,0.01)){
  message(paste0("using p value cutoff ", cutP, " and FC cutoff ", cutFC, " (log2FC cutoff of ", round(log2(cutFC),3), ")"))
  cutFC <- log2(cutFC)
  cutP <- (-log10(cutP))
  DEsig <- subset(DE, log10padj>=cutP & abs(log2FC)>=cutFC)
  anno <- c(as.integer(nrow(subset(DEsig, log2FC>=0))), as.integer(nrow(subset(DEsig, log2FC<= 0))), as.integer(nrow(DE)-nrow(DEsig)))

  # if(ggplot){
    if(!tophitsL %in% "custom"){
      DE$tophitsR <- ifelse(DE$log2FC>= cutFC & DE$log10padj>=cutP &
                              (DE$log10padj>=dplyr::nth(sort(DEsig$log10padj, decreasing=T), tophitsR) |
                                 DE$log2FC>=dplyr:: nth(sort(DEsig$log2FC, decreasing=T), tophitsR)), as.character(DE$symbol), NA)
      DE$tophitsL <- ifelse(DE$log2FC<= -cutFC & DE$log10padj>=cutP &
                              (DE$log10padj>=dplyr::nth(sort(DEsig$log10padj, decreasing=T), tophitsL) |
                                 DE$log2FC<=dplyr:: nth(sort(DEsig$log2FC, decreasing=F), tophitsL)), as.character(DE$symbol), NA)
    }
  if(!is.na(genes)){
    DE$tophitsR <- ifelse(as.character(DE$symbol) %in% genes & DE$log2FC>=0, as.character(DE$symbol), NA)
    DE$tophitsL <- ifelse(as.character(DE$symbol) %in% genes & DE$log2FC<= -0, as.character(DE$symbol), NA)
  }
    myPlot <- ggplot2::ggplot(DE, ggplot2::aes(x=log2FC, y=log10padj)) +
      ggplot2::geom_vline(xintercept= cutFC, linetype="dashed", color = "black", size=line2Size) +
      ggplot2::geom_vline(xintercept=-cutFC, linetype="dashed", color = "black", size=line2Size) +
      ggplot2::geom_hline(yintercept= cutP, linetype="dashed", color = "black", size=line2Size) +
      ggplot2::geom_point(data=subset(DE, abs(log2FC)<cutFC | log10padj<cutP), ggplot2::aes(x=log2FC, y=log10padj), size=dotSize, alpha=dotOpacity, color=colors[3]) +
      ggplot2::geom_point(data=subset(DEsig, log2FC>0), ggplot2::aes(x=log2FC, y=log10padj), size=dotSize, alpha=dotOpacity, color=colors[1]) +
      ggplot2::geom_point(data=subset(DEsig, log2FC<0), ggplot2::aes(x=log2FC, y=log10padj), size=dotSize, alpha=dotOpacity, color=colors[2]) +
      ggplot2::ggtitle(Title) +
      ggplot2::scale_y_log10(name=expression(-log[10]~italic(p~adjust)), limits=c(yLim[1], yLim[2])) +
      ggplot2::scale_x_continuous(name=expression(log[2]~italic(FC)), limits=c(xLim[1], xLim[2]), expand=c(0,0), labels=scales::comma) +
      ggrepel::geom_label_repel(data = DE, ggplot2::aes(label=tophitsR), min.segment.length = 0.25, force = 6, point.padding = 0.5, size=labelSize,
                                direction="y", hjust=0, segment.size=0.5, nudge_x=5, fill="white") + # labels
      ggrepel::geom_label_repel(data = DE, ggplot2::aes(label=tophitsL), min.segment.length = 0.25, force = 6, point.padding = 0.5, size=labelSize,
                                direction="y", hjust=1, segment.size=0.5, nudge_x=-5, fill="white") + # labels
      ggplot2::theme(panel.grid=ggplot2::element_blank(), panel.background=ggplot2::element_blank(),
                     legend.position="none", axis.line=ggplot2::element_line(size=lineSize), axis.ticks=ggplot2::element_line(size=tickSize),
                     axis.text= ggplot2::element_text(size=textSize),
                     axis.title=ggplot2::element_text(size=textSize),
                     plot.title = ggplot2::element_text(size = textSize))
    if(annoSize>0){myPlot <- myPlot + ggplot2::annotate(geom="label",label=anno,y=annoY,x=annoX,fill=colors,color="white",size=annoSize)}
  # Plotly:
  #   myPlot <- plotly::plot_ly(DE, x=~log2FC, y=~log10padj, color=~color, colors=levels(DE$color), text=~symbol, hoverinfo="text", marker=list(opacity=0.4), size=1)
  #   myPlot <- plotly::layout(myPlot,
  #                            yaxis=list(type="log", title="-log10(p adjust)"),
  #                            xaxis=list(title="log2(fold change)"),
  #                            showlegend=FALSE,
  #                            shapes=list(list(type="line", line=list(dash="dash"), x0= cutFC, x1= cutFC, y0=min(subset(DE, log10padj>0)$log10padj, na.rm=T), y1=max(DE$log10padj, na.rm=T)),
  #                                        list(type="line", line=list(dash="dash"), x0=-cutFC, x1=-cutFC, y0=min(subset(DE, log10padj>0)$log10padj, na.rm=T), y1=max(DE$log10padj, na.rm=T)),
  #                                        list(type="line", line=list(dash="dash"), x0=min(subset(DE, !is.na(padj))$"log2FC", na.rm=T) , x1=max(subset(DE, !is.na(padj))$"log2FC", na.rm=T), y0=cutP, y1=cutP))
  #   )
  #   myPlot <- plotly::layout(myPlot, title=Title, annotations= list(list(y=annoY[1], x=annoX[1], text=anno[1], color=colors[1])))
  # }
  myPlot
}

#' creates a MA plot from diff.expr. data
#'
#' Imports:
#' DESeq2,
#' apeglm
#'
#' @param dds DESeq object as created as 3rd item with seekDeseq()
#' @param compare string indicating which comparison should be used. Use this function with only dds, to see options.
#' @param alpha p value cutoff used to indicate significance by color in the plot
#' @return a graph
#' @examples
#' myCounts <- sqCounts
#' myCondition <- c(0,0,0,2,2,2,2,3,3,3,3)
#' myBatch <- c(1,1,1,1,1,2,2,2,2,2,2)
#' myDE <- seekDeseq(myCounts, conditions=myCondition, batches=myBatch, species="mouse", cond0=0, cond1=3)
#' dds <- myDE[[3]]
#' myNames <- seekMA(dds)
#' seekMA(dds, myNames[2])
seekDEma <- function(dds, compare=NA, alpha=0.05){
  if(is.na(compare)){
    DESeq2::resultsNames(dds)
  }else{
    resLFC <- DESeq2::lfcShrink(dds, coef=compare, type="apeglm")
    DESeq2::plotMA(resLFC, alpha=alpha)
  }
}

#' PCA, heatmap and sample distance matrix
#'
#' Imports:
#' DESeq2,
#' limma,
#' vsn,
#' pheatmap,
#' SummarizedExperiment,
#' ggplot2,
#' ggpubr
#'
#' @param dds DESeq object: can be obtained as 3rd item from seekDEseq(), or from the normal DESeq2 pipeline
#' @param Title string: a title for the final figure
#' @param transformation string: what transformation to use ("rlog", "vst" or "norm"). Check which is best by using clTrans()
#' @param removeBatchEffect boolean: TRUE removes the batch effect
#' @param nHeatmap integer: number of genes to consider in the heatmap
#' @return ggarrange object with 3 ggpplots to determine clustering of the samples.
#' @examples
#' Counts
#' myGroup <- c(1,1,2,1,2,2) #(e.g. you have WT,WT,KO,WT,KO,KO)
#' myBatch <- c(1,1,1,2,2,2)
#' myDE <- seekDeseq(Counts = myCounts, Groups = myGroups)
#' myDEcluster <- seekDEcluster(myDE[[3]])
seekDEcluster <- function(dds, Title="", transformation="rlog", removeBatchEffect=FALSE, heatmapN=20, heatmapClusterCols=F){
  message(paste("chosen transformation:",transformation))
  if(removeBatchEffect){message("batch effect removed")}else{message("no removal of batch effects")}
  #==transformation==#
  data <- switch(transformation,
                 "vst"=DESeq2::vst(dds, blind=FALSE),
                 "rlog"=DESeq2::rlog(dds, blind=FALSE),
                 "ntd"=DESeq2::normTransform(dds))

  #==batch==#
  if(removeBatchEffect){SummarizedExperiment::assay(data) <- limma::removeBatchEffect(SummarizedExperiment::assay(data), data$batch)} #as recommended in DESeq2 manual

  #==plot distance==#
  sampleDists <- dist(t(SummarizedExperiment::assay(data)))
  sampleDistMatrix <- as.matrix(sampleDists)
  colors <- colorRampPalette(c("blue3", "white"))(255)
  pDist <- pheatmap::pheatmap(sampleDistMatrix, cluster_rows=FALSE, clustering_distance_rows=sampleDists, clustering_distance_cols=sampleDists, col=colors, show_rownames=T, show_colnames=T, silent=T)[[4]]

  #==plot heatmap==#
  df <- as.data.frame(SummarizedExperiment::colData(data)[,c("condition","batch")])
  if(is.na(df$batch[1])) df$batch=1
  rownames(df) <- colnames(dds)
  select1 <- order(rowMeans(DESeq2::counts(dds,normalized=TRUE)), decreasing=TRUE)[1:heatmapN]
  pHeat <- pheatmap::pheatmap(SummarizedExperiment::assay(data)[select1,], cluster_rows=FALSE, show_rownames=FALSE,cluster_cols=heatmapClusterCols, annotation_col=df, border_color=NA, silent=T)[[4]]

  #==plot PCA==#
  pcaData <- DESeq2::plotPCA(data, intgroup=c("condition", "batch"), returnData=TRUE)
  if(is.na(pcaData$batch[1])) pcaData$batch="no_batch"
  percentVar <- round(100 * attr(pcaData, "percentVar"))
  pPCA <- ggplot2::ggplot(pcaData, ggplot2::aes(PC1, PC2, color=condition, shape=batch)) +
    ggplot2::geom_point(size=3) +
    ggplot2::xlab(paste0("PC1: ",percentVar[1],"% variance")) +
    ggplot2::ylab(paste0("PC2: ",percentVar[2],"% variance"))

  #==output==#
  ggpubr::annotate_figure(ggpubr::ggarrange(ggpubr::ggarrange(pDist,pHeat,nrow=1),pPCA,nrow=2), top=ggpubr::text_grob(Title, face="bold"))
}

#' Transformation variance graphs
#'
#' Imports:
#' DESeq2,
#' vsn
#' ggpubr
#' ggplot2
#' SummarizedExperiment
#'
#' @param dds DESeq object: can be obtained as 3rd item from seekDEseq(), or from the normal DESeq2 pipeline
#' @return ggarrange object with variance plots for 3 transformations
#' @examples
#' myCounts
#' myGroups
#' myDE <- seekDeseq(Counts = myCounts, Groups = myGroups)
#' seekTransformations(myDE[[3]])
seekDEtransformations <- function(dds){
  #==transformations==#
  vsd <- DESeq2::vst(dds, blind=FALSE)
  rld <- DESeq2::rlog(dds, blind=FALSE)
  ntd <- DESeq2::normTransform(dds)

  #==output==#
  ggpubr::ggarrange(
    vsn::meanSdPlot(SummarizedExperiment::assay(ntd), plot=FALSE)[[5]] + ggplot2::ggtitle("ntd", "normalized counts transformation"),
    vsn::meanSdPlot(SummarizedExperiment::assay(vsd), plot=FALSE)[[5]] + ggplot2::ggtitle("vst", "variance stabilizing transformation"),
    vsn::meanSdPlot(SummarizedExperiment::assay(rld), plot=FALSE)[[5]] + ggplot2::ggtitle("rlog", "regularized log transformation"),
    nrow=1, common.legend=T, legend="right"
  )
}

seekViolin <- function(data1, data2, mergeColumn, yTitle, myTextSize=10, ylimit, cutP=0.05, cutFC=1.5, yticks=5){
  cutFC <- log2(cutFC)
  data <- merge(data1, data2, by=mergeColumn)
  data <- subset(data, !is.na(log2FC.x) & !is.na(padj.x) & !is.na(log2FC.y) & !is.na(padj.y))

  data$regulation <- ifelse(data$padj.x<=cutP & abs(data$log2FC.x)>=cutFC, ifelse(data$log2FC.x>0, "up","down"), "n.s.")
  data$regulation <- as.factor(data$regulation)
  data$color <- ifelse(data$padj.x<=cutP & abs(data$log2FC.x)>=cutFC, ifelse(data$log2FC.x>0, "red","blue"), "gray")
  data$color <- as.factor(data$color)

  plot <- ggplot2::ggplot(data, ggplot2::aes(x=regulation, y=log2FC.y, fill=regulation)) +
    ggplot2::geom_violin() +
    #geom_boxplot(width=0.3) +
    ggplot2::geom_hline(yintercept=cutFC, linetype="dashed", color = "black") +
    ggplot2::geom_hline(yintercept=-cutFC, linetype="dashed", color = "black") +
    ggplot2::scale_y_continuous(limits=c(-ylimit,ylimit), expand = c(0, 0), breaks=seq(-ylimit,ylimit,by=yticks)) +
    ggplot2::scale_fill_manual(values = levels(data$color)) +
    ggplot2::xlab(yTitle) +
    ggplot2::theme(axis.text.y = ggplot2::element_blank(),
          axis.text.x = ggplot2::element_text(angle=45, hjust=1),
          #axis.ticks.y = element_blank(),
          axis.title.y = ggplot2::element_blank(),
          axis.line= ggplot2::element_line(colour = "black", size=0.5),
          panel.grid = ggplot2::element_blank(),
          panel.background = ggplot2::element_blank(),
          panel.border=ggplot2::element_rect(fill=NA, size=0.5),
          legend.position = "none",
          axis.text=ggplot2::element_text(size=myTextSize),
          axis.title=ggplot2::element_text(size=myTextSize),
          plot.title=ggplot2::element_text(size=myTextSize))
  list(plot, data)
}


#' AnnotationDbi,
#' org.Hs.eg.db,
#' org.Mm.eg.db,
#' biomaRt


#' How to cite
#'
#' @return information on how to cite packages used in seeqR's DE functions
#' @examples
#' seekDE_cite()
seekDE_cite <- function(){
  message(paste(
    "The function seekDEseq used the packages DESeq2, AnnotationDbi, biomaRt, org.Hs.eg.db and org.Mm.eg.db:",
    "  Love MI, Huber W, Anders S (2014). Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology, 15, 550. doi: 10.1186/s13059-014-0550-8.",
    "  Durinck S, Spellman P, Birney E, Huber W (2009). Mapping identifiers for the integration of genomic datasets with the R/Bioconductor package biomaRt. Nature Protocols, 4, 11841191.",
    "  Hervé Pagès, Marc Carlson, Seth Falcon and Nianhua Li (2019). AnnotationDbi: Manipulation of SQLite-based annotations in Bioconductor. R package version 1.46.1.",
    "  Marc Carlson (2019). org.Hs.eg.db: Genome wide annotation for Human. R package version 3.8.2.",
    "  Marc Carlson (2019). org.Mm.eg.db: Genome wide annotation for Mouse. R package version 3.8.2.",
    "The function seekDEma used the packages DESeq2, and apeglm:",
    "  Zhu A, Ibrahim JG, Love MI (2018). Heavy-tailed prior distributions for sequence count data: removing the noise and preserving large differences. Bioinformatics. doi: 10.1093/bioinformatics/bty895.",
    "The function seekDEcluster used the function DESeq2, limma, pheatmap, vsn, SummarizedExperiment, ggplot2, ggpubr",
    "The function seekDEtransformations used the function DESeq2, vsn, SummarizedExperiment, ggplot2, ggpubr",
    sep="\n"))
}
Solatar/seeqR documentation built on Feb. 19, 2021, 8:07 p.m.