ideal-report

This content has been loaded from the template report .Rmd file, provided with the ideal package. Please edit it at your best convenience!

opts_chunk$set(
  echo=input$report_echo,
  error = TRUE # to continue generating the report

)

If you are viewing this report in the Preview, you might require the installation of the PhantomJS to render correctly some HTML widgets (datatable objects created with the r BiocStyle::CRANpkg("DT") package). This can be done by using the r BiocStyle::CRANpkg("webshot") package and calling webshot::install_phantomjs().

Data Setup

Following objects were provided to the initial call of the function:

dds_obj
res_obj
head(annotation_obj)
head(countmatrix)
expdesign

The data provided was used to construct the following objects

head(values$countmatrix)
values$expdesign
values$dds_obj
values$res_obj
head(values$annotation_obj)
design(values$dds_obj)

values$cur_species

input$idtype

# load according annotation package? use annospecies_df

if(!is.null(mcols(values$dds_obj)$dispGeneEst))
  plotDispEsts(values$dds_obj)

if(!is.null(values$dds_obj))
  paste0(nrow(values$dds_obj), " genes - ",ncol(values$dds_obj)," samples")


if(!is.null(values$annotation_obj))
  paste0(nrow(values$annotation_obj), " genes - ",ncol(values$annotation_obj)," ID types")

DEregu <- sum(values$res_obj$padj < input$FDR & values$res_obj$log2FoldChange != 0, na.rm = TRUE)
if(!is.null(values$res_obj))
  paste0(DEregu, " DE genes - out of ",nrow(values$res_obj),"")

Counts Overview

if(input$countstable_unit=="raw_counts")
  cur_mat <- counts(values$dds_obj,normalized=FALSE)
if(input$countstable_unit=="normalized_counts")
  cur_mat <- counts(values$dds_obj,normalized=TRUE)
if(input$countstable_unit=="vst_counts")
  cur_mat <- vst(values$dds_obj)
if(input$countstable_unit=="log10_counts")
  cur_mat <- log10(1 + counts(values$dds_obj,normalized=TRUE))

input$countstable_unit

head(cur_mat)
t1 <- rowSums(counts(values$dds_obj))
t2 <- rowMeans(counts(values$dds_obj,normalized=TRUE))

thresh_rowsums <- input$threshold_rowsums
thresh_rowmeans <- input$threshold_rowmeans
abs_t1 <- sum(t1 > thresh_rowsums)
rel_t1 <- 100 * mean(t1 > thresh_rowsums)
abs_t2 <- sum(t2 > thresh_rowmeans)
rel_t2 <- 100 * mean(t2 > thresh_rowmeans)

cat("Number of detected genes:\n")
cat(abs_t1,"genes have at least a sample with more than",thresh_rowsums,"counts\n")
cat(paste0(round(rel_t1,3),"%"), "of the",nrow(values$dds_obj),
    "genes have at least a sample with more than",thresh_rowsums,"counts\n")
cat(abs_t2,"genes have more than",thresh_rowmeans,"counts (normalized) on average\n")
cat(paste0(round(rel_t2,3),"%"), "of the",nrow(values$dds_obj),
    "genes have more than",thresh_rowsums,"counts (normalized) on average\n")
cat("Counts are ranging from", min(counts(values$dds_obj)),
    "to",max(counts(values$dds_obj)))

Extract results

if(!is.null(values$res_obj)) {
  print(design_factors())

  print(input$choose_expfac)

  fac1 <- input$choose_expfac
  nrl <- length(levels(colData(values$dds_obj)[,fac1]))

  print(input$fac1_c1)
  print(input$fac1_c2)
  print(values$res_obj)

  print(sub(".*p-value: (.*)","\\1",mcols(values$res_obj, use.names=TRUE)["pvalue","description"]))
  print(summary(values$res_obj,alpha = input$FDR))

  cur_res <- values$res_obj

  if(!is.null(values$annotation_obj)) {
    cur_res$symbol <- values$annotation_obj$gene_name[match(rownames(cur_res),
                                                          rownames(values$annotation_obj))]
  }
}
if(!is.null(values$res_obj)) {
  res_df <- as.data.frame(values$res_obj)
  res_df <- dplyr::filter(res_df, !is.na(pvalue))

  p1 <- ggplot(res_df, aes_string("pvalue")) +
    geom_histogram(binwidth = 0.01, boundary = 0) + theme_bw()
  # for visual estimation of the false discovery proportion in the first bin
  alpha <- binw <- input$FDR
  pi0 <- 2*mean(res_df$pvalue > 0.5)
  p1 <- p1 + geom_hline(yintercept = pi0 * binw * nrow(res_df), col = "steelblue") + 
    geom_vline(xintercept = alpha, col = "red")

  p1 <- p1 + ggtitle(
    label = "p-value histogram",
    subtitle = paste0(
      "Expected nulls = ", pi0 * binw * nrow(res_df), 
      " - #elements in the selected bins = ", sum(res_df$pvalue < alpha)
    ))

  print(p1)

  res_df <- mutate(
    res_df, 
    stratum = cut(baseMean, include.lowest = TRUE, 
                  breaks = signif(quantile(baseMean, probs = seq(0,1, length.out = 10)),2)))

  p2 <- ggplot(res_df, aes_string("pvalue")) +
    geom_histogram(binwidth = 0.01, boundary = 0) + 
    facet_wrap(~stratum) + 
    theme_bw()

  p2 <- p2 + ggtitle(
    label = "p-value histogram",
    subtitle = "stratified on the different value classes of mean expression values")

  phi <- input$FDR
  res_df <- mutate(res_df, rank = rank(pvalue))
  m <- nrow(res_df)

  p3 <- ggplot(filter(res_df, rank <= 6000), 
              aes(x = rank, y = pvalue)) + 
    geom_line() + 
    geom_abline(slope = phi/m, col = "red") + 
    theme_bw()

  p3 <- p3 + ggtitle(
    label = "Schweder-Spjotvoll plot",
    subtitle = paste0(
      "Intersection point at rank ", with(arrange(res_df,rank), last(which(pvalue <= phi * rank / m))))
    )

  p4 <- ggplot(res_df, aes_string("log2FoldChange")) +
    geom_histogram(binwidth = 0.1) + theme_bw()
  p4 <- p4 + ggtitle("Histogram of the log2 fold changes")
  print(p4)

  mydf <- as.data.frame(values$res_obj[order(values$res_obj$padj),])#[1:500,]
  rownames(mydf) <- createLinkENS(rownames(mydf),species = annoSpecies_df$ensembl_db[match(input$speciesSelect,annoSpecies_df$species)]) ## TODO: check what are the species from ensembl and
      ## TODO: add a check to see if wanted?
  mydf$symbol <- createLinkGeneSymbol(mydf$symbol)
  datatable(mydf, escape = FALSE)                       
}

Summary Plots

if(!is.null(values$res_obj)) {
  print(plot_ma(values$res_obj,annotation_obj = values$annotation_obj))

  if(!is.null(input$ma_brush)){
    if(!is.null(values$annotation_obj))
        print(plot_ma(values$res_obj,annotation_obj = values$annotation_obj,FDR = input$FDR) +
        coord_cartesian(xlim = c(input$ma_brush$xmin,input$ma_brush$xmax),
                        ylim = c(input$ma_brush$ymin,input$ma_brush$ymax)) +
        geom_text(aes_string(label="genename"),size=3,hjust=0.25, vjust=-0.75))
      else
        print(plot_ma(values$res_obj,annotation_obj = values$annotation_obj,FDR = input$FDR) +
        coord_cartesian(xlim = c(input$ma_brush$xmin,input$ma_brush$xmax),
                        ylim = c(input$ma_brush$ymin,input$ma_brush$ymax)))
  }

  print(plot_volcano(values$res_obj, FDR = input$FDR))

  print(head(curData()))

  if(!(is.null(input$ma_brush)) & !is.null(values$dds_obj)) { 

      brushedObject <- curData()

      selectedGenes <- as.character(brushedObject$ID)
      toplot <- assay(values$dds_obj)[selectedGenes,]
      rownames(toplot) <- values$annotation_obj$gene_name[match(rownames(toplot),rownames(values$annotation_obj))]

      if(input$pseudocounts) toplot <- log2(1+toplot)

      mat_rowscale <- function(x)
      {
        m <- apply(x, 1, mean, na.rm = TRUE)
        s <- apply(x, 1, sd, na.rm = TRUE)
        return((x - m)/s)
      }

      if(input$rowscale) toplot <- mat_rowscale(toplot)

      mycolss <- c("#313695","#4575b4","#74add1","#abd9e9","#e0f3f8","#fee090","#fdae61","#f46d43","#d73027","#a50026") # to be consistent with red/blue usual coding


      pheatmap(toplot,cluster_cols = as.logical(input$heatmap_colv))

      heatmaply(toplot,Colv = as.logical(input$heatmap_colv),colors = mycolss)
  }
}

Gene Finder

if(length(input$color_by) > 0 &
   (length(input$avail_symbols)>0 | length(input$avail_ids)>0)
  ){

  if(length(input$avail_symbols)>0) {
        # got the symbol, look for the id
        mysyms <- input$avail_symbols
        myids <- values$annotation_obj$gene_id[match(mysyms, values$annotation_obj$gene_name)]
      } else {
        myids <- input$avail_ids
        # make it optional if annot is available
        if(!is.null(values$annotation_obj)) {
          mysims <- values$annotation_obj$gene_name[match(myids, values$annotation_obj$gene_id)]
        } else {
          mysims <- ""
        }
      }

  print(length(myids))

  for(myid in myids) {
      p <- ggplotCounts(values$dds_obj, myid, intgroup = input$color_by,annotation_obj=values$annotation_obj)
      if(input$ylimZero_genefinder)
        p <- p + ylim(0.1, NA)
      print(p)
  }

  if("symbol" %in% names(values$res_obj)) {
    print(plot_ma(values$res_obj,
                intgenes = input$avail_symbols,annotation_obj = values$annotation_obj,FDR = input$FDR))
      } else {
        print(plot_ma(values$res_obj,
                intgenes = input$avail_ids,annotation_obj = values$annotation_obj,FDR = input$FDR))
      }

  if(!is.null(values$genelist_ma)) {
    print(values$genelist_ma)
    if("symbol" %in% names(values$res_obj)) {
        print(plot_ma(values$res_obj,
                intgenes = values$genelist_ma$`Gene Symbol`,annotation_obj = values$annotation_obj,FDR = input$FDR))
      }
  }
  datatable(cur_combires())
}
# if selected, maplot with annot plus table

Functional Analysis

if(!is.null(values$res_obj)) {
  values$genelistUP()
  values$genelistDOWN()
  values$genelistUPDOWN()
  as.character(values$genelist1$`Gene Symbol`)
  as.character(values$genelist2$`Gene Symbol`)

  gplots::venn(gll())

  UpSetR::upset(fromList(gll()))
}

# 3x5 tables if available

# here only for goana
if(!is.null(values$gse_up)) {
  mytbl <- values$gse_up
  rownames(mytbl) <- createLinkGO(rownames(mytbl))
  datatable(mytbl,escape=FALSE)
}
if(!is.null(values$gse_down)) {
  mytbl <- values$gse_down
  rownames(mytbl) <- createLinkGO(rownames(mytbl))
  datatable(mytbl,escape=FALSE)
}
if(!is.null(values$gse_updown)) {
  mytbl <- values$gse_updown
  rownames(mytbl) <- createLinkGO(rownames(mytbl))
  datatable(mytbl,escape=FALSE)
}
if(!is.null(values$gse_list1)) {
  mytbl <- values$gse_list1
  rownames(mytbl) <- createLinkGO(rownames(mytbl))
  datatable(mytbl,escape=FALSE)
}
if(!is.null(values$gse_list2)) {
  mytbl <- values$gse_list2
  rownames(mytbl) <- createLinkGO(rownames(mytbl))
  datatable(mytbl,escape=FALSE)
}

# here only for topgo
if(!is.null(values$topgo_up)) {
  mytbl <- values$topgo_up
  mytbl$GO.ID <- createLinkGO(mytbl$GO.ID)
  datatable(mytbl,escape=FALSE)
}
if(!is.null(values$topgo_down)) {
  mytbl <- values$topgo_down
  mytbl$GO.ID <- createLinkGO(mytbl$GO.ID)
  datatable(mytbl,escape=FALSE)
}
if(!is.null(values$topgo_updown)) {
  mytbl <- values$topgo_updown
  mytbl$GO.ID <- createLinkGO(mytbl$GO.ID)
  datatable(mytbl,escape=FALSE)
}
if(!is.null(values$topgo_list1)) {
  mytbl <- values$topgo_list1
  mytbl$GO.ID <- createLinkGO(mytbl$GO.ID)
  datatable(mytbl,escape=FALSE)
}
if(!is.null(values$topgo_list2)) {
  mytbl <- values$topgo_list2
  mytbl$GO.ID <- createLinkGO(mytbl$GO.ID)
  datatable(mytbl,escape=FALSE)
}

# similarly for goseq...
if(!is.null(values$gse_updown_goseq)) {
  mytbl <- values$gse_updown_goseq
  mytbl$category <- createLinkGO(mytbl$category)
  datatable(mytbl,escape=FALSE)
}

# for the selected lines, also do heatmaps?

Signatures explorer

if(!is.null(values$gene_signatures) & 
   !(is.null(values$vst_obj)) & 
   !(is.null(values$anno_vec)) &
   !is.null(input$sig_selectsig)) {

  print(input$sig_selectsig)

  sig_sigmembers <- values$gene_signatures[[input$sig_selectsig]]

  id_type_data <- input$sig_id_data
  id_type_sigs <- input$sig_id_sigs

  head(values$anno_vec)

  sig_members <- values$gene_signatures[[input$sig_selectsig]]
  print(sig_members)

  sig_heatmap(
          values$vst_obj,
          my_signature = values$gene_signatures[[input$sig_selectsig]],
          res_data = values$res_obj,
          FDR = input$FDR,
          de_only = input$sig_useDEonly,
          annovec = values$anno_vec,
          # anno_colData = colData(values$vst_obj)[,input$sig_annocoldata, drop = FALSE],
          title = names(values$gene_signatures)[match(input$sig_selectsig,names(values$gene_signatures))],
          cluster_rows = input$sig_clusterrows,
          cluster_cols = input$sig_clustercols,
          center_mean = input$sig_centermean,
          scale_row = input$sig_scalerow
        )
}

About ideal

ideal is a Bioconductor package containing a Shiny application for interactive and reproducible Differential Expression analysis.

ideal is developed in the Bioinformatics Division (led by Harald Binder) of the Institute of Medical Biostatistics, Epidemiology and Informatics (IMBEI) at the University Medical Center of the Johannes Gutenberg University Mainz.

Developers

ideal is currently maintained by me, Federico Marini, at the IMBEI (www.imbei.uni-mainz.de). You can contact me by clicking on the button below.

Federico Marini

Code availability

ideal is a part of the Bioconductor project (www.bioconductor.org). All code for ideal, especially for the development version, is available on GitHub.

Citation info

If you use ideal for your analysis, please cite it as here below:

citation("ideal")

Session Info

sessionInfo()
library(shiny)
footertemplate <- function(){
  tags$div(
    class = "footer",
    style = "text-align:center",
    tags$div(
      class = "foot-inner",
      list(
        hr(),
        "This report was generated with",
        tags$a(href="https://github.com/federicomarini/ideal", "ideal"), br(),
        "ideal is a project developed by Federico Marini in the Bioinformatics division of the ",
        tags$a(href="http://www.unimedizin-mainz.de/imbei","IMBEI"),br(),
        "Development of the pcaExplorer package is on ",
        tags$a(href="https://github.com/federicomarini/ideal", "GitHub")
      )
    )
  )
}
footertemplate()


Try the ideal package in your browser

Any scripts or data that you put into this service are public.

ideal documentation built on Nov. 8, 2020, 5:02 p.m.