R/reportingTools.R

Defines functions tableFilt makeGoTable fixHeaderAndGo makeGoGeneTable theme_blank remove.axis.and.padding makeImages venn4Way addLines inLineVennLinks vennLinks drawVenn vennInLine vennPage vennSelect2 ensemblLinks nuccoreLinks goLinks affyLinks

Documented in affyLinks ensemblLinks fixHeaderAndGo goLinks makeGoGeneTable makeGoTable makeImages nuccoreLinks tableFilt venn4Way vennInLine vennPage vennSelect2

## code for integrating with ReportingTools package




#' Add links to data when using ReportingTools
#' 
#' These functions are intended to add links to the Affymetrix, Entrez Gene, Nuccore,
#' and AmiGO databases when creating HTML tables using ReportingTools.
#' 
#' These functions are not actually intended to be called directly. Instead,
#' they are used as targets for the .modifyDF argument of the \code{publish}
#' function of ReportingTools. See the example below for more detail.
#' 
#' @aliases entrezLinks affyLinks goLinks nuccoreLinks ensemblLinks
#' @param df A data.frame, usually created using the \code{select} function of
#' the AnnotationDbi package. For Entrez ID data, the column name must be
#' ENTREZID. For Affy data, the column name must be PROBEID, and for GO data
#' the column name must be Term. For Nuccore the column name can be any of
#' "GI", "REFSEQ", "ACCNUM". Any other names will fail.
#' @param ... Allows one to pass arbitrary arguments to lower level functions.
#' Currently unsupported.
#' @return A data.frame is returned, with links included.
#' @author James W. MacDonald \email{jmacdon@@u.washington.edu}
#' @keywords manip
#' @examples
#' 
#' \dontrun{
#' ## say we have an ExpressionSet from HuGene 1.0 ST array
#' dat <- read.celfiles()
#' eset <- rma(dat)
#' ## annotate the ExpressionSet
#' eset <- annotateEset(eset, hugene10sttranscriptcluster.db,
#' columns = c("ENTREZID","ACCNUM","SYMBOL"))
#' ## and fit a model using limma
#' fit <- lmFit(eset, design)
#' fit2 <- eBayes(fit)
#' ## and create an HTML page with links to Affy and Entrez
#' out <- topTable(fit2, coef=2)
#' htab <- HTMLReport("The title","a_short_name")
#' publish(out, htab, .modifyDF = list(affyLinks, entrezLinks, nuccoreLinks))
#' finish(htab)}
#' 
#' @export entrezLinks affyLinks goLinks nuccoreLinks ensemblLinks
entrezLinks <- function (df, ...) {
    naind <- is.na(df$ENTREZID)
    df$ENTREZID <- hwrite(as.character(df$ENTREZID), link = paste0("http://www.ncbi.nlm.nih.gov/gene/", 
        as.character(df$ENTREZID)), table = FALSE)
    df$ENTREZID[naind] <- ""
    return(df)
}

affyLinks <- function(df, ...){
    df$PROBEID <- hwrite(as.character(df$PROBEID),
                         link = paste0("https://www.affymetrix.com/LinkServlet?probeset=", 
                         as.character(df$PROBEID)), table = FALSE)
    return(df)
}

goLinks <- function(df, ...){
    ## first column likely to be links. Extract out the GO IDs
    linkpart <- as.character(df[,1])
    if(length(grep("href", linkpart)) > 0)
        linkpart <- sapply(strsplit(linkpart, ">|<"), "[", 3)
    df$Term <- hwrite(as.character(df$Term),
                      link = paste0("http://amigo.geneontology.org/amigo/term/",
                      linkpart), table = FALSE)
    return(df)
}

nuccoreLinks <- function(df, ...){
    col <- grep("GI|REFSEQ|ACCNUM", colnames(df), ignore.case = TRUE)
    if(length(col) == 0) {
        stop(paste("There needs to be a column labeled 'GI', 'REFSEQ', or 'ACCNUM'",
                   "to add links to the nuccore database."), call. = FALSE)
    } else if(length(col) > 1) {
        stop(paste("There can only be one column labeled 'GI', 'REFSEQ', or 'ACCNUM'",
                   "to add links to the nuccore database."), call. = FALSE)
    }
    naind <- is.na(df[,col])
    df[,col] <- hwrite(as.character(df[,col]),
                       link = paste0("http://www.ncbi.nlm.nih.gov/nuccore/",
                                     as.character(df[,col])), table = FALSE)
    df[naind,col] <- ""
    return(df)
}

ensemblLinks <- function(df, ...){
    naind <- is.na(df$ENSEMBL)
    df$ENSEMBL <- hwriter::hwrite(as.character(df$ENSEMBL),
                                 link = paste0("http://www.ensembl.org/Gene/Summary?db=core;g=",
                                               as.character(df$ENSEMBL)), table = FALSE)
    df$ENSEMBL[naind] <- ""
    return(df)
}


#' Select and Output Genelists Based on Venn Diagrams
#' 
#' This function is designed to output text and/or HTML tables based on the
#' results of a call to \code{\link[limma]{decideTests}}, using the
#' ReportingTools package.
#' 
#' The purpose of this function is to output HTML and text tables with lists of
#' genes that fulfill the criteria of a call to
#' \code{\link[limma]{decideTests}} as well as the direction of differential
#' expression.
#' 
#' Some important things to note: First, the names of the HTML and text tables
#' are extracted from the \code{colnames} of the \code{TestResults} object,
#' which come from the contrasts matrix, so it is important to use something
#' descriptive. Second, the method argument is analogous to the \code{include}
#' argument from \code{\link[limma:venn]{vennCounts}} or
#' \code{\link[limma:venn]{vennDiagram}}. Choosing "both" will select genes
#' that are differentially expressed in one or more comparisons, regardless of
#' direction. Choosing "up" or "down" will select genes that are only
#' differentially expressed in one direction. Choosing "same" will select genes
#' that are differentially expressed in the same direction. Choosing "sameup"
#' or "samedown" will select genes that are differentially expressed in the
#' same direction as well as 'up' or 'down'.
#' 
#' Note that this is different than sequentially choosing "up" and then "down".
#' For instance, a gene that is upregulated in one comparison and downregulated
#' in another comparison will be listed in the intersection of those two
#' comparisons if "both" is chosen, it will be listed in only one comparison
#' for both the "up" and "down" methods, and it will be listed in the union
#' (e.g., not selected) if "same" is chosen.
#' 
#' Unlike \code{vennSelect}, this function automatically creates both HTML and
#' CSV output files.
#' 
#' @param fit An \code{\link[limma:marraylm]{MArrayLM}} object, from a call to
#' \code{\link[limma:ebayes]{eBayes}}.
#' @param contrast A contrasts matrix, produced either by hand, or by a call to
#' \code{\link[limma]{makeContrasts}}
#' @param design A design matrix.
#' @param groups This argument is used when creating a legend for the resulting
#' HTML pages. If NULL, the groups will be generated using the column names of
#' the design matrix.
#' @param cols A numeric vector indicating which columns of the fit, contrast
#' and design matrix to use. If \code{NULL}, all columns will be used.
#' @param p.value A p-value to filter the results by.
#' @param lfc A log fold change to filter the results by.
#' @param method One of "same", "both", "up", "down", "sameup", or "samedown".
#' See details for more information.
#' @param adj.meth Method to use for adjusting p-values. Default is 'BH', which
#' corresponds to 'fdr'. Ideally one would set this value to be the same as was
#' used for \code{\link[limma]{decideTests}}.
#' @param titleadd Additional text to add to the title of the HTML tables.
#' Default is NULL, in which case the title of the table will be the same as
#' the filename.
#' @param fileadd Additional text to add to the name of the HTML and CSV
#' tables. Default is NULL.
#' @param baseUrl A character string giving the location of the page in terms
#' of HTML locations. Defaults to "."
#' @param reportDirectory A character string giving the location that the
#' results will be written. Defaults to "./venns"
#' @param affy Boolean; are these Affymetrix arrays, and do you want hyperlinks
#' for each probeset to the Affy website to be generated for the resulting HTML tables?
#' @param probecol If the "affy" argument is \code{TRUE}, what is the column header
#' for the Affymetrix probeset IDs? Defaults to "PROBEID", which is the default if
#' the data are annotated using a Bioconductor annotation package.
#' @param \dots Used to pass arguments to lower level functions. 
#' @return A list with two items. First, a list of \code{HTMLReport} objects
#' from the ReportingTools package, which can be used to create an index page
#' with links to the HTML pages created by this function. See the help page for
#' HTMLReport in ReportingTools as well as the vignettes for more information.
#' The second item is a \code{vennCounts} object from limma, which can be used
#' to create a Venn diagram, e.g., in a report if this function is called
#' within a Sweave or knitR pipeline.
#' @author James W. MacDonald \email{jmacdon@@u.washington.edu}
#' @keywords manip
#' @export vennSelect2
vennSelect2 <- function(fit, contrast, design,  groups = NULL, cols = NULL, p.value = 0.05,
                        lfc = 0, method = "same", adj.meth = "BH", titleadd = NULL, fileadd = NULL, 
                        baseUrl = ".",  reportDirectory = "./venns", affy = TRUE, probecol = "PROBEID", ...){

    ## design is a design matrix from limma
    ## contrast is a conrast matrix
    ## output is a list containing the probe IDs of genes from each comparison
        
    lattice.options(default.theme = reporting.theme())  
    ## do this before we subset
    if(is.null(groups))
        groups <- factor(colnames(design)[apply(design, 1, function(x) which(x !=0))])
    else
        groups <- factor(groups)

    if(!is.null(cols)){
        contrast <- contrast[,cols]
        fit <- fit[,cols]
    }
    
    dtmat <- decideTests(fit, adjust.method = adj.meth, p.value = p.value, lfc = lfc)
    if(all(apply(dtmat, 2, sum) == 0)) return(list(vennout = NULL, venncounts = vennCounts2(dtmat, method = method)))

    colind <- getCols(design, contrast)
    ncontrasts <- ncol(dtmat)
    if(ncontrasts < 2 || ncontrasts > 4)
        stop("This function only works for 2-4 comparisons at a time.\n",
             call. = FALSE)
    if(ncontrasts == 2)
        name <- c(paste("Genes unique to", colnames(dtmat)),
                  paste("Genes in intersection of", intNames(dtmat)))
    if(ncontrasts == 3)
        name <- c(paste("Genes unique to", colnames(dtmat)),
                  paste("Genes in intersection of", intNames(dtmat)),
                  "Genes common to all comparisons")
    if(ncontrasts == 4)
        return(venn4Way(fit = fit, contrast = contrast, p.value = p.value,
                        lfc = lfc, adj.meth = adj.meth, baseUrl = baseUrl, reportDirectory = reportDirectory,
                        affy = affy, probecol = probecol, ...))
        
    
    coefind <- switch(ncol(dtmat)-1,
                      list(1,2,1:2),
                      list(1,2,3,1:2,c(1,3), 2:3, 1:3))
    
    ## Remove illegal characters from filenames
    if(length(grep("[/|\\|?|*|:|<|>|\"|\\|]", name)) > 0)
        warning(paste("Some illegal characters have been removed from the filenames",
                      name, sep = " "), call. = FALSE)
    name <- gsub("[/|\\|?|*|:|<|>|\"|\\|]", "", name)

    if(!is.null(titleadd))
        titles <- paste(name, titleadd)
    else
        titles <- name

    if(!is.null(fileadd))
        name <- paste(name, fileadd)
    
    
    indices <- makeIndices(dtmat, method = method)
    ind <- which(sapply(indices, sum) > 0)
    venncounts <- vennCounts2(dtmat, method = method)
    
    vennout <- lapply(seq(along = indices), function(x) HTMLReport(gsub(" ", "_", name[x]), titles[x],
                          baseUrl = baseUrl,reportDirectory = reportDirectory))
    ## require(annotation(eset), character.only = TRUE, quietly = TRUE)
    ps <- apply(fit$p.value, 2, p.adjust, method = adj.meth)
    colnames(ps) <- paste0(colnames(ps), ".p.value")
    coefs <- fit$coefficients
    colnames(coefs) <- paste0(colnames(coefs), ".logFC")
    csvlst <- lapply(seq(along = indices), function(x) cbind(coefs[indices[[x]], coefind[[x]], drop = FALSE],
                         ps[indices[[x]], coefind[[x]], drop = FALSE]))
    ## previously we used the ExpressionSet only to figure out the annotation source.
    ## instead of that, we now rely upon the MArrayLM object containing the annotations we want to use
    rn <- row.names(fit$coef)
    if(is.null(rn)) rn <- seq_len(nrow(fit$coef))
    if(is.null(fit$genes)){
        warning(paste("\nThere are no annotations in the MArrayLM object, using",
                      if(is.null(row.names(fit$coef))) "row indices" else "row names",
                      "from the MArrayLM object instead"))
        annotlst <- lapply(indices, function(x) if(sum(x) > 0) data.frame(ID = rn[x]))
    } else {
        annotlst <- lapply(indices, function(x) if(sum(x) > 0) fit$genes[x,,drop = FALSE])
    }
                       
    csvlst <- mapply(data.frame, annotlst, csvlst, SIMPLIFY = FALSE)
    fixed.dflst <- lapply(csvlst[ind], fixHeaderAndGo, affy = affy, probecol = probecol)
    csvlst[ind] <- lapply(fixed.dflst, function(x) x$df)
    mdf <- fixed.dflst[[1]]$mdf
    
    if(length(mdf) > 0)
        lapply(ind, function(x) publish(csvlst[[x]], vennout[[x]],  .modifyDF = mdf))
    else
        lapply(ind, function(x) publish(csvlst[[x]], vennout[[x]]))
    
    vennpaths <- gsub("html$", "txt", sapply(vennout, path))
    lapply(ind, function(x) write.table(csvlst[[x]], vennpaths[x],
                                        sep = "\t", quote = FALSE, row.names = FALSE, na = ""))
    
    lapply(vennout[ind], finish)
    return(list(vennout = vennout, venncounts = venncounts))
}

           
    
## makeLegend <- function(groups, dir, fname){
##     nam <- paste(dir, fname, sep = "/")
##     png(nam, height = 250, width = 300)
##     plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
##     cols <- lattice.options()$default.theme$superpose.symbol$col
##     legend(1,1, levels(groups), pch = 16, col = cols[sort(unique(as.numeric(groups)))],
##            xjust = 0.5, yjust=0.5, cex = 1.5, bty="n")
##     dev.off()
##     fname
## }



##' This function is designed to output CSV and HTML tables based on an analysis
##' using the limma or edgeR packages, with output generated using the ReportingTools
##' package. Please note that a DGEGLM object from edgeR is simply converted to an
##' MArrayLM object from limma and then used in the default MArrayLM method, so all
##' arguments for the MArrayLM object pertain to the DGEGLM method as well.
##' 
##' The purpose of this function is to output HTML and text tables with lists of
##' genes that fulfill the criteria of a call to
##' \code{\link[limma]{decideTests}} as well as the direction of differential
##' expression. This is a high-level function that calls \code{vennSelect2}
##' internally, and is intended to be used with \code{vennPage} to create a set
##' of Venn diagrams (on an HTML page) that have clickable links in each cell of
##' the diagram. The links will then pass the end user to individual HTML pages
##' that contain the genes that are represented by the counts in a given cell of
##' the Venn diagram.
##' 
##' In general, the only thing that is needed to create a set of Venn diagrams
##' is a list of numeric vectors that indicate the columns of the contrast
##' matrix that are to be used for a given diagram. See the example below for a
##' better explanation.
##' 
##' Some important things to note: First, the names of the HTML and text tables
##' are extracted from the \code{colnames} of the \code{TestResults} object,
##' which come from the contrasts matrix, so it is important to use something
##' descriptive. Second, the method argument is analogous to the \code{include}
##' argument from \code{\link[limma:venn]{vennCounts}} or
##' \code{\link[limma:venn]{vennDiagram}}. Choosing "both" will select genes
##' that are differentially expressed in one or more comparisons, regardless of
##' direction. Choosing "up" or "down" will select genes that are only
##' differentially expressed in one direction. Choosing "same" will select genes
##' that are differentially expressed in the same direction. Choosing "sameup"
##' or "samedown" will select genes that are differentially expressed in the
##' same direction as well as 'up' or 'down'.
##' 
##' Note that this is different than sequentially choosing "up" and then "down".
##' For instance, a gene that is upregulated in one comparison and downregulated
##' in another comparison will be listed in the intersection of those two
##' comparisons if "both" is chosen, it will be listed in only one comparison
##' for both the "up" and "down" methods, and it will be listed in the union
##' (e.g., not selected) if "same" is chosen.
##' 
##' Unlike \code{vennSelect}, this function automatically creates both HTML and
##' CSV output files.
##'
##' Also please note that this function relys on annotation information contained in
##' the "genes" slot of the "fit" object. If there are no annotation data, then
##' just statistics will be output in the resulting HTML tables.
##'
##' @title High-level function for making Venn diagrams and outputting the results from
##' the diagrams in HTML and CSV files.
##' @param object An \code{\link[limma:marraylm]{MArrayLM}} or \code{\link[edgeR:DGEGLM-class]{DGEGLM}} object.
##' @param contrast A contrasts matrix, produced either by hand, or by a call to
##' \code{\link[limma]{makeContrasts}}
##' @param design A design matrix.
##' @param groups This argument is used when creating a legend for the resulting
##' HTML pages. If NULL, the groups will be generated using the column names of
##' the design matrix. In general it is best to leave this NULL.
##' @param collist A list containing numeric vectors indicating which columns of
##' the fit, contrast and design matrix to use. If \code{NULL}, all columns will
##' be used.
##' @param p.value A p-value to filter the results by.
##' @param lfc A log fold change to filter the results by.
##' @param method One of "same", "both", "up", "down", "sameup", or "samedown".
##' See details for more information.
##' @param adj.meth Method to use for adjusting p-values. Default is 'BH', which
##' corresponds to 'fdr'. Ideally one would set this value to be the same as was
##' used for \code{\link[limma]{decideTests}}.
##' @param titleadd Additional text to add to the title of the HTML tables.
##' Default is NULL, in which case the title of the table will be the same as
##' the filename.
##' @param fileadd Additional text to add to the name of the HTML and CSV
##' tables. Default is NULL.
##' @param baseUrl A character string giving the location of the page in terms
##' of HTML locations. Defaults to "."
##' @param reportDirectory A character string giving the location that the
##' results will be written. Defaults to "./venns"
##' @param affy Boolean. Are these Affymetrix data, and should hyperlinks to the affy website
##' be generated in the HTML tables?
##' @param probecol This argument is used in concert with the preceding argument. If these are Affymetrix data
##' , then specify the column header in the \code{\link[limma:marraylm]{MArrayLM}} object that contains the Affymetrix IDs. Defaults to
##' "PROBEID", which is the expected result if the data are annotated using a BioC annotation package.
##' @param ... Used to pass other arguments to lower level functions.
##' @return A list containing the output from calling \code{vennSelect2} on the
##' columns specified by the collist argument. This is intended as input to
##' \code{vennPage}, which will use those data to create the HTML page with Venn
##' diagrams with clickable links.
##' @author James W. MacDonald \email{jmacdon@@u.washington.edu}
##' @keywords manip
##' @examples
##' 
##'   \dontrun{
##'     mat <- matrix(rnorm(1e6), ncol = 20)
##'     design <- model.matrix(~factor(1:4, each=5))
##'     colnames(design) <- LETTERS[1:4]
##'     contrast <- matrix(c(1,-1,0,0,1,0,-1,0,1,0,0,-1,0,1,-1,0,0,1,0,-1),
##'     ncol = 5)
##'     colnames(contrast) <- paste(LETTERS[c(1,1,1,2,2)],
##'     LETTERS[c(2,3,4,3,4)], sep = " vs ")
##'     fit <- lmFit(mat, design)
##'     fit2 <- contrasts.fit(fit, contrast)
##'     fit2 <- eBayes(fit2)
##'     ## two Venn diagrams - a 3-way Venn with the first three contrasts
##'     ## and a 2-way Venn with the last two contrasts
##'     collist <- list(1:3,4:5)
##'     venn <- makeVenn(fit2, contrast, design, collist = collist)
##'     vennPage(venn, "index.html", "Venn diagrams")
##'     }
##' 
##' @describeIn makeVenn Make a Venn diagram using an MArrayLM object.
##' @export makeVenn
setMethod("makeVenn", "MArrayLM",
          function(object, contrast, design, groups = NULL, collist = NULL,
                   p.value = 0.05, lfc = 0, method = "both", adj.meth = "BH",
                   titleadd = NULL, fileadd = NULL, baseUrl = ".", reportDirectory = "./venns",
                   affy = TRUE, probecol = "PROBEID", ...){
    if(is.null(collist)){
        if(ncol(object$coef) > 4) stop("You can only make Venn diagrams with four or fewer contrasts!\n\n",
                                    call. = FALSE)
        collist <- list(seq_len(ncol(object$coef)))
    }
    vennlst <- lapply(seq(along = collist), function(x) 
        vennSelect2(fit = object, contrast = contrast, design = design, 
                    groups = groups, cols = collist[[x]], p.value = p.value, lfc = lfc,
                    method = method, adj.meth = adj.meth, titleadd = titleadd,
                    fileadd = fileadd, baseUrl = baseUrl, 
                    reportDirectory = paste0(reportDirectory, "/venn", x),
                    affy = affy, probecol = probecol, ...))
    vennlst
})


##' @param comp.method Character. For DGEGLM objects, the DGEGLM object must first be processed using one of \code{\link[edgeR:glmfit]{glmLRT}},
##' \code{\link[edgeR]{glmQLFTest}}, or \code{\link[edgeR]{glmTreat}}. Choose glmLRT if you fit a model using
##' \code{\link[edgeR:glmfit]{glmFit}}, glmQLFTest if you fit a model using \code{\link[edgeR]{glmQLFit}}, or glmTreat if
##' you fit either of those models, but want to incorporate the log fold change into the comparison.
##'  
##' 
##' @describeIn makeVenn Make a Venn diagram using a DGEGLM object.
##' @export
setMethod("makeVenn", "DGEGLM",
          function(object, contrast, design, comp.method = c("glmLRT","glmQLFTest", "glmTreat"), lfc = 0, ...){
    comp.method <-  match.arg(comp.method, c("glmLRT","glmQLFTest", "glmTreat"))
    if(comp.method == "glmTreat" && lfc <= 0)
        stop(paste("When using the glmTreat method you must also choose a non-zero lfc value."), call. = FALSE)
    lst <- switch(comp.method,
                  glmLRT = lapply(seq_len(ncol(contrast)), function(x) glmLRT(object, contrast = contrast[,x])),
                  glmQLFTest = lapply(seq_len(ncol(contrast)), function(x) glmQLFTest(object, contrast = contrast[,x])),
                  glmTreat = lapply(seq_len(ncol(contrast)), function(x) glmTreat(object, contrast = contrast[,x], lfc = lfc)))
    object <- new("MArrayLM", list(p.value = do.call(cbind, lapply(lst, function(x) x$table$PValue)),
                                   coefficients = do.call(cbind, lapply(lst, function(x) x$table$logFC)),
                                   t = do.call(cbind, lapply(lst, function(x) {col <- grep("LR|F", names(x$table))
                                                          return(x$table[,col])})),
                                   genes = object$genes))
    colnames(object$p.value) <- colnames(object$coefficients) <- colnames(contrast)
    makeVenn(object, contrast, design, lfc = lfc, affy = FALSE, ...)
})
   


##' High-level function for making Venn diagrams with clickable links to HTML
##' pages with the underlying genes.
##' 
##' This function is designed to be used in conjunction with the \code{makeVenn}
##' function, to first create a set of HTML pages containing the genes that are
##' represented by the cells of a Venn diagram, and then create an HTML page
##' with the same Venn diagrams, with clickable links that will point the end
##' user to the HTML pages.
##' 
##' This function is intended to be used as part of a pipeline, by first calling
##' \code{makeVenn} and then using the output from that function as input to
##' this function to create the HTML page with clickable links.
##' 
##' @param vennlst The output from \code{makeVenn}.
##' @param pagename Character. The file name for the resulting HTML page.
##' Something like 'venns' is reasonable. Note that the .html will automatically
##' be appended.
##' @param pagetitle Character. The heading for the HTML page.
##' @param cex.venn Numeric. Adjusts the size of the font in the Venn diagram.
##' Usually the default is OK.
##' @param shift.title Boolean. Should the right contrast name of the Venn
##' diagram be shifted down? Useful for long contrast names. If a two-way Venn
##' diagram, this will shift the right name down so they don't overlap. If a
##' three-way Venn diagram, this will shift the top right name down.
##' @param baseUrl Character. The base URL for the resulting HTML page. The
##' default of "." is usually optimal.
##' @param reportDirectory If \code{NULL}, the reportDirectory will be extracted
##' from the vennlst. This is usually what one should do.
##' @param ... To allow passing other arguments to lower level functions.
##' Currently not used.
##' @return An HTMLReport object. If used as input to the ReportingTools
##' \code{publish} function, this will create a link on an index page to the
##' Venn diagram HTML page. See e.g., the microarray analysis vignette for
##' ReportingTools for more information.
##' @author James W. MacDonald \email{jmacdon@@u.washington.edu}
##' @keywords manip
##' @examples
##' 
##'   \dontrun{
##'     mat <- matrix(rnorm(1e6), ncol = 20)
##'     design <- model.matrix(~factor(1:4, each=5))
##'     colnames(design) <- LETTERS[1:4]
##'     contrast <- matrix(c(1,-1,0,0,1,0,-1,0,1,0,0,-1,0,1,-1,0,0,1,0,-1),
##'     ncol = 5)
##'     colnames(contrast) <- paste(LETTERS[c(1,1,1,2,2)],
##'     LETTERS[c(2,3,4,3,4)], sep = " vs ")
##'     fit <- lmFit(mat, design)
##'     fit2 <- contrasts.fit(fit, contrast)
##'     fit2 <- eBayes(fit2)
##'     ## two Venn diagrams - a 3-way Venn with the first three contrasts
##'     ## and a 2-way Venn with the last two contrasts
##'     collist <- list(1:3,4:5)
##'     venn <- makeVenn(fit2, contrast, design, eset, collist = collist)
##'     vennreport <- vennPage(venn, "index.html", "Venn diagrams")
##'     indexPage <- HTMLReport("index", "My results", reportDirectory =
##'     ".", baseUrl = ".")
##'     publish(vennreport)
##'     finish(indexPage)
##'     }
##' 
##' @export vennPage

vennPage <- function(vennlst, pagename, pagetitle, cex.venn = 1, shift.title = FALSE,
                     baseUrl = ".", reportDirectory = NULL, ...){
    if(is.null(reportDirectory)){
        notnull <- sapply(vennlst, function(x) !is.null(x$vennout))
        if(!all(notnull))
            use.this <- which(notnull)[1]
        else
            use.this <- 1
        
        tmp <- strsplit(path(vennlst[[use.this]]$vennout[[1]]), "/")[[1]]
        reportDirectory <- paste(tmp[1:2], collapse = "/")
    }
    hpage <- openPage(paste0(pagename, ".html"), dirname = reportDirectory)
    hwrite(paste("The Venn diagrams all contain clickable links. Click on the counts",
                          "in any cell to see a table of the genes in that cell.",
                          "Also please note that the tables are sortable - simply click",
                          "on any header to sort on that column."), hpage,
                    br = TRUE)
                        
    lapply(seq(along = vennlst), function(x) drawVenn(vennlst[[x]], page = hpage, 
               dir = reportDirectory, num = x, cex = cex.venn, shift.title = shift.title, ...))
    closePage(hpage)
    paste0(reportDirectory, "/", pagename, ".html")
}

##' A function to generate Venn diagrams for use within Rmarkdown documents, particularly for
##' those using the Bioconductor BiocStyle package for formatting.
##'
##' This function is intended for those who use Rmarkdown documents to present results
##' and who would like to include Venn diagrams showing the overlap between two to four
##' contrasts. The Venn diagrams that are generated include links for each cell of the
##' diagram that will open HTML pages that contain results for the genes that are found
##' within the cell of the Venn diagram.
##'
##' Please note that this function is tailored specifically for use within Rmarkdown documents,
##' particularly those that use the Bioconductor BiocStyle package. The function call should be present in a
##' code block using the argument results = "asis", because we are directly generating HTML rather
##' than placing a figure.
##' @title Generate Venn diagrams with links for Rmarkdown documents
##' @param vennlst The output from \code{makeVenn}.
##' @param caplst A list of captions to accompany each Venn diagram. 
##' @param cex.venn Adjustment parameter for the numbers in the Venn diagram. The default is usually OK.
##' @param shift.title Boolean. Should the titles for the Venn diagram be shifted to accommodate long contrast names?
##' @param reportDirectory Directory containing the Venn diagram. This is usually set by \code{makeVenn} and for most
##' people, the default \code{NULL} argument should be used.
##' @param ... Allows users to pass arbitrary arguments to lower level functions.
##' @return This function returns the required HTML text to generate the Venn diagram
##' @author James W.  MacDonald \email{jmacdon@@u.washington.edu}
##' @keywords manip
##' @seealso \code{vennPage} particularly for the example.
##' @export vennInLine
vennInLine <- function(vennlst, caplst, cex.venn = 1, shift.title = FALSE, reportDirectory = NULL, ...){
    if(is.null(reportDirectory)){
        notnull <- sapply(vennlst, function(x) !is.null(x$vennout))
        if(!all(notnull))
            use.this <- which(notnull)[1]
        else
            use.this <- 1
        
        tmp <- strsplit(path(vennlst[[use.this]]$vennout[[1]]), "/")[[1]]
        reportDirectory <- paste(tmp[1:2], collapse = "/")
    }
    lapply(seq(along = vennlst), function(x)
        drawVenn(vennlst[[x]], page = "", dir = reportDirectory, num = x,
                 shift.title = shift.title, inline = TRUE, caption = caplst[[x]], ...))
    
    return(invisible())
}                                                  
    

    
drawVenn <- function(lst, page, dir, num, cex = 1, shift.title = FALSE, inline = FALSE, ...){
    nam <- paste0(dir, "/venn", num, ".png")
    nam2 <- paste0("venn", num, ".png")
    mapname <- paste0("#venn", num)
    png(nam, height = 800, width = 800)
    if(is(lst$venncounts, "VennCounts")){
        if(shift.title) colnames(lst$venncounts)[1] <- paste0(colnames(lst$venncounts)[1], "\n\n")
        vennDiagram(lst$venncounts, cex = cex)
    } else if(is(lst$venncounts, "venn")) {
        plot(lst$venncounts)
        addLines(lst$nam)
    } else {
        stop(paste("You cannot create a Venn diagram with an object of class",
                      class(lst$venncounts)), call. = FALSE)
    }
    dev.off()
    if(!inline){
        hwrite(hmakeTag("img", border = 0, width = 800, height = 800,
                        src = nam2, alt = nam2, usemap = mapname), page)
        hwriteImage(paste("Venn Diagram", num), page)
        if(!is.null(lst[[1]]))
            vennLinks(lst, page, mapname, dir)
    } else {
        inLineVennLinks(nam, mapname, lst, tabindex = TRUE, ...)
    }
}

vennLinks <- function(lst, page, mapname, reportDirectory){
    if(is.null(page)) page <- ""
    fun <- function(x,y) paste0('<area shape="circle" coords=',
                                    x, ' href=', y, '>')
    if(is(lst$venncounts, "VennCounts")){
        if(ncol(lst$venncounts) == 3){
            loclst <- list("250,400,30","550,400,30","400,400,30")
        } else {
            loclst <- list("260,310,30","540,300,30","400,550,30",
                           "400,320,30","330,420,30","470,420,30",
                           "400,390,30")
        }
    } else {
        loclst <- list("140,315,30", "315,215,30","510,215,30","685,325,30","230,270,30",
                       "230,530,30","415,625,30","415,255,30","575,530,30","595,270,30",
                       "300,345,30","485,585,30","340,585,30","530,345,30","415,470,30")
    }
    urlst <- gsub(reportDirectory, ".", sapply(lst$vennout, path))
    strng <- do.call("c", mapply(fun, loclst, urlst, SIMPLIFY = FALSE))
    strng <- c(paste0('<map name="', sub("#", "", mapname), '">'),
               strng, "</map>")
    cat(strng, file = page, sep = "\n")
}

## Used to generate the <area> links for an inline Venn diagram
inLineVennLinks <- function(png, mapname, links, caption = NULL, tabindex = TRUE, ...){
    if(substr(mapname, 1, 1) != "#") mapname <- paste0("#", mapname)
    fun <- function(x, y) paste0("<area shape=\"circle\" coords=", 
                                 x, " href=", y, ">")
    cat("<div class=\"figure\">\n")
    cat(hwrite(hmakeTag("img", border = 0, width = 800,
                                      height = 800, src = png, alt = png,
                        usemap = mapname)), "\n")
    loclst <- list(one = NULL, two = list("180,250,30","350,250,30","265,250,30"),
                     three = list("157,197,30","350,202,30","250,354,30",
                                  "250,209,30","200,283,30","316,281,30",
                                  "250,259,30"),
                     four = list("100,215,30", "200,125,30","330,135,30","435,205,30","155,175,30",
                       "160,330,30","265,400,30","270,170,30","385,330,30","390,175,30",
                       "200,220,30","315,365,30","215,365,30","340,225,30","265,300,30"))
    loclst <- loclst[[ncol(links$venncounts)-1]]
    urlst <- sapply(links$vennout, path)
    if(tabindex) for(i in seq(along = urlst)) urlst[[i]] <- paste0(urlst[[i]], " tabindex=\"", i, "\"")
    strng <- do.call("c", mapply(fun, loclst, urlst, SIMPLIFY = FALSE))
    strng <- c(paste0("<map name=\"", sub("#", "", mapname), 
        "\">"), strng, "</map>")
    cat(strng, sep = "\n")
    if(!is.null(caption))
        cat(paste0("<p class=\"caption\">\n(#fig:", sub("#", "", mapname), ") ",
                   caption, "\n</p>\n</div>"), "\n")
}


## code for 4-way venn diagrams with colored lines under the counts

addLines <- function(comps, col = c("#66FF33","#FF9933", "#000000","#9900CC")){
    x <- rep(c(35,140,260,365,90,95,200,200,300,310,130,245,155,270,200),
             c(1,1,1,1,2,2,2,2,2,2,3,3,3,3,4))
    y <- rep(c(250,315,315,250,280,110,50,290,110,280,230,95,95,230,150),
             c(1,1,1,1,2,2,2,2,2,2,3,3,3,3,4))
    x0 <- x - 10
    x1 <- x + 10
    ind <- c(rep(1, 4), rep(1:2, times=6), rep(1:3, times=4), 1:4)
    y <- y - c(7.5, 10, 12.5, 15)[ind]
    ind2 <- c(1,2,3,4,1,2,1,3,1,4,2,3,2,4,3,4,1,2,3,1,2,4,1,3,4,2,3,4,1,2,3,4)
    segments(x0, y, x1, y, col = col[ind2], lwd = 2)
    legend("topleft", comps, lty = 1, lwd = 2, col = col, bty = "n",
           cex = 0.8)
}
##' A function to create a 4-way Venn diagram
##'
##' This function is an internal function and not really intended to be called by the end user. It is generally called by the \code{vennPage}
##' function. The goal is to create a 4-way Venn diagram in an HTML page with clickable links to tables of the genes found in a given cell.
##' In addition, the numbers in each cell are underlined with colored bars that help end users tell what contrasts are captured by that cell.
##' @title 4-way Venn Diagrams
##' @param fit An \code{MArrayLM} object, created by the limma package.
##' @param contrast A contrasts matrix, used by limma to generate the comparisons made.
##' @param p.value A p-value cutoff for significance
##' @param lfc A log fold change cutoff
##' @param adj.meth The method used to adjust for multiple comparisons.
##' @param baseUrl The base directory for the tables generated. Defaults to ".", meaning the current directory. 
##' @param reportDirectory The directory in which to put the results. Defaults to a "venns" subdirectory.
##' @param affy Boolean. Set to \code{TRUE} if using Affymetrix microarrays.
##' @param probecol The column containing either the Affymetrix probeset IDs (if the affy argument is set to \code{TRUE}) or
##' the name of a column in the output tables that contains uinque identifiers (Entrez Gene IDs, gene symbols, etc).
##' @param ... Allows arbitrary arguments to be passed to lower level functions
##' @return Returns a list. The first item is a (list of) HTMLReportRef objects that can be used by ReportingTools to create HTML links.
##' The second item is the output from the \code{venn} function in gtools, and the third item is the name of the contrasts used to generate
##' the Venn diagram.
##' @author James W. MacDonald \email{jmacdon@@u.washington.edu}
venn4Way <- function(fit, contrast,  p.value, lfc, adj.meth, baseUrl = ".", reportDirectory = "./venns", affy = TRUE,
                     probecol = "PROBEID", ...){
    ##require("gplots", character.only = TRUE) || stop("The gplots package is required for 4-way Venns.\n", call. = FALSE) 
    nam <- paste0(gsub(" ", ".", colnames(contrast)), ".")
    ## do some munging to get the right columns
    allcols <- c("PROBEID","ID","SYMBOL","GENENAME","logFC","t",switch(adj.meth, none = "P.Value", "adj.P.Val"))
    shortcols <- c("logFC","t",switch(adj.meth, none = "P.Value", "adj.P.Val"))
    outlst <- lapply(1:ncol(contrast), function(x) tableFilt(fit, x, pfilt = p.value, fldfilt = lfc, adjust = adj.meth))
    allcols <- match(allcols[allcols %in% names(outlst[[1]])], names(outlst[[1]]))
    shortcols <- match(shortcols[shortcols %in% names(outlst[[1]])], names(outlst[[1]]))
    theplot <- venn(lapply(outlst, function(x) x[,1]), show.plot = FALSE)
    for(i in seq_len(length(nam))) names(outlst[[i]][allcols]) <- paste0(rep(c("", nam[i]),
                                                                             c(length(setdiff(allcols, shortcols)),length(shortcols))),
                                                                             names(outlst[[i]][allcols]))
    vcind <- c(1,4,9,16)
    colind <- grep(probecol, names(outlst[[1]]))
    colind <- if(length(colind) > 0) {
                  colind
              } else {
                  allcols[1]
                  warning(paste("Using the", names(outlst[[1]])[allcols[1]], "column to uniquely identify genes."), call. = FALSE)
              }
    vennlst2 <- do.call("rbind", lapply((1:4)[sapply(outlst, nrow) > 0], function(x) data.frame(PROBEID = outlst[[x]][,colind], grp = vcind[x])))
    vennlst2[,1] <- factor(vennlst2[,1])
    vennlst2 <- sapply(split(vennlst2[,2], vennlst2[,1]), sum)
    vennlst3 <- lapply(c(1,4,5,9,10,13,14,16,17,20,21,25,26,29,30), function(x) names(vennlst2[vennlst2 == x]))
    indmat <- rbind(diag(4),cbind(1, diag(3)), cbind(0,1,diag(2)), c(0,0,1,1))
    z <- matrix(1, 4, 4)
    diag(z) <- 0
    indmat <- rbind(indmat, z[4:1,], rep(1,4))
    indmat <- matrix(as.logical(indmat), ncol = 4)
    theind <- c(1,2,4,8,3,5,9,6,10,12,7,11,13,14,15)
    fn <- LETTERS[1:4]
    out <- lapply(seq_len(length(theind)), function(x) {
        if(sum(indmat[x,]) == 1) {
            return(outlst[[which(indmat[x,])]][outlst[[which(indmat[x,])]][,colind] %in% vennlst3[[theind[x]]],allcols])
        }else{
            littleind <- which(indmat[x,])
            tmp1 <- outlst[[littleind[1]]][match(vennlst3[[theind[x]]], outlst[[littleind[1]]][,colind]),allcols]
            tmp2 <- do.call("cbind", lapply(littleind[-1], function(y)
                                            outlst[[y]][match(vennlst3[[theind[x]]], outlst[[y]][,colind]),shortcols]))
            return(cbind(tmp1, tmp2))
        }
    })
    ind <- which(sapply(out, nrow) > 0)
    nam <- sapply(seq_len(length(out)), function(x) paste0(paste(fn[indmat[x,]], collapse = "_"), "_venn"))
    vennout <- lapply(seq_len(length(out)), function(x) HTMLReport(nam[x], nam[x], baseUrl = baseUrl, reportDirectory = reportDirectory))
    lapply(ind, function(x) publish(out[[x]], vennout[[x]], if(affy) .modifyDF = list(affyLinks)))
    lapply(vennout[ind], finish)
    lapply(ind, function(x) write.table(out[[x]], paste0(reportDirectory, "/", nam[x], ".txt"), sep = "\t",
                                        quote = FALSE, row.names = FALSE, na = ""))
    return(list(vennout = vennout, venncounts = theplot, nam = colnames(contrast)))
}



##' A function to add dotplot glyphs and links to HTML tables
##'
##' This function is intended to create little dotplot glyphs that can be added to an HTML table of
##' results from e.g., a microarray or RNA-Seq experiment, showing graphically how much the different groups
##' are changing. The glyphs have unlabeled axes to make them small enough to fit in an HTML table, and clicking
##' on a glyph will result in a new page loading with a full sized dotplot, complete with axis labels.
##'
##' This function is very similar to the stock functions in the ReportingTools package, but the standard
##' glyphs for that package consist of a dotplot on top of a boxplot, which seems too busy to me. In addition,
##' for most microarray analyses there are not enough replicates to make a boxplot useful.
##' @title Add dotplot images
##' @param df A data.frame from calling \code{topTable}. Note that the row.names for this data.frame must
##' be consistent with the "eset" object. In other words, if "eset" is an \code{ExpressionSet}, then the row.names
##' of the data.frame must consistent with the featureNames of the \code{ExpressionSet}. 
##' @param eset A matrix, data.frame, or \code{ExpressionSet}. If using RNA-Seq data, use \code{voom} from edgeR to create
##' an \code{EList} object, and then pass in the "E" list item.
##' @param grp.factor A factor that indicates which group ALL of the samples belong to. This will be subsetted internally,
##' so do not subset yourself.
##' @param design The design matrix used by limma or edgeR to fit the model.
##' @param contrast The contrast matrix used by limma or edgeR to make comparisons.
##' @param colind Which column of the contrast matrix are we using? In other words, for which comparison are we creating a table?
##' @param boxplot Boolean. If \code{TRUE}, the output HTML table will have a boxplot showing differences between groups. If
##' \code{FALSE} (default), the table will have dotplots. 
##' @param repdir A directory in which to put the HTML tables. Defaults to a "reports" directory in the working directory.
##' @param extraname By default, the tables will go in a "reports" subdirectory, and will be named based on the column
##' name of the contrast that is specified by the colind argument (after replacing any spaces with an underscore). If this
##' will result in name collisions (e.g., a previous file will be over-written because the resulting names are the same),
##' then an extraname can be appended to ensure uniqueness.
##' @param weights Array weights, generally from \code{arrayWeights} in the limma package. These will affect the size
##' of the plotting symbols, to reflect the relative importance of each sample.
##' @param insert.after Which column should the image be inserted after? Defaults to 3.
##' @param altnam Normally the output file directories are generated from the colnames of the contrast matrix.
##' This argument can be used to over-ride the default, particularly in the case that one is computing an F-test using a
##' set of columns from the contrast matrix.
##' @param \dots Allows arbitrary arguments to be passed down to lower level functions.
##' @return A list, two items. The first item is the input data.frame with the glyphs included, ready to be used with
##' ReportingTools to create an HTML table. The second item is a pdf of the most differentially expressed comparison. This is
##' useful for those who are using e.g., knitr or Sweave and want to be able to automatically insert an example dotplot
##' in the document to show clients what to expect.
##' @author James W. MacDonald \email{jmacdon@@u.washington.edu}
##' @export makeImages
makeImages <- function(df, eset, grp.factor, design, contrast, colind, boxplot = FALSE, repdir = "./reports", extraname = NULL, weights = NULL,
                       insert.after = 3, altnam = NULL, ...){
    ## check that this is going to work
    eclass <- class(eset)[1]
    addtrailingslash <- function(path){
        tmp <- strsplit(repdir, "")[[1]]
        if(!tmp[length(tmp)] %in% "/") path <- paste0(path, "/")
        path
    }
    repdir <- addtrailingslash(repdir)
    rn <- switch(eclass,
                 ExpressionSet = featureNames(eset),
                 matrix = row.names(eset),
                 data.frame = row.names(eset),
                 DGEList = row.names(eset$counts),
                 stop(paste0("The 'eset' argument is of class", eclass, ". This function can only use",
                            "an ExpressionSet, matrix, data.frame or DGEList object"), call. = FALSE))
    
    if(!all(row.names(df) %in% rn))
        stop(paste0("The row.names of your input data.frame do not match up with the data in your", eclass, ".",
                    " You need to fix that before proceeding!\n"), call. = FALSE)
    if(is.null(altnam))
        figure.directory <- paste0(repdir, gsub(" ", "_", colnames(contrast)[colind]))
    else
        figure.directory <- paste0(repdir, gsub(" ", "_", altnam))
    if(!is.null(extraname)) figure.directory <- paste0(figure.directory, extraname)
    dir.create(figure.directory, recursive = TRUE)
    ind <- apply(design[,apply(contrast[,colind, drop = FALSE], 1, function(x) any(x != 0)), drop = FALSE], 1, sum) > 0
    grp.factor <- factor(grp.factor[ind])
    eset <- eset[,ind]
    if(!is.null(weights)) weights <- weights[ind]
    colnames(df)[colnames(df) == "SYMBOL"] <- "Symbol"
    makeGenePlots(df = df, expression.dat = eset, factor = grp.factor, figure.directory = figure.directory,
                  boxplot = boxplot, weights = weights, ...)
   
    figure.directory <- gsub(repdir, "", figure.directory)
    mini.image <- file.path(figure.directory, paste("mini", 
                                                   rownames(df), "png", sep = "."))
    pdf.image <- file.path(figure.directory, paste("boxplot", 
                                                  rownames(df), "pdf", sep = "."))
    df <- data.frame(df[,1:insert.after, drop = FALSE], Image = hwriteImage(mini.image, link = pdf.image, table = FALSE),
                     df[,(insert.after + 1):ncol(df), drop = FALSE])
    return(list(df = df, top.pdf = pdf.image[1]))
}


## convert makeGenePlots to ggplot
makeGenePlots <- function (df, expression.dat, factor, figure.directory, boxplot, ylab.type = "Expression Value", 
                           xlab = NULL, weights = NULL, ...) {
    eclass <- class(expression.dat)[1]
    expression.dat <- switch(eclass,
                             ExpressionSet = exprs(expression.dat),
                             matrix = expression.dat,
                             data.frame = as.matrix(expression.dat),
                             DGEList = cpm(expression.dat, log = TRUE),
                             stop(paste0("The 'expression.dat' argument is of class", eclass, ". This function can only use",
                            "an ExpressionSet, matrix, data.frame or DGEList object"), call. = FALSE))
        
    if (any(!rownames(df) %in% rownames(expression.dat))) {
        stop(paste("Can't find expression data for some features\n"))
    }
    for (probe in rownames(df)) {
        if ("Symbol" %in% colnames(df)) {
            ylab <- paste(df[probe, "Symbol"], ylab.type)
        }
        else {
            ylab <- paste(probe, ylab.type)
        }
        if(is.null(weights))
            plotIt <- data.frame(exprs = expression.dat[probe,], factor = factor)
        else
            plotIt <- data.frame(exprs = expression.dat[probe,], factor = factor, weights = weights)
        bigplot <- ggplot(plotIt, aes(factor, exprs, colour = factor)) + labs(x = "", y = ylab) +
            theme(axis.text.x = element_text(angle = 45, hjust = 1)) + theme(legend.position = "none")
        
        if(boxplot) {
            bigplot <- bigplot + geom_boxplot()
            
        } else {
            if(is.null(weights)) {
                bigplot <- bigplot + geom_point()
            } else {
                bigplot <- bigplot + geom_point(aes(size = weights))
            }
        }
        miniplot <- bigplot + theme_blank()
        minipng.filename <- paste("mini", probe, "png", sep = ".")
        minipng.file <- file.path(figure.directory, minipng.filename)
        png(minipng.file, height = 40, width = 200)
        grid.newpage()
        pushViewport(viewport(angle = 270, height = unit(220, 
            "points"), width = unit(44, "points"), name = "VP"))
        print(miniplot, newpage = FALSE)
        upViewport()
        dev.off()
        pdf.filename <- paste("boxplot", probe, "pdf", sep = ".")
        pdf.file <- file.path(figure.directory, pdf.filename)
        pdf(pdf.file, height = 4.5, width = 4.5)
        print(bigplot)
        dev.off()
    }
}


## this stolen from ReportingTools as well
remove.axis.and.padding <- function(plot) {
    plot$par.settings$layout.heights <- list(top.padding = 0, 
        main.key.padding = 0, key.axis.padding = 0, axis.xlab.padding = 0, 
        xlab.key.padding = 0, key.sub.padding = 0, bottom.padding = 0)
    plot$par.settings$layout.widths <- list(left.padding = 0, 
        key.ylab.padding = 0, ylab.axis.padding = 0, axis.key.padding = 0, 
        right.padding = 0)
    plot$x.scales$draw <- FALSE
    plot$y.scales$draw <- FALSE
    plot$xlab <- NULL
    plot$ylab <- NULL
    return(plot)
}

## convert this to ggplot

theme_blank <- function(base_size = 11, base_family = "") {
    theme_bw(base_size = base_size, base_family = base_family) %+replace%
    theme(axis.ticks = element_blank(), axis.title = element_blank(),
          axis.text = element_blank(), legend.position = "none",
          panel.border = element_blank(), panel.grid = element_blank(), complete = TRUE)
    }


##' A function to create an HTML table showing genes that gave rise to a significant GO term
##'
##' This is an internal function, not intended to be called by the end user. Documentation here for clarity.
##' After running a GO analysis, it is advantageous to output a table listing those
##' genes that gave rise to a significant GO term. This function creates the table, along with links to Netaffx
##' (if the data are Affymetrix) and to the NCBI Gene database (if there are Entrez Gene IDs).
##' @title Make Gene table from GO analysis results
##' @param fit.table The output from \link[limma]{topTable}
##' @param probe.sum.table The output from running \link[GOstats]{probeSetSummary} on a \link[GOstats:GOHyperGResult-class]{GOHyperGResults} object.
##' @param go.id The GO ID of interest
##' @param cont.name The contrast name.
##' @param base.dir Character. Where should the HTML tables be generated? Defaults to NULL. 
##' @param extraname Character. An extra name that can be used if the contrast name isn't descriptive enough.
##' @param probecol The column name in the topTable object that contains probe IDs. Defaults to PROBEID.
##' @param affy Boolean. Are the arrays from Affymetrix?
##' @return Returns an \link[ReportingTools:HTMLReportRef-class]{HTMLReportRef} object.
##' @author Jim MacDonald
makeGoGeneTable <- function(fit.table, probe.sum.table, go.id, cont.name, base.dir = NULL, extraname = NULL, 
                        probecol = "PROBEID", affy = TRUE){
    ## Windows doesn't like ':' in a file name and will silently replace with '_' anyway
    go.id <- gsub(":", "_", go.id)
    prbs <- probe.sum.table$ProbeSetID[as.logical(probe.sum.table$selected)]
    out <- fit.table[fit.table[,probecol] %in% prbs, , drop = FALSE]
    rep.dir <- gsub(" ", "_", cont.name)
    if(!is.null(extraname)) rep.dir <- paste0(rep.dir, "/", gsub(" ", "_", extraname))
    fixed <- fixHeaderAndGo(out, affy = affy, probecol = probecol)
    htab <- HTMLReport(go.id, paste0("Genes responsible for significance of ", go.id, " in contrast ", cont.name,
                                         if(!is.null(extraname)) paste(",", extraname)),
                       reportDirectory = rep.dir, basePath = base.dir)
    publish(fixed$df, htab, if(length(fixed$mdf) > 0) .modifyDF = fixed$mdf)
    finish(htab)
    htab
}
##' Internal function used to automatically test for columns that can be converted to links
##'
##' This is an internal function designed to test for the presence of Affymetrix Probeset IDs or
##' Entrez Gene IDs, and if found, generate a list that can be passed to the ReportingTools publish
##' function in order to generate hyperlinks. The underlying assumption is that the data will have been
##' annotated using a Bioconductor annotation package, and thus Affy probeset IDs will have a column header
##' "PROBEID", and Entrez Gene IDs will have a header "ENTREZID" (or any combination of upper and lowercase letters).
##' @title Fix data.frame header for use with ReportingTools
##' @param df A data.frame
##' @param affy Boolean; does the data.frame contain Affymetrix probeset IDs?
##' @param probecol Character. The column header containing Affymetrix probeset IDs. Defaults to "PROBEID".
##' @return Returns a list of length two (with names mdf and df). The mdf object can be passed to the
##' \code{\link[ReportingTools:publish-methods]{publish}} using the .modifyDF argument, and the df object is
##' input dat.frame with column names corrected to conform to \code{affyLinks} and \code{entrezLinks}, so links
##' will be generated correctly.
##' @author Jim MacDonald
fixHeaderAndGo <- function(df, affy = TRUE, probecol = "PROBEID"){
     any.entrez <- grep("entrezid", names(df), ignore.case = TRUE)
     if(length(any.entrez) > 0){
         names(df)[any.entrez] <- "ENTREZID"
         any.entrez <- TRUE
     }
     if(affy){
         any.affy <- grep(probecol, names(df), ignore.case = TRUE)
         if(length(any.affy) > 0) 
             names(df)[any.affy] <- "PROBEID"
         else
             stop(paste("\n\nFailed trying to generate links to the Affymetrix",
                        "website. If these are not Affy data, please set affy = FALSE",
                        "otherwise set the probecol argument to match the column",
                        "containing the Affymetrix Probeset IDs.\n"), call. = FALSE)
     }
     mdf <- list(affyLinks, entrezLinks)[c(affy, any.entrez)]
     return(list(mdf = mdf, df = df))
 }

##' This function is used to create HTML tables to present the results from a Gene Ontology (GO) analysis.
##'
##' After running a GO analysis, it is often useful to first present a table showing the set of significant GO terms
##' for a given comparison, and then have links to a sub-table for each GO term that shows the genes that were responsible
##' for the significance of that term. The first table can be generated using the \link[GOstats:GOHyperGResult-class]{summary} function, but
##' it will not contain the links to the sub-table. The ReportingTools package has functionality to make these tables and sub-tables
##' automatically, but the default is to include extra glyphs in the main table that are not that useful.
##' 
##'This function is intended to generate a more useful version of the table that one normally gets from ReportingTools.
##' @title Create HTML tables for Gene Ontology (GO) analyses
##' @param fit.table The output from \link[limma]{topTable}
##' @param go.summary The output from running \code{summary} on a \link[GOstats:GOHyperGResult-class]{GOHyperGResults} object.
##' @param probe.summary The output from running \link[GOstats]{probeSetSummary} on a \link[GOstats:GOHyperGResult-class]{GOHyperGResults} object.
##' @param cont.name The contrast name.
##' @param base.dir Character. Where should the HTML tables be generated? Defaults to GO_results.
##' @param extraname Character. An extra name that can be used if the contrast name isn't descriptive enough.
##' @param probecol  The column name in the topTable object that contains probe IDs. Defaults to PROBEID.
##' @param affy Boolean. Are the arrays from Affymetrix?
##' @return Returns an \link[ReportingTools:HTMLReportRef-class]{HTMLReportRef} object, which can be used when creating an index page to link
##' to the results.
##' @export
##' @author Jim MacDonald

makeGoTable <- function(fit.table, go.summary, probe.summary, cont.name, base.dir = "GO_results",
                        extraname = NULL, probecol = "PROBEID", affy = TRUE){
    htablst <- lapply(seq_len(nrow(go.summary)), function(x) makeGoGeneTable(fit.table, probe.summary[[x]],
                                                                             as.character(go.summary[x,1]),
                                                                             cont.name, base.dir, extraname,
                                                                             probecol, affy))
    go.summary[,1] <- hwrite(as.character(go.summary[,1]), 
                             link = sapply(htablst, function(x) gsub(paste0(base.dir, "/"), "", path(x))), table = FALSE)
    short.name <- paste0(gsub(" ", "_", cont.name), if(!is.null(extraname)) paste0("_", gsub(" ", "_", extraname)))
    long.name <- paste0(cont.name, if(!is.null(extraname)) paste(",", extraname))
    htab <- HTMLReport(short.name, long.name, reportDirectory = base.dir, baseDir = ".")
    publish(go.summary, htab, .modifyDF = goLinks)
    finish(htab)
    htab
}

#' Filter a topTable object
#' 
#' This function is designed to filter genes from a \code{topTable} object
#' based on p-value and/or fold change. This is an internal function and is not
#' intended to be called by thte end user.
#' 
#' 
#' @param fit An \code{MArrayLM} object, resulting from a call to \code{eBayes}
#' @param coef The contrast to be extracted into the topTable. See ?topTable
#' for more information.
#' @param number The number of genes to output. Only used if both foldfilt and
#' pfilt are NULL.
#' @param fldfilt The absolute value of fold difference to filter on. This
#' assumes the data are log transformed.
#' @param pfilt The p-value to filter on.
#' @param adjust The multiplicity adjustment to use. Options are
#' '"bonferroni"', '"holm"', '"hochberg"', '"hommel"', '"fdr"' and '"none"'. If
#' '"none"' then the p-values are not adjusted. A 'NULL' value will result in
#' the default adjustment method, which is '"fdr"'.
#' @return Returns a \code{data.frame} containing the selected genes.
#' @author James W. MacDonald <jmacdon@@u.washington.edu>
#' @keywords internal
tableFilt <- function(fit, coef = 1,  number = 30, fldfilt = NULL, pfilt = NULL,
                      adjust = "fdr"){
  if(is.null(fldfilt) && is.null(pfilt)){
    tab <- topTable(fit, coef = coef, number = number, adjust.method = adjust, sort.by = "p")
  }else{
    tab <- topTable(fit, coef = coef, number = dim(fit$coefficients)[1],
                    adjust.method = adjust, sort.by = "p")
  }
## Filter on p-value
  if(!is.null(pfilt))
      tab <- tab[tab[,"adj.P.Val"] < pfilt,]
  ## Filter on fold change
  if(!is.null(fldfilt))
    tab <- tab[abs(tab[,"logFC"]) > fldfilt,]
  tab
}

Try the affycoretools package in your browser

Any scripts or data that you put into this service are public.

affycoretools documentation built on Nov. 8, 2020, 6 p.m.