R/GoAnalysis.R

Defines functions GOTerms2Genes.sql goLinksTest enrichment_Analysis GOAnalysis

Documented in GOAnalysis

###########################################################
GOTerms2Genes.sql <- function(hgResult, anotPackage)
{
  selectedGOTerms <- intersect(names(geneIdUniverse(hgResult)), summary(hgResult)[, 1])
  selectedGO<- geneIdUniverse(hgResult)[selectedGOTerms]
  if(runMulticore == 1 || runMulticore == 3) {
    selectedGenes <- mclapply(selectedGO, function(x) {intersect(geneIds(hgResult), x)})
  } else {
    selectedGenes <- lapply(selectedGO, function(x) {intersect(geneIds(hgResult), x)})
  }

  sql.ENTREZSYMBOL <- "SELECT gene_id, symbol
                       FROM genes, gene_info
                       WHERE genes._id=gene_info._id"

  if(regexpr(".db", anotPackage) < 0) {
    genesAnnot <- dbGetQuery(eval(parse(text = paste0(anotPackage, "_dbconn()"))), sql.ENTREZSYMBOL)
  } else {
    genesAnnot <- dbGetQuery(eval(parse(text = paste0(substr(anotPackage, 1, nchar(anotPackage) - 3), "_dbconn()"))), sql.ENTREZSYMBOL)
  }

  if(runMulticore == 1 || runMulticore == 3) {
    selectedSymb <- mclapply(selectedGenes, function(x) {genesAnnot[which(unlist(genesAnnot) %in% x), ]})
    selectedSymb <- mclapply(selectedSymb, function(x) {x <- x[, -1]})
  } else {
    selectedSymb <- lapply(selectedGenes, function(x) {genesAnnot[which(unlist(genesAnnot) %in% x), ]})
    selectedSymb <- lapply(selectedSymb, function(x) {x <- x[, -1]})
  }

  return(selectedSymb)
}
###########################################################
goLinksTest <- function(my.GOIDs) {
  if(runMulticore ==1 || runMulticore ==3) {
    GOIDslinked <- unlist(mclapply(my.GOIDs, function(x) {paste0("<a href=\"http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&query=",
                                                                x, "\">", x, "</a>")}))
  } else {
    GOIDslinked <- unlist(lapply(my.GOIDs, function(x) {paste0("<a href=\"http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&query=",
                                                              x, "\">", x, "</a>")}))
  }

  return(as.character(GOIDslinked))
}
###########################################################
write.htmltable <- function (x, filename, title = "", sortby = NULL, decreasing = TRUE,
                             open = "wt", formatNumeric = function(x) paste(signif(x, 3))) {
  if(!is.null(sortby)) {
    if(!sortby %in% colnames(x))
      stop(paste("Invalid argument \"sortby\": could not find a column in data frame x with name", sortby))

    soby <- x[, sortby]

    if (!is.numeric(soby))
      stop("Invalid argument \"sortby\": column is not numeric")

    x <- x[order(soby, decreasing = decreasing), ]
  }

  outfile <- file(paste0(filename, ".html"), open = open)
  cat("<html>", "<STYLE>", "<!--TD { FONT-FAMILY: Helvetica,Arial; FONT-SIZE: 14px;}-->",
      "<!--H1 { FONT-FAMILY: Helvetica,Arial; FONT-SIZE: 22px;}-->",
      "</STYLE>", "<head>", paste0("<TITLE>", title, "</TITLE>"), "</head>",
      "<body bgcolor=#ffffff>", file = outfile, sep = "\n")

  if(title != "")
    cat("<CENTER><H1 ALIGN=\"CENTER\">", title, " </H1></CENTER>\n", file = outfile, sep = "\n")

  cat("<CENTER> \n", file = outfile)
  cat("<TABLE BORDER=0>", file = outfile, sep = "\n")
  cat("<TR>", file = outfile)

  for (j in 1:ncol(x)) cat("<TD BGCOLOR=\"", c("#e0e0ff", "#d0d0f0")[j%%2 + 1], "\"><B>",
                           colnames(x)[j], "</B></TD>\n", sep = "", file = outfile)

  cat("</TR>", file = outfile)

  for(i in 1:nrow(x)) {
    cat("<TR>", file = outfile)
    for(j in 1:ncol(x)) {
      txt <- switch(class(x[[j]]), numeric = formatNumeric(x[i, j]), as.character(x[i, j]))

      if(length(grep("^http:", txt)) > 0) {
        txt <- sub(";$", "", txt)
        s <- unlist(strsplit(txt, "[/?=]"))
        txt <- paste0("<A HREF=\"", txt, "\" TARGET=\"z\">", s[length(s)], "</A>")
      }
      cat("<TD BGCOLOR=\"", c("#e0e0ff", "#d0d0f0", "#f0f0ff", "#e0e0f0")[i%%2 * 2 + j%%2 + 1],
          "\">", txt, "</TD>\n", sep = "", file = outfile)
    }

    cat("</TR>", file = outfile)
  }

  cat("</TABLE></CENTER>", "</body>", "</html>", sep = "\n", file = outfile)
  close(outfile)
}
###########################################################

enrichment_Analysis <- function(EntrezIDs,
                                anotFName,
                                anotPackage,
                                outputDir,
                                universe = NULL,
                                pval = 0.05,
                                min.count = 3,
                                addGeneNames = TRUE,
                                ontologias = c("MF", "BP", "CC"),
                                testDirections = c("over", "under"),
                                contrast.Name) {
  universe <- unique(universe)
  EntrezIDs <- unique(EntrezIDs)

  params <- new("GOHyperGParams",
                geneIds = EntrezIDs,
                universeGeneIds = universe,
                annotation = anotPackage,
                ontology = "MF",
                pvalueCutoff = pval,
                testDirection = "over")

  resCum <- NULL
  resum <- NULL
  ontoTitle <- c(MF = "Molecular Function", BP = "Biological Process", CC = "Cellular Component")

  for(onto in ontologias) {
    params@ontology <- onto
    dir.i <- 1

    for(direction in testDirections) {
      params@testDirection <- direction
      hgResult <- hyperGTest(params)

      if(addGeneNames) {
        EnrichedGOTerms <- as.character(summary(hgResult)[, 1])
        if(length(EnrichedGOTerms) > 0) {
          selectedSymbols <- GOTerms2Genes.sql(hgResult, anotPackage)
          genesInHg <- sapply(selectedSymbols, function(x) paste(x, collapse = ", "))
          reshgTest <- cbind(summary(hgResult), GeneNames = genesInHg)
        } else {
          reshgTest <- summary(hgResult)
        }
      } else {
        reshgTest <- summary(hgResult)
      }

      colorTest <- ifelse(direction == "over", "red;\">", "green;\">")
      reshgTest <- cbind(reshgTest,
                         OverUnder = rep(paste0("<center><span style=\"color:", colorTest, direction, "</span></center>"),
                                         dim(reshgTest)[1]))
      reshgTest[, 1] <- goLinksTest(reshgTest[, 1])
      colnames(reshgTest)[1] <- "GOID"

      if(dir.i == 1) {
        reshgTest2table <- reshgTest
      } else {
        reshgTest2table <- rbind(reshgTest2table, reshgTest)
      }

      dir.i <- dir.i + 1
    }

    if(dim(reshgTest2table)[1] != 0) {
      reshgTest2table <- cbind(Ontology = paste0("<center>", onto, "</center>"),
                               reshgTest2table)
      resum <- rbind(resum, reshgTest2table)
    }
  }

  if(!is.null(resum)) {
    if(addGeneNames) {
      my.table <- resum[, c(1, 2, 8, 9, 7, 6, 5, 4, 3, 10)]
    } else {
      my.table <- resum[, c(1, 2, 8, 7, 6, 5, 4, 3, 9)]
    }

    write.htmltable(x = my.table,
                    # file = file.path(outputDir, anotFName),                              ### ModAlba
                    filename = file.path(outputDir, anotFName),                            ### ModAlba
                    title =  paste("GO Enrichment Analysis", contrast.Name),
                    open = "wt")

    sortable.html.table(df = my.table,
                        output.file = paste0(anotFName, "-sortable.html"),
                        output.directory = outputDir,
                        page.title = "GO Enrichment Analysis" )
  }

}
###########################################################
#' @name GOAnalysis
#' @title Function that perform enrichment analysis based on the GO for one gene list and all ontologies with the GOstats package.
#' @param fitMain Object generated by the lmFit function.
#' @param whichContrasts Describes the lmFit contrasts whose associated ids will be analyzed.
#' @param comparison.Name Name of the comparison. To identify the output.
#' @param outputDir Path of the file created.
#' @param anotPackage Annotation package.
#' @param my.IDs Gene identifiers to be analyze. If NULL all provided by the lmFit object will be analyzed.
#' @param addGeneNames Vector of the gene names.
#' @param fileOfLinks Name of the links file.
#' @param thrLogFC Threshold for the logarithm Fold Change
#' @param cutoffMethod Method used to select the genes to be analyzed.
#' @param P.Value.cutoff Maximum value for the p.value.
#' @param pval Pvalue for the enrichment analysis test.
#' @param min.count Minimum number of categories for a result to be considered.
#' @param ontologias Ontologies to be used in the analysis.
#' @param testDirections To decide if the analysis has to be applied to genes upregulated, downregulated or both.
#' @param minNumGens Mimimum number of genes.
#' @importFrom limma topTable
#' @importFrom GOstats hyperGTest
#' @importFrom SortableHTMLTables sortable.html.table
#' @importFrom DBI dbGetQuery
#' @importFrom links2File addToLinksFile
#' @import GO.db
#' @import GOstats
#' @examples
#' \dontrun{
#' library(GOstats)
#' GOResult<-BasicP::GOAnalysis(fitMain = fitMain, whichContrasts = wCont,
#' comparison.Name = "Estudi", outputDir = outputDir, anotPackage = "org.Hs.eg",
#' my.IDs = entrezTable, addGeneNames = TRUE, fileOfLinks = linksFile, thrLogFC = 1,
#' cutoffMethod = "adjusted", P.Value.cutoff = rep(0.05, length(wCont)), pval = 0.01,
#' min.count = 3, ontologias = c("MF", "BP", "CC"), testDirections = c("over", "under"),
#' minNumGens = 0)
#' }
#' @export


GOAnalysis <- function(fitMain,
                       whichContrasts,
                       comparison.Name,
                       outputDir,
                       anotPackage,
                       my.IDs,
                       addGeneNames = TRUE,
                       fileOfLinks,
                       thrLogFC=NULL,
                       cutoffMethod = c("adjusted", "unadjusted"),
                       P.Value.cutoff = rep(0.05, length(whichContrasts)),
                       pval = 0.05,
                       min.count = 3,
                       ontologias = c("MF", "BP", "CC"),
                       testDirections = c("over", "under"),
                       minNumGens = 0) {
  categLabel <- "GO"

  if((!is.null(fitMain)) && (!is.null(fitMain$contrasts[, whichContrasts]))) {
    if(is.null(my.IDs)) {
      # stopifnot(require(old2db (anotPackage), character.only=T))                                              ### ModAlba
      stopifnot(requireNamespace(old2db(anotPackage)))                                                          ### ModAlba

      myenvirENTREZID <- eval(parse(text = paste0(anotPackage, "ENTREZID")))

      # geneUniverse <- unlist(mget(unique( fitMain$genes[,1]), env = myenvirENTREZID, ifnotfound = NA))        ### ModAlba
      geneUniverse <- unlist(mget(unique( fitMain$genes[, 1]), envir = myenvirENTREZID, ifnotfound = NA))       ### ModAlba
    } else {
      geneUniverse <- unlist(my.IDs)
      geneUniverse <- unique(geneUniverse[geneUniverse != "---"])
    }

    for(i in whichContrasts) {
      if(is.null(thrLogFC)) {
        thrLogFC <- 0
      } else {
        thrLogFC <- abs(thrLogFC)
      }

      # top.Diff <- topTable(fitMain, coef = i, n = nrow(fitMain$t), adjust = "fdr", lfc=thrLogFC)  # Seleccionem per FDR y mes                       ### ModAlba
      top.Diff <- topTable(fitMain, coef = i, number = nrow(fitMain$t), adjust.method = "fdr", lfc = thrLogFC)  # Seleccionem per FDR y mes           ### ModAlba


      if(cutoffMethod == "adjusted") {
        top.Diff.selected.up  <- top.Diff[(top.Diff$adj.P.Val < P.Value.cutoff[i]) & (top.Diff$t >0), ]
        top.Diff.selected.down <- top.Diff[(top.Diff$adj.P.Val < P.Value.cutoff[i]) & (top.Diff$t < 0), ]
      } else {
        top.Diff.selected.up  <- top.Diff[(top.Diff$P.Value < P.Value.cutoff[i]) & (top.Diff$t >0), ]
        top.Diff.selected.down <- top.Diff[(top.Diff$P.Value < P.Value.cutoff[i]) & (top.Diff$t < 0), ]
      }

      if(!is.null(top.Diff$ID)) {
        gNames.up <- top.Diff.selected.up$ID
        gNames.down <- top.Diff.selected.down$ID
      } else {
        gNames.up <- rownames(top.Diff.selected.up)
        gNames.down <- rownames(top.Diff.selected.down)
      }

      if(!(is.null(my.IDs))) {
        selectedEntrezIds.up <- unlist(my.IDs[gNames.up])
        selectedEntrezIds.up <- unique(selectedEntrezIds.up[selectedEntrezIds.up != "---"])

        selectedEntrezIds.down <- unlist(my.IDs[gNames.down])
        selectedEntrezIds.down <- unique(selectedEntrezIds.down[selectedEntrezIds.down != "---"])
      } else {
        # stopifnot(require(old2db (anotPackage), character.only=T))                                                  ### ModAlba
        stopifnot(requireNamespace(old2db(anotPackage)))                                                              ### ModAlba

        myenvirENTREZID <- eval(parse(text = paste0(anotPackage, "ENTREZID")))

        # selectedEntrezIds.up <- unlist(mget(unique(gNames.up), env = myenvirENTREZID, ifnotfound = NA))             ### ModAlba
        # selectedEntrezIds.down <- unlist(mget(unique(gNames.down), env = myenvirENTREZID, ifnotfound = NA))         ### ModAlba
        selectedEntrezIds.up <- unlist(mget(unique(gNames.up), envir = myenvirENTREZID, ifnotfound = NA))             ### ModAlba
        selectedEntrezIds.down <- unlist(mget(unique(gNames.down), envir = myenvirENTREZID, ifnotfound = NA))         ### ModAlba
      }

      contrast.Name <- colnames(fitMain$contrasts)[i]

      if(is.null(selectedEntrezIds.up) | (length(selectedEntrezIds.up) <= minNumGens)) {
        warning(paste("There are not enough genes for cutoff", P.Value.cutoff[i], "and t > 0 in comparison", contrast.Name))
        cat(paste("There are not enough genes for cutoff", P.Value.cutoff[i], "and t > 0 in comparison", contrast.Name, "\n"))
      } else {
        eaUpFName <- paste("SignificantGO", comparison.Name, contrast.Name, "Up", sep = ".")
        eA.up <- enrichment_Analysis(EntrezIDs = selectedEntrezIds.up,
                                     anotFName = eaUpFName,
                                     anotPackage = anotPackage,
                                     outputDir = outputDir,
                                     universe = geneUniverse,
                                     pval = pval,
                                     min.count = min.count,
                                     addGeneNames = addGeneNames,
                                     ontologias = ontologias,
                                     testDirections = testDirections,
                                     contrast.Name = paste("for up-regulated genes in comparison", contrast.Name, sep = ": "))

        addToLinksFile(fileOfLinks,
                       paste(eaUpFName, "html", sep = "."),
                       categ = categLabel,
                       desc = paste("Enrichment Analysis for up-regulated genes in comparison", contrast.Name, sep = ": "))


      }

      #      if (is.null(selectedEntrezIds.down))
      if(is.null(selectedEntrezIds.down) | (length(selectedEntrezIds.down) <= minNumGens)) {
        warning(paste("There are not enough genes for cutoff", P.Value.cutoff[i], "and t < 0 in comparison", contrast.Name))
        cat(paste("There are not enough genes for cutoff", P.Value.cutoff[i], "and t < 0 in comparison", contrast.Name, "\n"))
      } else {
        eaDownFName <- paste("SignificantGO", comparison.Name, contrast.Name, "Down", sep = ".")
        eA.down <- enrichment_Analysis(EntrezIDs = selectedEntrezIds.down,
                                       anotFName = eaDownFName,
                                       anotPackage = anotPackage,
                                       outputDir = outputDir,
                                       universe = geneUniverse,
                                       pval = pval,
                                       min.count = min.count,
                                       addGeneNames = addGeneNames,
                                       ontologias = ontologias,
                                       testDirections = testDirections,
                                       contrast.Name = paste("for down-regulated genes in comparison", contrast.Name, sep = ": "))

        addToLinksFile(fileOfLinks, paste(eaDownFName, "html", sep = "."),
                       categ = categLabel,
                       desc = paste("Enrichment Analysis for down-regulated genes in comparison", contrast.Name, sep = ": "))
      }
    }
  }

  return(NULL)
}
uebvhir/BasicP4microArrays documentation built on Nov. 5, 2019, 11:03 a.m.