# R/gsea_functions.R

# Defines functions: makeCLS, makeTXT4gsea, makeGMT, writeSet

# Documented in: makeCLS, makeGMT, makeTXT4gsea, writeSet

#' Write gene counts for genes in specified KEGG pathway to an xlsx file.
#'
#' The Entrez ids of the requested KEGG gene set are mapped to gene symbols,
#' the rows of `expr` whose names match those symbols are selected, and rows
#' whose values sum to 0.5 or less are dropped before writing.
#'
#' @param set The name of the KEGG gene set (a name in `kegg.sets.hs`).
#' @param expr Gene expression matrix whose row names are gene symbols (they
#'   are matched against the `SYMBOL` column of `org.Hs.eg.db`).
#' @param filename The name of the output xlsx file. When `NULL` (default),
#'   the file is named `<set>_dmso_norm.xlsx`.
#'
#' @return Writes xlsx file of counts to `filename`; the value of
#'   `write.xlsx()` is passed through.
#'
#' @examples
#' writeSet("hsa04012 ErbB signaling pathway", expr, "ErbB_counts.xlsx")
#'
#' @export
writeSet <- function(set, expr, filename = NULL) {
  entrez <- kegg.sets.hs[[set]]
  if (is.null(entrez)) {
    stop("Unknown KEGG gene set: ", set, call. = FALSE)
  }

  genes.symb <- mapIds(org.Hs.eg.db, entrez,
                       keytype = "ENTREZID", column = "SYMBOL")
  genes.symb <- genes.symb[!is.na(genes.symb)]
  names(genes.symb) <- NULL
  # Keep only the pathway genes that are actually present in `expr`.
  genes.symb <- base::intersect(genes.symb, rownames(expr))

  # drop = FALSE keeps a two-dimensional object even when a single gene or a
  # single sample remains; otherwise rowSums() below would fail on a vector.
  set.expr <- expr[genes.symb, , drop = FALSE]
  set.expr <- set.expr[rowSums(set.expr) > .5, , drop = FALSE]

  if (is.null(filename)) {
    filename <- paste0(set, "_dmso_norm.xlsx")
  }
  write.xlsx(set.expr, file = filename)
}

#' Create GMT file of gene sets
#'
#' Each gene set becomes one tab-separated line: the set name, the
#' placeholder description `"na"`, then the genes of the set.
#'
#' @param setgenes A named list of character vectors, each of which contains
#'   the genes in a single gene set. The list names are used as set names.
#' @param filename The name of the output file.
#' @param changenames Character vector with the source and destination
#'   nomenclatures (optional). Currently accepted but not used; it has no
#'   effect on the output.
#'
#' @return Invisibly `NULL`. Called for its side effect of writing a GMT
#'   file, in which each row represents a gene set.
#'
#' @examples
#' my.list <- list(setA = c("TP53", "EGFR"), setB = c("MYC", "KRAS"))
#' makeGMT(my.list, file.path(tempdir(), "mygenesets.gmt"))
#'
#' @export
makeGMT <- function(setgenes, filename, changenames = NULL) {
  set.names <- names(setgenes)
  # Validate before touching the file system so that bad input does not leave
  # an empty output file behind.
  if (length(setgenes) > 0 && is.null(set.names)) {
    stop("`setgenes` must be a named list; its names are the gene set names.",
         call. = FALSE)
  }

  gmt.lines <- vapply(seq_along(setgenes), function(i) {
    paste(c(set.names[[i]], "na", setgenes[[i]]), collapse = "\t")
  }, character(1))

  wr <- file(filename, "w")
  # Close the connection even if writing fails.
  on.exit(close(wr), add = TRUE)
  writeLines(gmt.lines, wr)
  invisible(NULL)
}

#' Create GSEA-compatible gene expression TXT file
#'
#' Writes a tab-separated file whose first line is `NAME` followed by the
#' column names of `expr`, and whose remaining lines are the row name of each
#' gene followed by its values.
#'
#' @param expr Gene expression matrix, with gene names as rows and samples as
#'   columns
#' @param filename The name of the output file.
#'
#' @return Invisibly `NULL`. Called for its side effect of writing a TXT file
#'   compatible with GSEA.
#'
#' @examples
#' makeTXT4gsea(expr, "gsea_expression.txt")
#'
#' @export
makeTXT4gsea <- function(expr, filename) {
  wr <- file(filename, "w")
  # Close the connection even if write.table() fails part-way through.
  on.exit(close(wr), add = TRUE)

  # NOTE(review): GSEA's TXT format description lists a DESCRIPTION column
  # after NAME; none is written here -- confirm GSEA accepts this layout.
  header <- paste(c("NAME", colnames(expr)), collapse = "\t")
  writeLines(header, wr)

  # Row names are written as the first field of each line (write.table
  # default), lining up with the NAME header written above.
  utils::write.table(expr, file = wr, quote = FALSE, sep = "\t",
                     col.names = FALSE)
  invisible(NULL)
}

#' Create GSEA-compatible CLS file describing expression set phenotypes
#'
#' The file has three tab-separated lines: the number of samples, the number
#' of distinct phenotypes and the constant `1`; a `#` followed by the
#' distinct phenotype labels in order of first appearance; and the phenotype
#' label of every sample.
#'
#' @param colData column metadata, one row per sample
#' @param which index (or name) of the single column of `colData` containing
#'   the phenotypes
#' @param filename The name of the output file.
#'
#' @return Invisibly `NULL`. Called for its side effect of writing a CLS file
#'   describing phenotypes of gene expression set.
#'
#' @examples
#' colData <- data.frame(sample = c("s1", "s2", "s3"),
#'                       condition = c("ctl", "trt", "ctl"))
#' makeCLS(colData, "condition", file.path(tempdir(), "phenotypes.cls"))
#'
#' @export
makeCLS <- function(colData, which, filename) {
  pheno <- colData[, which, drop = TRUE]
  if (!is.null(dim(pheno))) {
    stop("`which` must select a single column of `colData`.", call. = FALSE)
  }
  # Convert to character first: combining "#" with a factor via c() would
  # write the factor's integer codes instead of its labels.
  pheno <- as.character(pheno)
  classes <- unique(pheno)

  wr <- file(filename, "w")
  # Close the connection even if writing fails.
  on.exit(close(wr), add = TRUE)

  firstline <- paste(c(length(pheno), length(classes), 1), collapse = "\t")
  writeLines(firstline, wr)

  secondline <- paste(c("#", classes), collapse = "\t")
  writeLines(secondline, wr)

  # Per-sample labels come from the same column as the class names above.
  thirdline <- paste(pheno, collapse = "\t")
  writeLines(thirdline, wr)
  invisible(NULL)
}
# Source: danielderrick/defunctions documentation built on Aug. 4, 2017, 6:23 p.m.