library(httr)
library(XML)
library(methods)
source("R/GeneAnnoClasses.R")
#' Create gene specific objects containing data from online resources
#'
#' \code{getGeneSummary} populates and returns a vector of \code{gene} objects
#' with information sourced from a series of html requests to the NIH and
#' Uniport publicly available databases.
#'
#' Information returned from a database requests is parsed into a \code{gene}
#' object, which are saved in a 'genes' subdirectory of the working directory.
#' Each \code{gene} object is added to a vector of objects, which is then
#' returned.
#' Where gene information has previously been downloaded and objects saved
#' (within the last seven days), \code{gene} objects are repopulated from the
#' saved files so as to minimise server traffic.
#' \emph{N.B.} the function includes a random wait (of up to 5s) between each
#' gene downloaded.
#'
#' \emph{N.B.} It is possible to define an alternative directory using
#' \code{slot(geneanno,"fileroot") <- "/path/to/directory"}
#'
#' @param x object of class \code{geneanno}.
#' @return vector of \code{gene} objects, each containing the collated data
#' from the public resources.
#' @importFrom methods setGeneric setMethod
#' @importFrom stats runif
#' @importFrom utils read.table write.table
#' @examples
# \dontrun{
#' query <- matrix(c("Axitinib","BRAF","Imatinib","BRAF"),ncol=2,byrow=TRUE)
#' g <- getUniqueGeneList(geneanno(),query)
#' gs <- getGeneSummary(g)
# }
#' @export
#'
setGeneric("getGeneSummary", function(x) {
standardGeneric("getGeneSummary")
})
#' @describeIn getGeneSummary Produces a vector of \code{gene} objects, each
#' containing the collated data from the public resources.
setMethod("getGeneSummary","geneanno",
function(x){
output = list()
count = 0
archivedList = vector()
archListOld <- vector()
noret <- vector()
fileroot <- ifelse(identical(x@fileroot,character(0)),getwd(),x@fileroot)
if (!dir.exists(file.path(fileroot,x@genefilestem))) {
dir.create(file.path(fileroot,x@genefilestem))
}
archFile <- file.path(fileroot,slot(x,"genefilestem"),"archived.txt")
archList <- list()
if (file.exists(archFile)) {
archList <- read.table(archFile)
}
toMatch <- c('^ENSG', '^ENSP', '^ENST')
match <- paste(toMatch,collapse="|")
for (gene in slot(x,"genelist")) {
filename = file.path(fileroot,x@genefilestem,
sprintf("%s.RData",gene))
count = count + 1
if (gene %in% unlist(archList)) {
cat(count, gene," is archived in Ensembl\n")
archListOld <- append(archListOld,gene)
} else if (file.exists(filename) && Sys.time() < (
file.info(filename)$mtime + 7*86400)) {
load(filename)
output[[gene]] <- g
cat(count, gene, "\n" )
} else{
f <- query()
if (grepl(match,gene)) {
r <- getEnsembleList(gene)
if (length(r$display_name) == 0) {
cat(count, gene," is archived in Ensembl\n")
archivedList <- append(archivedList,gene)
next()
} else {f@query <- r$display_name}
} else {
f@query <- gene
}
query <- sprintf("Homo+sapiens[organism]+AND+(%s[Gene%%2FProtein+Name]+OR+%s[Nucleotide%%2FProtein+Accession])+AND+alive[prop]",f@query,f@query)
f <- getNihQuery(f,"gene",query)
cat(count, gene, " step 1..." )
g <- getNihSummary(gene(),f)
if (length(slot(g,"name")) == 0) {
cat("nothing returned\n")
} else {
cat(" 2..." )
g <- getUniprotSummary(g,f)
cat("downloaded\n" )
}
output[[gene]] <- g
save(g,file = filename)
Sys.sleep(runif(1)*5)
}
}
if (length(archivedList) >= 1 || length(archListOld) >= 1) {
cat("\nThe following Ensembl Ids are archived:",archivedList, archListOld,"\n\n")
if (length(archivedList) >= 1) {
write.table(archivedList,archFile,append = TRUE,row.names=FALSE,col.names=FALSE)
}
}
return(output)
}
)
#' Create unique list of genes from input list
#'
#' \code{getUniqueGeneList} takes a 2-column matrix of group identifiers and
#' gene names, returning the seperated list of unique group identifiers and a
#' unique list of genes.
#'
#' @param x object of class geneanno.
#' @param inputlist vector of strings, being a mixed list of group numbers and
#' gene names
#' @return object of type geneanno; a copy of input object having the additional
#' list of group numbers and a list of genes from \code{uniquelist}
#' @importFrom methods setGeneric setMethod
#' @examples
#' data("genematrix")
#' ga <- getUniqueGeneList(geneanno(),mygenematrix)
#' @export
#'
setGeneric("getUniqueGeneList", function(x,inputlist) {
standardGeneric("getUniqueGeneList")
})
#' @describeIn getUniqueGeneList object of type geneanno; a copy of input object
#' having the additional list of group numbers and a unique list of genes from
#' \code{s}
setMethod("getUniqueGeneList",signature(x = "geneanno",inputlist = "character"),
function(x,inputlist){
cat("inputlist is a List")
q <- inputlist[grep("^[0-9]",inputlist)]
x@groupnos <- unique(q)
inputlist <- inputlist[grep("^[A-Za-z]",inputlist)]
inputlist <- gsub("^([a-zA-Z0-9]+)\\..*","\\1",inputlist,perl = TRUE)
x@genelist <- unique(inputlist)
return(x)
}
)
#' @describeIn getUniqueGeneList object of type geneanno; a copy of input object
#' having the additional list of group numbers and a unique list of genes from
#' \code{s}
setMethod("getUniqueGeneList",signature(x = "geneanno",inputlist = "matrix"),
function(x,inputlist){
#cat("inputlist is a Matrix")
x@groupnos <- unique(inputlist[,1])
s <- inputlist[,2]
s <- gsub("^([a-zA-Z0-9]+)\\..*","\\1",s,perl = TRUE)
x@genelist <- unique(s)
return(x)
}
)
#' Create unique list of group,gene combinations from input list
#'
#' \code{getGroupGeneList} takes a mixed list of group identifiers (numeric) and
#' gene names or a 2-column matrix of group identifiers and gene names,
#' returning the seperated list of group identifiers and the related lists of
#' genes.
#'
#' @param x object of class geneanno.
#' @param inputlist vector of strings, being a mixed list of group numbers and
#' gene names
#' @return object of type geneanno; a copy of input object having the additional
#' list of group numbers and a unique list of genes from \code{s}
#' @examples
#' data("genematrix")
#' ggl <- getGroupGeneList(geneanno(),mygenematrix)
#' @importFrom methods setGeneric setMethod
#' @export
#'
setGeneric("getGroupGeneList", function(x,inputlist) {
standardGeneric("getGroupGeneList")
})
#' @describeIn getGroupGeneList uses vector of characters of group ids and Gene
#' names for inputlist.
setMethod("getGroupGeneList",signature(x = "geneanno", inputlist = "character"),
function(x,inputlist){
output <- list()
group = 0
z <- 1
for (i in x@groupnos) {
output[[i]] <- list()
}
for (i in inputlist) {
if (grepl("^[A-Za-z]",i)) {
output[[group]] <- append(output[[group]],i)
} else if (grepl("^[0-9]+",i)) {
group <- i
}
}
for (i in x@groupnos) {
t <- output[[i]][!is.null(output[[i]])]
output[[i]] <- unique(t)
}
return(output)
}
)
#' @describeIn getGroupGeneList uses matrix of group ids and Gene names for
#' inputlist.
setMethod("getGroupGeneList",signature(x = "geneanno",inputlist = "matrix"),
function(x,inputlist){
output <- list()
group = 0
for (i in x@groupnos) {
output[[i]] <- list()
}
for (i in 1:nrow(inputlist)) {
group <- inputlist[i,1]
output[[group]] <- append(output[[group]],inputlist[i,2])
}
for (i in x@groupnos) {
t <- output[[i]][!is.null(output[[i]])]
output[[i]] <- unique(t)
}
return(output)
}
)
#' Parse files of gene names and group identifiers to create unique lists of
#' each
#'
#' \code{parseInputFile} takes a mixed file containing group identifiers
#' (numeric) and gene names, returning the list of group identifiers and genes
#' with the remaining columns removed.
#' The package was originally written to work from a file laid out thus:
#' group_id1
#' gene_name1
#' gene_name2
#' group_id2
#' gene_name1
#' gene_name3
#' The methods assume that both group identifiers and gene names are
#' alphanumeric; the group identifiers, where present, begining with a number
#' and gene names starting with a character.
#' please note, this populates the vector with only the alphanumric strings
#' beginning each line of the input file. Also, RNA genes (begining ENSG000)
#' are currently excluded.
#'
#' @param x object of class geneanno.
#' @param file character string providing the name of the input file
#' @return vector of character strings, as exemplified by the inputlist data object.
#' @importFrom methods setGeneric setMethod
#' @importFrom utils read.table
#' @examples
#' data("inputlist")
#' write(myinputlist,file="inputlist.txt")
#' x <- geneanno()
#' x <- parseInputFile(x,"inputlist.txt")
#' @export
#'
setGeneric("parseInputFile", function(x,file) {
standardGeneric("parseInputFile")
})
#' @describeIn parseInputFile vector of character strings
setMethod("parseInputFile","geneanno",
function(x,file){
fileroot <- ifelse(identical(x@fileroot,character(0)),getwd(),
x@fileroot)
file <- file.path(fileroot,file)
s <- read.table(file,header = FALSE)[1]
t <- gsub("^([a-zA-Z0-9]+)\\..*","\\1",s[,1],perl = TRUE)
t <- t[grep("^ENSG000",t,invert = TRUE)]
return(t)
}
)
#' Save Gene Information to group Specific Output Files
#'
#' \code{produceOutputFiles} saves the downloaded gene summary information into
#' group specific files.
#'
#' @param x object of class geneanno.
#' @param ggl a Vector of group specific vectors relating the list of genes to
#' the groups, as in the input file
#' @param gs Vector of Gene Summary information downloaded from NIH and Uniprot
#' databases, typically output by getGeneSummary
#' @param pub Matrix containing journal article information from the PubMed
#' resource, as retruned by the \code{searchPublications} function
#' @return This function has no return value as outputs directly to file.
#' @importFrom methods setGeneric setMethod
#' @importFrom utils read.table
#' @examples
#' data("genematrix")
#' data("genesummary")
#' ggl <- getGroupGeneList(mygeneanno,mygenematrix)
#' data("publications")
#' produceOutputFiles(mygeneanno,ggl,mygenesummaries,mypublications)
#' @export
#'
setGeneric("produceOutputFiles", function(x,ggl,gs,pub) {
standardGeneric("produceOutputFiles")
})
#' @describeIn produceOutputFiles Save object data to text files.
setMethod("produceOutputFiles",signature(x = "geneanno",ggl = "vector", gs = "vector",pub = "missing"),
function(x,ggl,gs){
fileroot <- ifelse(identical(x@fileroot,character(0)),getwd(),x@fileroot)
if (!dir.exists(file.path(fileroot,x@outputstem))) {
dir.create(file.path(fileroot,x@outputstem))
}
for (d in x@groupnos) {
filename = file.path(fileroot,x@outputstem,sprintf("%s.txt",d))
if (file.exists(filename)) {file.remove(filename)}
for (i in ggl[[d]]) {
if (!is.null(i)) {
write(i,file = filename, append = TRUE)
for (j in slotNames(gs[[i]])) {
for (f in slot(gs[[i]],j)) {
if (!is.na(f) && !is.null(f) && f != "") {
write(sprintf("\t%s: %s",toupper(j),f),
file = filename,append = TRUE)
}
}
}
write('\n',file = filename,append = TRUE)
}
}
}
}
)
#' @describeIn produceOutputFiles Save object data, including journal articles,
#' to text files.
setMethod("produceOutputFiles",signature(x = "geneanno",ggl = "vector", gs = "vector",pub = "vector"),
function(x,ggl,gs,pub) {
fileroot <- ifelse(identical(x@fileroot,character(0)),getwd(),x@fileroot)
if (!dir.exists(file.path(fileroot,x@outputstem))) {
dir.create(file.path(fileroot,x@outputstem))
}
for (d in slot(x,"groupnos")) {
filename = file.path(fileroot,x@outputstem,sprintf("%s.txt",d))
if (file.exists(filename)) {file.remove(filename)}
for (i in ggl[[d]]) {
if (!is.null(i)) {
write(i,file = filename,append = TRUE)
for (j in slotNames(gs[[i]])) {
for (f in slot(gs[[i]],j)) {
if (!is.na(f) && !is.null(f) && f != "") {
write(sprintf("\t%s: %s",toupper(j),f), file = filename,append = TRUE)
}
}
}
for (k in pub[which(pub == paste(c(d,i),collapse = " ") ),2] ) {
if (length(k) > 0) {
write("\n\tJOURNAL ARTICLES:", file = filename,append = TRUE)
for (l in k) {
vol <- ifelse((!is.na(l@Issue) && !is.null(l@Issue) && l@Issue != ""),sprintf("%s(%s)",l@Volume,l@Issue),l@Volume)
write(sprintf("\t%s\n\t%s (%s). %s. %s; %s.\n\tPMID:%s. DOI:%s\n",l@Title,l@Authors,strsplit(l@Date," ")[[1]][1],l@Journal,vol,l@Pages,l@Id,l@DOI), file = filename,append = TRUE)
}
}
}
write('\n',file = filename,append = TRUE)
}
}
}
}
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.