#' Create a clusterProfiler compatible enrichResult data structure from a goseq result.
#'
#' The metrics and visualization methods in clusterProfiler are the
#' best. It is not always trivial to get non-model organisms working
#' well with clusterProfiler. Therefore I still like using tools like
#' topgo/goseq/gostats/gprofiler. This function and its companions
#' seek to make them cross-compatible. Ideally, they will lead me to
#' being able to rip out a lot of superfluous material.
#'
#' @param retlist Result from simple_goseq().
#' @param ontology Ontology sub-tree of interest.
#' @param cutoff (Adjusted) p-value cutoff.
#' @param cutoff_column Choose a column of p-values.
#' @param organism Currently unused.
#' @param padjust_method Define the desired p.adjust method.
#' @return enrichResult object ready to pass to things like dotplot.
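#' @examples
#' \dontrun{
#' ## A minimal sketch, assuming 'goseq_result' is the list returned by
#' ## simple_goseq() for a set of significant genes.
#' goseq_enrich <- goseq2enrich(goseq_result, ontology = "BP", cutoff = 0.05)
#' ## The resulting enrichResult should then work with the enrichplot/
#' ## clusterProfiler visualizations:
#' enrichplot::dotplot(goseq_enrich)
#' }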
#' @export
goseq2enrich <- function(retlist, ontology = "MF", cutoff = 1,
cutoff_column = "over_represented_pvalue",
organism = NULL, padjust_method = "BH") {
godf <- retlist[["go_db"]]
sig_genes <- rownames(retlist[["input"]])
interesting_name <- paste0(tolower(ontology), "_interesting")
interesting <- retlist[[interesting_name]]
if (is.null(interesting)) {
return(NULL)
}
if (nrow(interesting) == 0) {
return(NULL)
}
## I would like to write this data as an enrichResult as per
## DOSE/clusterProfiler so that I may use their plotting functions
## without fighting. Therefore I will coerce the various results I
## create into that data structure's format.
## The enrichResult is a class created with the following code:
bg_genes <- sum(!duplicated(sort(godf[["ID"]])))
adjusted <- p.adjust(interesting[["over_represented_pvalue"]], method = padjust_method)
interesting[["adjusted"]] <- adjusted
interesting[["tmp"]] <- bg_genes
interesting_cutoff_idx <- interesting[[cutoff_column]] <= cutoff
interesting_cutoff <- interesting[interesting_cutoff_idx, ]
genes_per_category <- gather_ontology_genes(
retlist, ontology = ontology, column = "over_represented_pvalue",
pval = cutoff, include_all = FALSE)
category_genes <- gsub(pattern = ", ", replacement = "/", x = genes_per_category[["sig"]])
## FIXME: This is _definitely_ wrong for BgRatio
representation_df <- data.frame(
"ID" = rownames(interesting_cutoff),
"Description" = interesting_cutoff[["term"]],
## The following two lines are ridiculous, but required for the enrichplots to work.
"GeneRatio" = paste0(interesting_cutoff[["numDEInCat"]], "/", interesting_cutoff[["numInCat"]]),
"BgRatio" = paste0(interesting_cutoff[["numDEInCat"]], "/", interesting_cutoff[["tmp"]]),
"pvalue" = interesting_cutoff[["over_represented_pvalue"]],
"p.adjust" = interesting_cutoff[["adjusted"]],
"qvalue" = interesting_cutoff[["qvalue"]],
"geneID" = category_genes,
"Count" = interesting_cutoff[["numDEInCat"]],
stringsAsFactors = FALSE)
rownames(representation_df) <- representation_df[["ID"]]
if (is.null(organism)) {
organism <- "UNKNOWN"
}
ret <- new("enrichResult",
result = representation_df,
pvalueCutoff = cutoff,
pAdjustMethod = padjust_method,
qvalueCutoff = cutoff,
gene = sig_genes,
universe = godf[["ID"]],
## universe = extID,
geneSets = list(up=sig_genes),
## geneSets = geneSets,
organism = organism,
keytype = "UNKNOWN",
ontology = ontology,
readable = FALSE)
return(ret)
}
#' Create a clusterProfiler compatible enrichResult data structure from a gostats result.
#'
#' The metrics and visualization methods in clusterProfiler are the
#' best. It is not always trivial to get non-model organisms working
#' well with clusterProfiler. Therefore I still like using tools like
#' topgo/goseq/gostats/gprofiler. This function and its companions
#' seek to make them cross-compatible. Ideally, they will lead me to
#' being able to rip out a lot of superfluous material.
#'
#' @param retlist Result from simple_gostats().
#' @param ontology Ontology sub-tree of interest.
#' @param cutoff (Adjusted) p-value cutoff.
#' @param cutoff_column Choose a column of p-values.
#' @param organism Currently unused.
#' @param padjust_method Define the desired p.adjust method.
#' @return enrichResult object ready to pass to things like dotplot.
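#' @examples
#' \dontrun{
#' ## A minimal sketch, assuming 'gostats_result' is the list returned by
#' ## simple_gostats().
#' gostats_enrich <- gostats2enrich(gostats_result, ontology = "BP",
#'                                  cutoff = 0.1, cutoff_column = "qvalue")
#' enrichplot::dotplot(gostats_enrich)
#' }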
#' @export
gostats2enrich <- function(retlist, ontology = "MF", cutoff = 0.1,
cutoff_column = "qvalue",
organism = NULL, padjust_method = "BH") {
godf <- retlist[["go_db"]]
sig_genes <- rownames(retlist[["input"]])
interesting_name <- glue("{tolower(ontology)}_subset")
interesting <- retlist[["tables"]][[interesting_name]]
if (is.null(interesting)) {
return(NULL)
}
if (nrow(interesting) == 0) {
return(NULL)
}
## I think having both an adjusted and a qvalue is redundant?
interesting[["adjusted"]] <- p.adjust(interesting[["Pvalue"]], method = padjust_method)
## I would like to write this data as an enrichResult as per
## DOSE/clusterProfiler so that I may use their plotting functions
## without fighting. Therefore I will coerce the various results I
## create into that data structure's format.
## The enrichResult is a class created with the following code:
bg_genes <- sum(!duplicated(sort(godf[["ID"]])))
interesting[["tmp"]] <- bg_genes
interesting_cutoff_idx <- interesting[[cutoff_column]] <= cutoff
message("Given the cutoff of ", cutoff, ", ", sum(interesting_cutoff_idx),
" categories appear interesting.")
interesting_cutoff <- interesting[interesting_cutoff_idx, ]
message("Gathering genes/category, this may be slow.")
genes_per_category <- gather_ontology_genes(
retlist, ontology = ontology, column = "Pvalue",
pval = cutoff)
category_genes <- gsub(pattern = ", ", replacement = "/", x = genes_per_category[["sig"]])
interesting_cutoff[["term_nohtml"]] <- gsub(x = interesting_cutoff[["Term"]], pattern = "^<a href.*>(.*)</a>", replacement = "\\1")
## FIXME: This is _definitely_ wrong for BgRatio
representation_df <- data.frame(
"ID" = interesting_cutoff[[1]],
"Description" = interesting_cutoff[["term_nohtml"]],
## The following two lines are ridiculous, but required for the enrichplots to work.
"GeneRatio" = paste0(interesting_cutoff[["Count"]], "/", interesting_cutoff[["Size"]]),
"BgRatio" = paste0(interesting_cutoff[["Count"]], "/", interesting_cutoff[["tmp"]]),
"pvalue" = interesting_cutoff[["Pvalue"]],
"p.adjust" = interesting_cutoff[["adjusted"]],
"qvalue" = interesting_cutoff[["qvalue"]],
## "geneID" = category_genes,
"Count" = interesting_cutoff[["Count"]],
stringsAsFactors = FALSE)
rownames(representation_df) <- representation_df[["ID"]]
if (is.null(organism)) {
organism <- "UNKNOWN"
}
ret <- new("enrichResult",
result = representation_df,
pvalueCutoff = cutoff,
pAdjustMethod = padjust_method,
qvalueCutoff = cutoff,
gene = sig_genes,
universe = godf[["ID"]],
## universe = extID,
geneSets = list(up=sig_genes),
## geneSets = geneSets,
organism = organism,
keytype = "UNKNOWN",
ontology = ontology,
readable = FALSE)
return(ret)
}
#' Recast gProfiler data to the output class produced by clusterProfiler.
#'
#' I would like to use the various clusterProfiler plots more easily.
#' Therefore I figured it would be advantageous to coerce the various
#' outputs from gprofiler and friends into the datastructure produced by
#' clusterProfiler.
#'
#' @param retlist Output from simple_gprofiler()
#' @param ontology Category type to extract, currently only GO?
#' @param cutoff Use a p-value cutoff to get only the significant
#' categories?
#' @param organism Set the orgdb organism name?
#' @param padjust_method what it says on the tin.
#' @return enrichResult object ready to pass to things like dotplot.
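#' @examples
#' \dontrun{
#' ## A minimal sketch, assuming 'gprofiler_result' is the list returned by
#' ## simple_gprofiler().
#' gprofiler_enrich <- gprofiler2enrich(gprofiler_result, ontology = "MF")
#' enrichplot::dotplot(gprofiler_enrich)
#' }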
gprofiler2enrich <- function(retlist, ontology = "MF", cutoff = 1,
organism = NULL, padjust_method = "BH") {
godf <- retlist[["go_db"]]
interesting_name <- paste0(tolower(ontology), "_interesting")
interesting <- retlist[[interesting_name]]
if (is.null(interesting)) {
return(NULL)
}
## I would like to write this data as an enrichResult as per
## DOSE/clusterProfiler so that I may use their plotting functions
## without fighting. Therefore I will coerce the various results I
## create into that data structure's format.
## The enrichResult is a class created with the following code:
bg_genes <- sum(!duplicated(sort(godf[["ID"]])))
adjusted <- p.adjust(interesting[["over_represented_pvalue"]], method = padjust_method)
genes_per_category <- gather_ontology_genes(retlist, ontology = ontology,
column = "over_represented_pvalue",
pval = cutoff)
category_genes <- gsub(pattern=", ", replacement="/", x=genes_per_category[["sig"]])
interesting[["tmp"]] <- bg_genes
## The following line may in fact be incorrect, I need to look at
## the constructor for the enrichResult again.
sig_genes <- rownames(interesting)
## FIXME: This is _definitely_ wrong for BgRatio
representation_df <- data.frame(
"ID" = sig_genes,
"Description" = interesting[["term"]],
## The following two lines are ridiculous, but required for the enrichplots to work.
"GeneRatio" = paste0(interesting[["numDEInCat"]], "/", interesting[["numInCat"]]),
"BgRatio" = paste0(interesting[["numDEInCat"]], "/", interesting[["tmp"]]),
"pvalue" = interesting[["over_represented_pvalue"]],
"p.adjust" = adjusted,
"qvalue" = interesting[["qvalue"]],
"geneID" = category_genes,
"Count" = interesting[["numDEInCat"]],
stringsAsFactors = FALSE)
rownames(representation_df) <- representation_df[["ID"]]
if (is.null(organism)) {
organism <- "UNKNOWN"
}
ret <- new("enrichResult",
result = representation_df,
pvalueCutoff = cutoff,
pAdjustMethod = padjust_method,
qvalueCutoff = cutoff,
gene = sig_genes,
universe = godf[["ID"]],
## universe = extID,
geneSets = list(up=sig_genes),
## geneSets = geneSets,
organism = organism,
keytype = "UNKNOWN",
ontology = ontology,
readable = FALSE)
return(ret)
}
#' Convert a simple_topgo() result to an enrichResult.
#'
#' Same idea as goseq2enrich.
#'
#' @param retlist result from simple_topgo()
#' @param ontology Ontology subtree to act upon.
#' @param pval P-value cutoff (hmm, I think I need to standardize these cutoff arguments).
#' @param organism org name/data.
#' @param column Table column to export.
#' @param padjust_method Use this method for the pvalues for the enrich result.
#' @return enrichResult object ready to pass to things like dotplot.
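#' @examples
#' \dontrun{
#' ## A minimal sketch, assuming 'topgo_result' is the list returned by
#' ## simple_topgo().
#' topgo_enrich <- topgo2enrich(topgo_result, ontology = "bp", pval = 0.05,
#'                              column = "fisher")
#' enrichplot::dotplot(topgo_enrich)
#' }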
topgo2enrich <- function(retlist, ontology = "mf", pval = 0.05, organism = NULL,
column = "fisher", padjust_method = "BH") {
result_name <- paste0(column, "_", tolower(ontology))
if (column == "el") {
column <- "EL"
}
if (column == "ks") {
column <- "KS"
}
interesting_name <- paste0(tolower(ontology), "_interesting")
godata <- retlist[["godata"]][[result_name]]
result_data <- retlist[["results"]][[result_name]]
interesting <- retlist[["tables"]][[interesting_name]]
bg_genes <- godata@allGenes
scores <- interesting[[column]]
adjusted <- p.adjust(scores, method = padjust_method)
sig_genes <- rownames(retlist[["input"]])
genes_per_category <- gather_ontology_genes(retlist, ontology = ontology, pval = pval,
column = column)
## One of the biggest oddities of enrichResult objects: the scores
## are explicitly ratio _string_, thus 0.05 is '5/100'.
category_genes <- gsub(pattern=", ", replacement="/", x=genes_per_category[["sig"]])
names(category_genes) <- rownames(genes_per_category)
keepers <- names(category_genes) %in% rownames(interesting)
category_genes_kept <- category_genes[keepers]
interesting[["gene_ids"]] <- as.character(category_genes_kept)
interesting[["tmp"]] <- length(bg_genes)
## FIXME: This is _definitely_ wrong for BgRatio
representation_df <- data.frame(
"ID" = rownames(interesting),
"Description" = interesting[["Term"]],
## The following two lines are ridiculous, but required for the enrichplots to work.
"GeneRatio" = paste0(interesting[["Significant"]], "/", interesting[["Annotated"]]),
"BgRatio" = paste0(interesting[["Significant"]], "/", interesting[["tmp"]]),
"pvalue" = interesting[[column]],
"p.adjust" = adjusted,
"qvalue" = interesting[["qvalue"]],
"geneID" = interesting[["gene_ids"]],
"Count" = interesting[["Significant"]],
stringsAsFactors = FALSE)
rownames(representation_df) <- representation_df[["ID"]]
if (is.null(organism)) {
organism <- "UNKNOWN"
}
ret <- new("enrichResult",
result = representation_df,
pvalueCutoff = pval,
pAdjustMethod = padjust_method,
qvalueCutoff = pval,
gene = sig_genes,
universe = godata@graph@nodes,
## universe = extID,
geneSets = list(up=sig_genes),
## geneSets = geneSets,
organism = organism,
keytype = "UNKNOWN",
ontology = ontology,
readable = FALSE)
return(ret)
}
#' Perform one of the simple_*() ontology searches on some random data.
#'
#' At the very least, the result should be less significant than the actual data!
#'
#' @param input Some input data
#' @param method goseq, clusterp, topgo, gostats, gprofiler.
#' @param n how many 'genes' to analyse?
#' @param ... Arguments passed to the method.
#' @return An ontology result
#' @seealso [simple_goseq()] [simple_clusterprofiler()] [simple_topgo()] [simple_gostats()]
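#' @examples
#' \dontrun{
#' ## A minimal sketch, assuming 'expt' is an expt or ExpressionSet; any extra
#' ## arguments (GO mappings, gene lengths, etc.) are passed through to the
#' ## chosen simple_*() method, so supply whatever that method requires.
#' random_goseq <- random_ontology(expt, method = "goseq", n = 100)
#' ## The random result should be (much) less significant than the real data.
#' }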
#' @export
random_ontology <- function(input, method = "goseq", n = 200, ...) {
## Lets assume the result of *_pairwise() or combine_de_tables()
input_table <- NULL
if (class(input) == "expt" || class(input) == "ExpressionSet") {
input_table <- data.frame(row.names = rownames(exprs(input)))
input_table[["ID"]] <- rownames(input_table)
input_table[["DE"]] <- 1
} else if (!is.null(input[["data"]])) {
## Then it is presumably from combine_de_tables; use its first table.
input_table <- input[["data"]][[1]]
} else if (!is.null(input[["all_tables"]])) {
## Then it presumably came from *_pairwise; use its first table.
input_table <- input[["all_tables"]][[1]]
} else {
stop("Not sure what to do with this input.")
}
input_idx <- sample(x = nrow(input_table), size = n)
input_table <- input_table[input_idx, ]
random_result <- NULL
switchret <- switch(
method,
"goseq" = {
random_result <- simple_goseq(input_table, ...)
},
"clusterp" = {
random_result <- simple_clusterprofiler(input_table, ...)
},
"topgo" = {
random_result <- simple_topgo(input_table, ...)
},
"gostats" = {
random_result <- simple_gostats(input_table, ...)
},
"gprofiler" = {
random_result <- simple_gprofiler(input_table, ...)
},
{
message("Not sure what to do with this method.")
random_result <- NULL
})
return(random_result)
}
#' Take gene/exon lengths from a suitable data source (gff/TxDb/OrganismDbi)
#'
#' Primarily goseq, but also other tools on occasion require a set of gene IDs
#' and lengths. This function is responsible for pulling that data from either a
#' gff file or a TxDb/OrganismDbi.
#'
#' @param db Object containing data; if it is a string, it is assumed to be the
#'  filename of a gff file.
#' @param gene_list Set of genes to query.
#' @param type Function name used for extracting data from TxDb objects.
#' @param id Column from the resulting data structure to extract gene IDs.
#' @param possible_types Character list of types I have previously used.
#' @param ... More arguments are passed to arglist.
#' @return Dataframe containing 2 columns: ID, length
#' @seealso [GenomicFeatures]
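#' @examples
#' \dontrun{
#' ## A minimal sketch, assuming 'txdb' is a TxDb/OrganismDbi instance and
#' ## 'ids' is a character vector of the gene IDs of interest.
#' length_df <- extract_lengths(db = txdb, gene_list = ids)
#' head(length_df)
#' ## Should yield a data frame with 'ID' and 'length' columns.
#' }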
#' @export
extract_lengths <- function(db = NULL, gene_list = NULL,
type = "GenomicFeatures::transcripts", id = "TXID",
possible_types = c("GenomicFeatures::genes",
"GenomicFeatures::cds",
"GenomicFeatures::transcripts"), ...) {
arglist <- list(...)
## The 3 ids correspond to the columns produced by genes/cds/transcripts
## respectively which contain the IDs. If one is overwritten, the other
## should be, too.
## Here is the fundamental problem with extracting lengths from these
## databases: If we have 1 gene ID, what do we associate the length to? A
## single transcript, the entire gene's length before transcription?, the
## length before splicing? All of these are possible given the databases of
## gene lengths we have available and it is not always trivial/possible to
## tell which is correct because we cannot be certain which ID type has been
## provided. Therefore, this function will query every type and try to find
## the one with the best overlap against the set of gene IDs provided.
tmpdb <- db
metadf <- NULL
gene_list <- gene_list[complete.cases(gene_list)]
chosen_column <- NULL
hits_list <- list()
column_list <- list()
chosen_type <- NULL
most_hits <- 0
## Translating to ENTREZIDs sometimes introduces NAs which messes up the
## following operations. Possible_types should be a listing of the various
## methods we have of acquiring gene lengths. The code in the for loop
## should therefore invoke each of these in turn and figure out which
## provides the best overlap and use that.
for (c in seq_along(possible_types)) {
testing <- NULL
ty <- possible_types[c]
## make a granges/iranges using the function in possible_types.
test_string <- glue("testing <- {ty}(tmpdb)")
eval(parse(text = test_string))
## as.data.frame is not only base, but also biocgenerics!!!
## Make a dataframe out of the information above and find the most
## likely appropriate ID column
test_meta <- BiocGenerics::as.data.frame(testing)
possible_columns <- colnames(test_meta)
## It turns out this is pretty much always the last column.
chosen_column <- possible_columns[length(possible_columns)]
## Find the overlap, and if this is larger than our current best overlap...
overlap <- gene_list %in% test_meta[[chosen_column]]
hits_list[[ty]] <- testing
column_list[[ty]] <- chosen_column
message("Testing ", ty, " with column ", chosen_column, " an overlap of ",
sum(overlap), " was observed out of ", length(gene_list), " genes.")
## Note it as the best type so far.
if (sum(overlap) > most_hits) {
chosen_type <- ty
most_hits <- sum(overlap)
}
}
message("Actually using type ", chosen_type,
" consider one of the above if that is not good enough.")
## Now we have a list of all the lengths by function used to acquire them.
testing <- hits_list[[chosen_type]]
## We have a second list of the columns containing the appropriate IDs.
chosen_column <- column_list[[chosen_type]]
## So, bring them together here.
meta <- BiocGenerics::as.data.frame(testing)
if (!is.null(meta[["width"]])) {
metadf <- as.data.frame(meta)[, c(chosen_column, "width")]
} else if (!is.null(meta[["length"]])) {
metadf <- as.data.frame(meta)[, c(chosen_column, "length")]
} else {
stop("This requires either length or width columns.")
}
colnames(metadf) <- c("ID", "length")
rownames(metadf) <- metadf[["ID"]]
return(metadf)
}
#' Extract a set of geneID to GOID mappings from a suitable data source.
#'
#' Like extract_lengths above, this is primarily intended to read gene ID and GO
#' ID mappings from an OrgDb/OrganismDbi object.
#'
#' @param db Data source containing mapping information.
#' @param metadf Data frame containing extant information.
#' @param keytype Keytype used for querying
#' @return Dataframe of 2 columns: geneID and goID.
#' @seealso [AnnotationDbi]
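#' @examples
#' \dontrun{
#' ## A minimal sketch, assuming 'org' is an OrgDb instance (e.g. org.Dm.eg.db)
#' ## keyed by ENTREZ IDs.
#' go_df <- extract_go(org, keytype = "ENTREZID")
#' head(go_df)
#' ## Should yield a data frame with 'ID' and 'GO' columns.
#' }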
#' @export
extract_go <- function(db, metadf = NULL, keytype = "ENTREZID") {
keytype <- toupper(keytype)
possible_keytypes <- AnnotationDbi::keytypes(db)
godf <- data.frame()
success <- FALSE
ids <- AnnotationDbi::keys(x = db, keytype = keytype)
if ("GOID" %in% possible_keytypes) {
godf <- sm(AnnotationDbi::select(x = db, keys = ids, keytype = keytype,
columns = c("GOID")))
godf[["ID"]] <- godf[[1]]
godf <- godf[, c("ID", "GOID")]
colnames(godf) <- c("ID", "GO")
} else if ("GO" %in% possible_keytypes) {
godf <- sm(AnnotationDbi::select(x = db, keys = ids, keytype = keytype,
columns = c("GO")))
godf[["ID"]] <- godf[[1]]
godf <- godf[, c("ID", "GO")]
colnames(godf) <- c("ID", "GO")
} else if ("GOALL" %in% possible_keytypes) {
godf <- sm(AnnotationDbi::select(x = db, keys = ids, keytype = keytype,
columns = c("GOALL")))
godf[["ID"]] <- godf[[1]]
godf <- godf[, c("ID", "GO")]
colnames(godf) <- c("ID", "GO")
}
return(godf)
}
#' Extract more easily readable information from a GOTERM datum.
#'
#' The output from the GOTERM/GO.db functions is inconsistent, to put it
#' nicely. This attempts to extract from that heterogeneous datatype something
#' easily readable. Example: Synonym() might return any of the following:
#' NA, NULL, "NA", "NULL", c("NA",NA,"GO:00001"), "GO:00002", c("Some text",NA,
#' NULL, "GO:00003") This function will boil that down to 'not found', '',
#' 'GO:00004', or "GO:0001, some text, GO:00004"
#'
#' @param value Result of try(as.character(somefunction(GOTERM[id])), silent = TRUE).
#' somefunction would be 'Synonym' 'Secondary' 'Ontology', etc...
#' @return something more sane (hopefully).
#' @seealso [GO.db]
#' @examples
#' \dontrun{
#' ## goterms = GOTERM[ids]
#' ## sane_goterms = deparse_go_value(goterms)
#' }
#' @export
deparse_go_value <- function(value) {
result <- ""
if (class(value) == "try-error") {
result <- "Not found"
} else {
## Not an error
if (is.null(value)) {
result <- ""
} else if (is.na(value)) {
result <- ""
} else if (value == "NULL") {
result <- ""
} else if (grepl("^c\\(", as.character(value))) {
value <- eval(parse(text = as.character(value)))
if (class(value) == "logical") {
result <- ""
} else {
value <- as.character(value[which(complete.cases(value))])
result <- value
}
} else {
## Just a string "GO:00023409"
result <- value
}
}
return(result)
}
#' Get a go term from ID.
#'
#' @param go GO id or a list thereof, this may be a character or list(assuming
#' the elements, not names, are goids).
#' @return Some text containing the terms associated with GO id(s).
#' @seealso [AnnotationDbi] [GO.db]
#' @examples
#' \dontrun{
#' goterm("GO:0032559")
#' ## > GO:0032559
#' ## > "adenyl ribonucleotide binding"
#' }
#' @export
goterm <- function(go = "GO:0032559") {
go <- as.character(go)
term <- function(id) {
value <- try(as.character(
AnnotationDbi::Term(GO.db::GOTERM[id])), silent = TRUE)
if (class(value) == "try-error") {
value <- "not found"
}
return(value)
}
go <- mapply(term, go)
return(go)
}
#' Get a go synonym from an ID.
#'
#' I think I will need to do similar parsing of the output for this function as
#' per gosec(). In some cases this also returns stuff like c("some text",
#' "GO:someID") versus "some other text" versus NULL versus NA.
#' This function just does a mapply(gosn, go).
#'
#' @param go GO id, this may be a character or list(assuming the elements are goids).
#' @return Some text providing the synonyms for the given id(s).
#' @seealso [AnnotationDbi] [GO.db]
#' @examples
#' \dontrun{
#' text = gosyn("GO:0000001")
#' text
#' ## > GO:0000001
#' ## > "mitochondrial inheritance"
#' }
#' @export
gosyn <- function(go = "GO:0000001") {
go <- as.character(go)
gosn <- function(go) {
go <- as.character(go)
result <- ""
value <- try(as.character(
AnnotationDbi::Synonym(GO.db::GOTERM[go])), silent = TRUE)
result <- paste(deparse_go_value(value), collapse = "; ")
return(result)
}
go <- mapply(gosn, go)
return(go)
}
#' Get a GO secondary ID from an id.
#'
#' Unfortunately, GOTERM's returns for secondary IDs are not consistent, so this
#' function has to have a whole bunch of logic to handle the various outputs.
#'
#' @param go GO ID, this may be a character or list(assuming the elements, not
#' names, are goids).
#' @return Some text comprising the secondary GO id(s).
#' @seealso [AnnotationDbi] [GO.db]
#' @examples
#' \dontrun{
#' gosec("GO:0032432")
#' ## > GO:0032432
#' ## > "GO:0000141" "GO:0030482"
#' }
#' @export
gosec <- function(go = "GO:0032432") {
gosc <- function(go) {
go <- as.character(go)
result <- ""
value <- try(as.character(
AnnotationDbi::Secondary(GO.db::GOTERM[go])), silent = TRUE)
result <- deparse_go_value(value)
return(result)
}
go <- mapply(gosc, go)
return(go)
}
#' Get a go long-form definition from an id.
#'
#' Sometimes it is nice to be able to read the full definition of some GO terms.
#'
#' @param go GO ID, this may be a character or list (assuming the elements are
#' goids).
#' @return Some text providing the long definition of each provided GO id.
#' @seealso [AnnotationDbi] [GO.db]
#' @examples
#' \dontrun{
#' godef("GO:0032432")
#' ## > GO:0032432
#' ## > "An assembly of actin filaments that are on the same axis but may be
#' ## > same or opposite polarities and may be packed with different levels of tightness."
#' }
#' @export
godef <- function(go = "GO:0032432") {
go <- as.character(go)
def <- function(id) {
## This call to AnnotationDbi might be wrong
value <- try(as.character(
AnnotationDbi::Definition(GO.db::GOTERM[id])), silent = TRUE)
if (class(value) == "try-error") {
value <- "not found"
}
return(value)
}
go <- mapply(def, go)
return(go)
}
#' Get a go ontology name from an ID.
#'
#' @param go GO id, this may be a character or list (assuming the elements are goids).
#' @return The set of ontology IDs associated with the GO ids, thus 'MF' or 'BP' or 'CC'.
#' @seealso [AnnotationDbi] [GO.db]
#' @examples
#' \dontrun{
#' goont(c("GO:0032432", "GO:0032433"))
#' ## > GO:0032432 GO:0032433
#' ## > "CC" "CC"
#' }
#' @export
goont <- function(go = c("GO:0032432", "GO:0032433")) {
go <- as.character(go)
ont <- function(id) {
value <- try(as.character(AnnotationDbi::Ontology(GO.db::GOTERM[id])),
silent = TRUE)
if (class(value) == "try-error") {
value <- "not found"
}
return(value)
}
go <- mapply(ont, go)
return(go)
}
#' Get a go level approximation from an ID.
#'
#' Sometimes it is useful to know how far up/down the ontology tree a given id
#' resides. This attempts to answer that question.
#'
#' @param go GO id, this may be a character or list (assuming the elements are goids).
#' @return Set of numbers corresponding to approximate tree positions of the GO ids.
#' @seealso [AnnotationDbi] [GO.db]
#' @examples
#' \dontrun{
#' golev("GO:0032559")
#' ## > 3
#' }
#' @export
golev <- function(go) {
go <- as.character(go)
level <- 0
requireNamespace("GO.db")
while (class(try(as.character(AnnotationDbi::Ontology(GO.db::GOTERM[go])),
silent = TRUE)) != "try-error") {
term <- GO.db::GOTERM[go]
test <- as.character(AnnotationDbi::Ontology(term))
if (class(test) == "try-error") {
next
}
if (test == "MF") {
## I am not certain if GO.db:: will work for this
ancestors <- GO.db::GOMFANCESTOR[go]
} else if (test == "BP") {
ancestors <- GO.db::GOBPANCESTOR[go]
} else if (test == "CC") {
ancestors <- GO.db::GOCCANCESTOR[go]
} else {
## There was an error
message("There was an error getting the ontology: ", as.character(go))
ancestors <- "error"
}
go <- as.list(ancestors)[[1]][1]
level <- level + 1
if (go == "all") {
return(level)
}
} ## End while
return(level)
}
#' Get a go level approximation from a set of IDs.
#'
#' This just wraps golev() in mapply.
#'
#' @param go Character list of IDs.
#' @return Set of approximate levels within the ontology.
#' @seealso [golev()]
#' @examples
#' \dontrun{
#' golevel(c("GO:0032559", "GO:0000001")
#' ## > 3 4
#' }
#' @export
golevel <- function(go = c("GO:0032559", "GO:0000001")) {
mapply(golev, go)
}
#' Test GO ids to see if they are useful.
#'
#' This just wraps gotst in mapply.
#'
#' @param go go IDs as characters.
#' @return 1 for each GO ID found in GO.db, 0 otherwise.
#' @seealso [GO.db]
#' @examples
#' \dontrun{
#' gotest("GO:0032559")
#' ## > 1
#' gotest("GO:0923429034823904")
#' ## > 0
#' }
#' @export
gotest <- function(go) {
gotst <- function(go) {
go <- as.character(go)
value <- try(GO.db::GOTERM[go], silent = TRUE)
if (class(value) == "try-error") {
return(0)
}
if (is.null(value)) {
return(0)
} else {
return(1)
}
}
mapply(gotst, go)
}
#' Use the orgdb instances from clusterProfiler to gather annotation data for GO.
#'
#' Since clusterprofiler no longer builds gomaps, I need to start understanding
#' how to properly get information from orgDBs.
#'
#' @param goseq_data Some data from goseq and friends.
#' @param orgdb_go The orgDb instance with GO data.
#' @param orgdb_ensembl The orgDb instance with ensembl data.
#' @return GO mapping
#' @seealso [goseq]
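#' @examples
#' \dontrun{
#' ## A minimal sketch, assuming 'goseq_result' came from simple_goseq() and the
#' ## two orgdb arguments are Bimap objects mapping GO IDs to entrez genes and
#' ## entrez genes to ensembl IDs (e.g. org.Dm.egGO2ALLEGS and org.Dm.egENSEMBL).
#' annotated <- gather_genes_orgdb(goseq_result, orgdb_go = org.Dm.egGO2ALLEGS,
#'                                 orgdb_ensembl = org.Dm.egENSEMBL)
#' }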
gather_genes_orgdb <- function(goseq_data, orgdb_go, orgdb_ensembl) {
## all_ontologies <- mappedkeys(orgdb)
## all_mappings <- as.list(orgdb[all_ontologies])
my_table <- goseq_data[["godata_interesting"]]
my_ontologies <- my_table[["category"]]
my_genes <- goseq_data[["input"]][["ensembl_gene"]]
my_table[["entrez_ids"]] <- ""
my_table[["ensembl_ids"]] <- ""
my_table[["all_ensembl_in_ontology"]] <- ""
for (count in seq_len(nrow(my_table))) {
ont <- my_table[count, "category"]
test_map <- try(as.list(orgdb_go[ont]), silent = TRUE)
if (class(test_map) == "list") {
## If it returned as a list, then we should have 1:n entrez gene IDs
## Sadly, we do pretty much everything as ensembl, or perhaps geneDb
entrez_geneids <- unlist(test_map)
my_table[count, "entrez_ids"] <- toString(entrez_geneids)
ensembl_geneids <- as.character(orgdb_ensembl[entrez_geneids])
my_ensembl_geneids <- my_genes[my_genes %in% ensembl_geneids]
my_table[count, "ensembl_ids"] <- toString(my_ensembl_geneids)
my_table[count, "all_ensembl_in_ontology"] <- toString(ensembl_geneids)
}
}
return(my_table)
}
#' Perform ontology searches given the results of a differential expression analysis.
#'
#' This takes a set of differential expression results, extracts a subset of
#' up/down expressed genes; passes them to goseq, clusterProfiler, topGO,
#' GOstats, and gProfiler; collects the outputs; and returns them in a
#' (hopefully) consistent fashion. It attempts to handle the differing required
#' annotation/GOid inputs required for each tool and/or provide supported
#' species in ways which the various tools expect.
#'
#' @param de_out List of topTables comprising limma/deseq/edger outputs.
#' @param gene_lengths Data frame of gene lengths for goseq.
#' @param goids Data frame of goids and genes.
#' @param n Number of genes at the top/bottom of the fold-changes to define 'significant.'
#' @param z Number of standard deviations from the mean fold-change used to
#' define 'significant.'
#' @param lfc Log fold-change used to define 'significant'.
#' @param p Maximum pvalue to define 'significant.'
#' @param overwrite Overwrite existing excel results file?
#' @param species Supported organism used by the tools.
#' @param goid_map Mapping file used by topGO, if it does not exist then
#' goids_df creates it.
#' @param gff_file gff file containing the annotations used by gff2genetable
#' from clusterprofiler.
#' @param gff_type Column to use from the gff file for the universe of genes.
#' @param do_goseq Perform simple_goseq()?
#' @param do_cluster Perform simple_clusterprofiler()?
#' @param do_topgo Perform simple_topgo()?
#' @param do_gostats Perform simple_gostats()?
#' @param do_gprofiler Perform simple_gprofiler()?
#' @param do_trees make topGO trees from the data?
#' @param orgdb Provide an organismDbi/Orgdb to hold the various annotation
#' data, in response to the shift of clusterprofiler and friends towards using them.
#' @param ... Arguments to pass through in arglist.
#' @return a list of up/down ontology results from
#' goseq/clusterprofiler/topgo/gostats, and associated trees.
#' @seealso [goseq] [clusterProfiler] [topGO] [goStats] [gProfiler] [GO.db]
#' @examples
#' \dontrun{
#' many_comparisons = limma_pairwise(expt = an_expt)
#' tables = many_comparisons$limma
#' this_takes_forever = limma_ontology(tables, gene_lengths = lengthdb,
#' goids = goids_df, z = 1.5, gff_file='length_db.gff')
#' }
#' @export
all_ontology_searches <- function(de_out, gene_lengths = NULL, goids = NULL, n = NULL,
z = NULL, lfc = NULL, p = NULL, overwrite = FALSE,
species = "unsupported", orgdb = "org.Dm.eg.db",
goid_map = "reference/go/id2go.map",
gff_file = NULL, gff_type = "gene", do_goseq = TRUE,
do_cluster = TRUE, do_topgo = TRUE,
do_gostats = TRUE, do_gprofiler = TRUE,
do_trees = FALSE, ...) {
arglist <- list(...)
message("This function expects a list of contrast tables and annotation information.")
message("The annotation information would be gene lengths and ontology ids")
if (isTRUE(do_goseq) && is.null(gene_lengths)) {
stop("Performing a goseq search requires a data frame of gene lengths.")
}
if (isTRUE(do_cluster) && is.null(gff_file)) {
stop("Performing a clusterprofiler search requires a gff file.")
}
arglist <- list(...)
goid_map <- get0("goid_map")
if (is.null(goid_map)) {
goid_map <- "reference/go/id2go.map"
}
## Take a moment to detect what the data input looks like
## Perhaps a list of tables?
if (is.null(de_out[["all_tables"]])) {
if (sum(grepl(pattern = "_vs_", x = names(de_out))) == 0) {
stop("This assumes you are passing it a limma/deseq/edger output from
limma_pairwise(), edger_pairwise(), or deseq_pairwise().")
}
} else {
## In this case, you fed it something$limma rather than something$limma$all_tables
de_out <- de_out[["all_tables"]]
}
## Perhaps a single data frame of logFC etc
## In which case, coerce it to a list of 1
if (!is.null(de_out[["logFC"]])) {
tmp <- de_out
de_out <- list("first"=tmp)
rm(tmp)
}
output <- list()
for (c in seq_along(de_out)) {
datum <- de_out[[c]]
if (!is.null(datum[["Row.names"]])) {
rownames(datum) <- datum[["Row.names"]]
datum <- datum[-1]
}
comparison <- names(de_out[c])
message("Performing ontology search of:", comparison)
updown_genes <- get_sig_genes(datum, n = n, z = z, lfc = lfc, p = p)
up_genes <- updown_genes[["up_genes"]]
down_genes <- updown_genes[["down_genes"]]
goseq_up_ontology <- goseq_up_trees <- NULL
goseq_down_ontology <- goseq_down_trees <- NULL
cluster_up_ontology <- cluster_up_trees <- NULL
cluster_down_ontology <- cluster_down_trees <- NULL
topgo_up_ontology <- topgo_up_trees <- NULL
topgo_down_ontology <- topgo_down_trees <- NULL
gostats_up_ontology <- gostats_up_trees <- NULL
gostats_down_ontology <- gostats_down_trees <- NULL
gprofiler_up_ontology <- gprofiler_down_ontology <- NULL
if (isTRUE(do_goseq)) {
goseq_up_ontology <- try(simple_goseq(up_genes, goids, gene_lengths))
goseq_down_ontology <- try(simple_goseq(down_genes, goids, gene_lengths))
if (isTRUE(do_trees)) {
goseq_up_trees <- try(
goseq_trees(goseq_up_ontology, goid_map = goid_map))
goseq_down_trees <- try(
goseq_trees(goseq_down_ontology, goid_map = goid_map))
}
}
if (isTRUE(do_cluster)) {
cluster_up_ontology <- try(
simple_clusterprofiler(up_genes, datum, orgdb = orgdb, ...))
cluster_down_ontology <- try(
simple_clusterprofiler(down_genes, datum, orgdb = orgdb, ...))
if (isTRUE(do_trees)) {
cluster_up_trees <- try(cluster_trees(up_genes, cluster_up_ontology,
goid_map = goid_map, go_db = goids))
cluster_down_trees <- try(cluster_trees(down_genes, cluster_down_ontology,
goid_map = goid_map, go_db = goids))
}
}
if (isTRUE(do_topgo)) {
topgo_up_ontology <- try(
simple_topgo(up_genes, goid_map = goid_map, go_db = goids))
topgo_down_ontology <- try(
simple_topgo(down_genes, goid_map = goid_map, go_db = goids))
if (isTRUE(do_trees)) {
topgo_up_trees <- try(topgo_trees(topgo_up_ontology))
topgo_down_trees <- try(topgo_trees(topgo_down_ontology))
}
}
if (isTRUE(do_gostats)) {
gostats_up_ontology <- try(
simple_gostats(up_genes, gff_file, goids, gff_type = gff_type))
gostats_down_ontology <- try(
simple_gostats(down_genes, gff_file, goids, gff_type = gff_type))
if (isTRUE(do_trees)) {
message("gostats_trees has never been tested, this is commented out for the moment.")
}
}
if (isTRUE(do_gprofiler)) {
gprofiler_up_ontology <- try(simple_gprofiler(up_genes, species = species))
gprofiler_down_ontology <- try(simple_gprofiler(down_genes, species = species))
}
c_data <- list("up_table" = up_genes,
"down_table" = down_genes,
"up_goseq" = goseq_up_ontology,
"down_goseq" = goseq_down_ontology,
"up_cluster" = cluster_up_ontology,
"down_cluster" = cluster_down_ontology,
"up_topgo" = topgo_up_ontology,
"down_topgo" = topgo_down_ontology,
"up_gostats" = gostats_up_ontology,
"down_gostats" = gostats_down_ontology,
"up_gprofiler" = gprofiler_up_ontology,
"down_gprofiler" = gprofiler_down_ontology,
"up_goseqtrees" = goseq_up_trees,
"down_goseqtrees" = goseq_down_trees,
"up_clustertrees" = cluster_up_trees,
"down_clustertrees" = cluster_down_trees,
"up_topgotrees" = topgo_up_trees,
"down_topgotrees" = topgo_down_trees,
"up_gostatstrees" = gostats_up_trees,
"down_gostatstrees" = gostats_down_trees)
output[[c]] <- c_data
}
names(output) <- names(de_out)
return(output)
}
#' Perform ontology searches on up/down subsets of differential expression data.
#'
#' In the same way all_pairwise() attempts to simplify using multiple DE tools,
#' this function seeks to make it easier to extract subsets of differentially
#' expressed data and pass them to goseq, clusterProfiler, topGO, GOstats, and
#' gProfiler.
#'
#' @param changed_counts List of changed counts as ups and downs.
#' @param doplot Include plots in the results?
#' @param do_goseq Perform goseq search?
#' @param do_cluster Perform clusterprofiler search?
#' @param do_topgo Perform topgo search?
#' @param do_gostats Perform gostats search?
#' @param do_gprofiler Do a gprofiler search?
#' @param according_to If results from multiple DE tools were passed, which one
#' defines 'significant'?
#' @param ... Extra arguments!
#' @return List of ontology search results, up and down for each contrast.
#' @seealso [goseq] [clusterProfiler] [topGO] [goStats] [gProfiler]
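#' @examples
#' \dontrun{
#' ## A minimal sketch, assuming 'changed' is a list keyed by DE method, each
#' ## element containing per-contrast 'ups' and 'downs' tables; extra arguments
#' ## (goids, lengths, gff, ...) are passed along to the individual tools.
#' subset_go <- subset_ontology_search(changed, according_to = "deseq",
#'                                     do_gostats = FALSE, do_topgo = FALSE)
#' }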
#' @export
subset_ontology_search <- function(changed_counts, doplot = TRUE, do_goseq = TRUE,
do_cluster = TRUE, do_topgo = TRUE, do_gostats = TRUE,
do_gprofiler = TRUE, according_to = "limma", ...) {
arglist <- list(...)
up_list <- NULL
down_list <- NULL
if (!is.null(changed_counts[["ups"]])) {
message("The data is not sorted by a differential expression tool.")
up_list <- changed_counts[["ups"]]
down_list <- changed_counts[["downs"]]
} else if (is.null(changed_counts[[according_to]])) {
warning(glue("Could not find the table according to: {according_to}."))
warning("Using the first available table, which might end badly.")
up_list <- changed_counts[[1]][["ups"]]
down_list <- changed_counts[[1]][["downs"]]
} else {
up_list <- changed_counts[[according_to]][["ups"]]
down_list <- changed_counts[[according_to]][["downs"]]
}
up_goseq <- list()
down_goseq <- list()
up_cluster <- list()
down_cluster <- list()
up_topgo <- list()
down_topgo <- list()
up_gostats <- list()
down_gostats <- list()
up_gprofiler <- list()
down_gprofiler <- list()
## goseq() requires minimally gene_lengths and goids
lengths <- arglist[["lengths"]]
goids <- NULL
if (is.null(arglist[["goids"]])) {
goids <- arglist[["go_db"]]
} else {
goids <- arglist[["goids"]]
}
gff <- arglist[["gff"]]
gff_type <- arglist[["gff_type"]]
types_list <- c("up_goseq", "down_goseq", "up_cluster", "down_cluster",
"up_topgo", "down_topgo", "up_gostats", "down_gostats",
"up_gprofiler", "down_gprofiler")
names_list <- names(up_list)
names_length <- length(names_list)
for (cluster_count in seq_len(names_length)) {
name <- names_list[[cluster_count]]
uppers <- up_list[[cluster_count]]
downers <- down_list[[cluster_count]]
if (isTRUE(do_goseq)) {
message(cluster_count, "/", names_length, ": Starting goseq")
up_goseq[[name]] <- try(simple_goseq(de_genes = uppers, ...))
down_goseq[[name]] <- try(simple_goseq(de_genes = downers, ...))
}
if (isTRUE(do_cluster)) {
message(cluster_count, "/", names_length, ": Starting clusterprofiler")
up_cluster[[name]] <- try(simple_clusterprofiler(uppers, ...))
down_cluster[[name]] <- try(simple_clusterprofiler(downers, ...))
}
if (isTRUE(do_topgo)) {
message(cluster_count, "/", names_length, ": Starting topgo")
up_topgo[[name]] <- try(simple_topgo(de_genes = uppers, ...))
down_topgo[[name]] <- try(simple_topgo(de_genes = downers, ...))
}
if (isTRUE(do_gostats)) {
message(cluster_count, "/", names_length, ": Starting gostats")
up_gostats[[name]] <- try(simple_gostats(uppers, ...))
down_gostats[[name]] <- try(simple_gostats(downers, ...))
}
if (isTRUE(do_gprofiler)) {
message(cluster_count, "/", names_length, ": starting gprofiler.")
up_gprofiler[[name]] <- try(simple_gprofiler(uppers, ...))
down_gprofiler[[name]] <- try(simple_gprofiler(downers, ...))
}
}
ret <- list(
"up_goseq" = up_goseq,
"down_goseq" = down_goseq,
"up_cluster" = up_cluster,
"down_cluster" = down_cluster,
"up_topgo" = up_topgo,
"down_topgo" = down_topgo,
"up_gostats" = up_gostats,
"down_gostats" = down_gostats,
"up_gprofiler" = up_gprofiler,
"down_gprofiler" = down_gprofiler)
if (!file.exists("savefiles")) {
dir.create("savefiles")
}
try(save(list = c("ret"), file = "savefiles/subset_ontology_search_result.rda"))
return(ret)
}
#' Extract a dataframe of golevels using getGOLevel() from clusterProfiler.
#'
#' This function is way faster than my previous iterative golevel function.
#' That is not to say it is very fast, so it saves the result to ontlevel.rda
#' for future lookups.
#'
#' @param ont Ontology to recurse.
#' @param savefile File to save the results for future lookups.
#' @return Dataframe of goids<->highest level
#' @seealso [clusterProfiler]
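#' @examples
#' \dontrun{
#' ## Build (or reload from MF_ontlevel.rda) the GO ID to level mapping for the
#' ## MF subtree; the first pass is slow, subsequent calls reuse the savefile.
#' mf_levels <- golevel_df(ont = "MF")
#' head(mf_levels)
#' }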
#' @export
golevel_df <- function(ont = "MF", savefile = "ontlevel.rda") {
savefile <- glue("{ont}_{savefile}")
golevels <- NULL
if (file.exists(savefile)) {
load(savefile)
} else {
level <- 0
continue <- 1
golevels <- data.frame(GO = NULL, level = NULL)
while (continue == 1) {
level <- level + 1
GO <- try(getGOLevel(ont, level), silent = TRUE)
if (class(GO) != "character") {
golevels[["level"]] <- as.numeric(golevels[["level"]])
save(golevels, file = savefile, compress = "xz")
return(golevels)
} else {
tmpdf <- as.data.frame(cbind(GO, level))
## This (hopefully) ensures that each GO id is added only once,
## and gets the highest possible level.
new_go <- tmpdf[!tmpdf[["GO"]] %in% golevels[["GO"]], ]
golevels <- rbind(golevels, new_go)
}
}
}
return(golevels)
}
#' Compare the results from different ontology tools
#'
#' Combine the results from goseq, cluster profiler, topgo, and gostats; poke at
#' them with a stick and see what happens. The general idea is to pull the
#' p-value data from each tool and contrast that to the set of all possible
#' ontologies. This allows one to do a correlation coefficient between them.
#' In addition, take the 1-pvalue for each ontology for each tool. Thus for
#' strong p-values the score will be near 1 and so we can sum the scores for all
#' the tools. Since topgo has 4 tools, the total possible is 7 if everything
#' has a p-value equal to 0.
#'
#' @param goseq Result from simple_goseq()
#' @param cluster Result from simple_clusterprofiler()
#' @param topgo Result from topGO
#' @param gostats Result from GOstats
#' @return Summary of the similarities of ontology searches
#' @seealso [goseq] [clusterProfiler] [topGO] [goStats]
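#' @examples
#' \dontrun{
#' ## A minimal sketch, assuming the four inputs are the results of the
#' ## corresponding simple_*() searches run over the same gene set.
#' compare_go_searches(goseq = goseq_result, cluster = cluster_result,
#'                     topgo = topgo_result, gostats = gostats_result)
#' }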
#' @export
compare_go_searches <- function(goseq = NULL, cluster = NULL, topgo = NULL, gostats = NULL) {
goseq_mf_data <- goseq_bp_data <- goseq_cc_data <- NULL
if (!is.null(goseq)) {
goseq_mf_data <- goseq[["mf_subset"]][, c("category", "over_represented_pvalue")]
goseq_bp_data <- goseq[["bp_subset"]][, c("category", "over_represented_pvalue")]
goseq_cc_data <- goseq[["cc_subset"]][, c("category", "over_represented_pvalue")]
colnames(goseq_mf_data) <- c("goseq_id", "goseq_pvalue")
colnames(goseq_bp_data) <- c("goseq_id", "goseq_pvalue")
colnames(goseq_cc_data) <- c("goseq_id", "goseq_pvalue")
}
cluster_mf_data <- cluster_bp_data <- cluster_cc_data <- NULL
if (!is.null(cluster)) {
cluster_mf_data <- as.data.frame(summary(cluster[["mf_all"]]))[, c("ID", "pvalue")]
cluster_bp_data <- as.data.frame(summary(cluster[["bp_all"]]))[, c("ID", "pvalue")]
cluster_cc_data <- as.data.frame(summary(cluster[["cc_all"]]))[, c("ID", "pvalue")]
colnames(cluster_mf_data) <- c("cluster_id", "cluster_pvalue")
colnames(cluster_bp_data) <- c("cluster_id", "cluster_pvalue")
colnames(cluster_cc_data) <- c("cluster_id", "cluster_pvalue")
}
topgo_mf_data <- topgo_bp_data <- topgo_cc_data <- NULL
if (!is.null(topgo)) {
topgo_mf_data <- topgo[["tables"]][["mf"]][, c("GO.ID", "fisher", "KS", "EL", "weight")]
topgo_bp_data <- topgo[["tables"]][["bp"]][, c("GO.ID", "fisher", "KS", "EL", "weight")]
topgo_cc_data <- topgo[["tables"]][["cc"]][, c("GO.ID", "fisher", "KS", "EL", "weight")]
colnames(topgo_mf_data) <- c("topgo_id", "topgo_fisher_pvalue",
"topgo_el_pvalue", "topgo_ks_pvalue", "topgo_weight_pvalue")
colnames(topgo_bp_data) <- c("topgo_id", "topgo_fisher_pvalue",
"topgo_el_pvalue", "topgo_ks_pvalue", "topgo_weight_pvalue")
colnames(topgo_cc_data) <- c("topgo_id", "topgo_fisher_pvalue",
"topgo_el_pvalue", "topgo_ks_pvalue", "topgo_weight_pvalue")
}
gostats_mf_data <- gostats_bp_data <- gostats_cc_data <- NULL
if (!is.null(gostats)) {
gostats_mf_data <- gostats[["mf_over_all"]][, c("GOMFID", "Pvalue")]
gostats_bp_data <- gostats[["bp_over_all"]][, c("GOBPID", "Pvalue")]
gostats_cc_data <- gostats[["cc_over_all"]][, c("GOCCID", "Pvalue")]
colnames(gostats_mf_data) <- c("gostats_id", "gostats_pvalue")
colnames(gostats_bp_data) <- c("gostats_id", "gostats_pvalue")
colnames(gostats_cc_data) <- c("gostats_id", "gostats_pvalue")
}
## Now combine them...
all_mf <- merge(goseq_mf_data, cluster_mf_data,
by.x = "goseq_id", by.y = "cluster_id", all.x = TRUE, all.y = TRUE)
all_mf <- merge(all_mf, topgo_mf_data,
by.x = "goseq_id", by.y = "topgo_id", all.x = TRUE, all.y = TRUE)
all_mf <- merge(all_mf, gostats_mf_data,
by.x = "goseq_id", by.y = "gostats_id", all.x = TRUE, all.y = TRUE)
rownames(all_mf) <- all_mf[["goseq_id"]]
all_mf <- all_mf[-1]
all_mf[is.na(all_mf)] <- 1
all_mf[["goseq_pvalue"]] <- 1 - as.numeric(all_mf[["goseq_pvalue"]])
all_mf[["cluster_pvalue"]] <- 1 - as.numeric(all_mf[["cluster_pvalue"]])
all_mf[["topgo_fisher_pvalue"]] <- 1 - as.numeric(all_mf[["topgo_fisher_pvalue"]])
all_mf[["topgo_el_pvalue"]] <- 1 - as.numeric(all_mf[["topgo_el_pvalue"]])
all_mf[["topgo_ks_pvalue"]] <- 1 - as.numeric(all_mf[["topgo_ks_pvalue"]])
all_mf[["topgo_weight_pvalue"]] <- 1 - as.numeric(all_mf[["topgo_weight_pvalue"]])
all_mf[["gostats_pvalue"]] <- 1 - as.numeric(all_mf[["gostats_pvalue"]])
all_mf[is.na(all_mf)] <- 0
message(cor(all_mf))
mf_summary <- rowSums(all_mf)
message(summary(mf_summary))
all_bp <- merge(goseq_bp_data, cluster_bp_data,
by.x = "goseq_id", by.y = "cluster_id", all.x = TRUE, all.y = TRUE)
all_bp <- merge(all_bp, topgo_bp_data, by.x = "goseq_id",
by.y = "topgo_id", all.x = TRUE, all.y = TRUE)
all_bp <- merge(all_bp, gostats_bp_data, by.x = "goseq_id",
by.y = "gostats_id", all.x = TRUE, all.y = TRUE)
rownames(all_bp) <- all_bp[["goseq_id"]]
all_bp <- all_bp[-1]
all_bp[is.na(all_bp)] <- 1
all_bp[["goseq_pvalue"]] <- 1 - as.numeric(all_bp[["goseq_pvalue"]])
all_bp[["cluster_pvalue"]] <- 1 - as.numeric(all_bp[["cluster_pvalue"]])
all_bp[["topgo_fisher_pvalue"]] <- 1 - as.numeric(all_bp[["topgo_fisher_pvalue"]])
all_bp[["topgo_el_pvalue"]] <- 1 - as.numeric(all_bp[["topgo_el_pvalue"]])
all_bp[["topgo_ks_pvalue"]] <- 1 - as.numeric(all_bp[["topgo_ks_pvalue"]])
all_bp[["topgo_weight_pvalue"]] <- 1 - as.numeric(all_bp[["topgo_weight_pvalue"]])
all_bp[["gostats_pvalue"]] <- 1 - as.numeric(all_bp[["gostats_pvalue"]])
all_bp[is.na(all_bp)] <- 0
message(cor(all_bp))
bp_summary <- rowSums(all_bp)
message(summary(bp_summary))
all_cc <- merge(goseq_cc_data, cluster_cc_data,
by.x = "goseq_id", by.y = "cluster_id", all.x = TRUE, all.y = TRUE)
all_cc <- merge(all_cc, topgo_cc_data, by.x = "goseq_id",
by.y = "topgo_id", all.x = TRUE, all.y = TRUE)
all_cc <- merge(all_cc, gostats_cc_data, by.x = "goseq_id",
by.y = "gostats_id", all.x = TRUE, all.y = TRUE)
rownames(all_cc) <- all_cc[["goseq_id"]]
all_cc <- all_cc[-1]
all_cc[is.na(all_cc)] <- 1
all_cc[["goseq_pvalue"]] <- 1 - as.numeric(all_cc[["goseq_pvalue"]])
all_cc[["cluster_pvalue"]] <- 1 - as.numeric(all_cc[["cluster_pvalue"]])
all_cc[["topgo_fisher_pvalue"]] <- 1 - as.numeric(all_cc[["topgo_fisher_pvalue"]])
all_cc[["topgo_el_pvalue"]] <- 1 - as.numeric(all_cc[["topgo_el_pvalue"]])
all_cc[["topgo_ks_pvalue"]] <- 1 - as.numeric(all_cc[["topgo_ks_pvalue"]])
all_cc[["topgo_weight_pvalue"]] <- 1 - as.numeric(all_cc[["topgo_weight_pvalue"]])
all_cc[["gostats_pvalue"]] <- 1 - as.numeric(all_cc[["gostats_pvalue"]])
all_cc[is.na(all_cc)] <- 0
message(cor(all_cc))
cc_summary <- rowSums(all_cc)
message(summary(cc_summary))
}
## EOF