#!/usr/bin/env Rscript
# (C) Copyright 2020 Sur Herrera Paredes
#
# This file is part of HMVAR.
#
# HMVAR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# HMVAR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HMVAR. If not, see <http://www.gnu.org/licenses/>.
library(argparser)
library(tidyverse)
library(HMVAR)
process_arguments <- function(){
p <- arg_parser(paste("Test functional enrichments from RER permutations test"))
# Positional arguments
p <- add_argument(p, "input",
help = paste("Input with RER permutations results. Directory or file."),
type = "character")
p <- add_argument(p, "annots",
help = paste("eggnog annotations. Directory or file."),
type = "character")
# Optional arguments
p <- add_argument(p, "--outdir",
help = paste("Directory path to store outputs."),
default = "output/",
type = "character")
p <- add_argument(p, "--plot",
help = paste("Plot rerperms?"),
flag = TRUE)
# Read arguments
cat("Processing arguments...\n")
args <- parse_args(p)
# Process arguments
if(dir.exists(args$input)){
if(!dir.exists(args$annots)){
stop("ERROR: annots must be a direcotry if input is a directory", call. = TRUE)
}
args$input <- list.files(args$input, full.names = TRUE)
spec <- stringr::str_remove(basename(args$input), pattern = "\\.rer\\.fdr\\.txt$")
args$annots <- file.path(args$annots,paste0(spec, ".emapper.annotations"))
if(!all(file.exists(args$annots))){
cat(args$annots[ !file.exists(args$annots) ], "not found\n")
stop("ERROR: There must be annotation files for all genomes tested", call. = TRUE)
}
}else if(file.exists(args$input)){
if(dir.exists(args$annots)){
spec <- stringr::str_remove(basename(args$input), pattern = "\\.rer\\.fdr\\.txt$")
args$annots <- file.path(args$annots, paste0(spec, ".emapper.annotations"))
}
if(!file.exists(args$annots)){
stop("ERROR: annots must be an existing file or a dir containing <spec>.emapper.annotations", call. = TRUE)
}
}else{
stop("ERROR: input must be either an existing file or directory.", call. = TRUE)
}
return(args)
}
# setwd("/cashew/users/sur/exp/fraserv/2020/today/gut")
# args <- list(input = "rerperms/Bacteroides_ovatus_58035.rer.fdr.txt",
# annots = "annots/Bacteroides_ovatus_58035.emapper.annotations",
# outdir = "output/",
# plot = TRUE)
# args <- list(input = "rerperms/Bacteroides_ovatus_58035.rer.fdr.txt",
# annots = "annots/",
# outdir = "output/",
# plot = TRUE)
# args <- list(input = "rerperms/",
# annots = "annots/",
# outdir = "output/",
# plot = TRUE)
# args <- list(input = "rerperms/",
# annots = "annots/Bacteroides_coprocola_61586.emapper.annotations",
# outdir = "output/",
# plot = TRUE)
# if(dir.exists(args$input)){
# if(!dir.exists(args$annots)){
# stop("ERROR: annots must be a direcotry if input is a directory", call. = TRUE)
# }
# args$input <- list.files(args$input, full.names = TRUE)
# spec <- stringr::str_remove(basename(args$input), pattern = "\\.rer\\.fdr\\.txt$")
# args$annots <- file.path(args$annots,paste0(spec, ".emapper.annotations"))
# if(!all(file.exists(args$annots))){
# cat(args$annots[ !file.exists(args$annots) ], "not found\n")
# stop("ERROR: There must be annotation files for all genomes tested", call. = TRUE)
# }
# }else if(file.exists(args$input)){
# if(dir.exists(args$annots)){
# spec <- stringr::str_remove(basename(args$input), pattern = "\\.rer\\.fdr\\.txt$")
# args$annots <- file.path(args$annots, paste0(spec, ".emapper.annotations"))
# }
# if(!file.exists(args$annots)){
# stop("ERROR: annots must be an existing file or a dir containing <spec>.emapper.annotations", call. = TRUE)
# }
# }else{
# stop("ERROR: input must be either an existing file or directory.", call. = TRUE)
# }
args <- process_arguments()
args
# Prepare output dir
if(!dir.exists(args$outdir)){
dir.create(args$outdir)
}
GO <- KO <- OG <- COG <- BIGG <- NULL
# for(i in 1:3){
for(i in 1:length(args$input)){
rertest_file <- args$input[i]
annots_file <- args$annots[i]
# Get species name
spec <- stringr::str_remove(basename(rertest_file),
pattern = "\\.rer\\.fdr\\.txt$")
cat(spec, "\n")
# Read data
RER <- read_tsv(rertest_file,
col_types = cols(gene_id = col_character(),
Rho = col_number(),
P = col_number(),
p.adj = col_number(),
FDR = col_number()))
annots <- read_eggnog(annots_file) %>%
mutate(query_name = str_replace(string = query_name,
pattern = "\\([+-]\\)_[0-9]+$",
replacement = "")) %>%
select(gene_id = query_name, predicted_gene_name,
GO_terms, KEGG_KOs, BiGG_reactions,
Annotation_tax_scope, OGs,
'bestOG|evalue|score', COG_cat, eggNOG_annot)
if(args$plot){
# p1 <- ggplot(RER, aes(x = -log10(P), y = -log10(FDR))) +
# geom_point() +
# geom_abline(intercept = 0, slope = 1) +
# AMOR::theme_blackbox()
#
# pval_qqplot(RER$P)
# pval_qqplot(RER$FDR)
p1 <- ggplot(RER, aes(x = Rho, y = -log10(FDR))) +
geom_point(aes(size = N), shape = 19, alpha = 0.1) +
scale_radius(trans="sqrt") +
AMOR::theme_blackbox()
filename <- file.path(args$outdir, paste0(spec, ".volcano.png"))
ggsave(filename, p1, width = 5, height = 5, dpi = 150)
}
# Match genes and annots
RER <- RER %>%
filter(!is.na(FDR)) %>%
left_join(annots, by = "gene_id")
# RER %>%
# select(-p.adj,
# -BiGG_reactions,
# -Annotation_tax_scope,
# -"bestOG|evalue|score") %>%
# print(n = 30)
# RER %>%
# filter(!is.na(BiGG_reactions))
# Enrichments
terms <- RER %>% select(gene_id, terms = GO_terms, score = FDR) %>% filter(!is.na(terms))
res <- terms_enrichment(dat = terms,
method = "gsea",
test = "ks",
alternative = "greater",
min_size = 10)
filename <- file.path(args$outdir, paste0(spec, ".go.enrichments.txt"))
write_tsv(res, filename)
GO <- GO %>%
bind_rows(terms %>% mutate(spec = spec))
terms <- RER %>% select(gene_id, terms = KEGG_KOs, score = FDR) %>% filter(!is.na(terms))
res <- terms_enrichment(dat = terms,
method = "gsea",
test = "ks",
alternative = "greater",
min_size = 10)
filename <- file.path(args$outdir, paste0(spec, ".ko.enrichments.txt"))
write_tsv(res, filename)
KO <- KO %>%
bind_rows(terms %>% mutate(spec = spec))
terms <- RER %>% select(gene_id, terms = OGs, score = FDR) %>% filter(!is.na(terms))
res <- terms_enrichment(dat = terms,
method = "gsea",
test = "ks",
alternative = "greater",
min_size = 10)
filename <- file.path(args$outdir, paste0(spec, ".og.enrichments.txt"))
write_tsv(res, filename)
OG <- OG %>%
bind_rows(terms %>% mutate(spec = spec))
terms <- RER %>% select(gene_id, terms = BiGG_reactions, score = FDR) %>% filter(!is.na(terms))
res <- terms_enrichment(dat = terms,
method = "gsea",
test = "ks",
alternative = "greater",
min_size = 10)
filename <- file.path(args$outdir, paste0(spec, ".bigg.enrichments.txt"))
write_tsv(res, filename)
BIGG <- BIGG %>%
bind_rows(terms %>% mutate(spec = spec))
terms <- RER %>% select(gene_id, terms = COG_cat, score = FDR) %>% filter(!is.na(terms))
res <- terms_enrichment(dat = terms,
method = "gsea",
test = "ks",
alternative = "greater",
min_size = 10)
filename <- file.path(args$outdir, paste0(spec, ".cog.enrichments.txt"))
write_tsv(res, filename)
COG <- COG %>%
bind_rows(terms %>% mutate(spec = spec))
}
# Overall Enrichments
res <- terms_enrichment(dat = GO %>% select(-spec),
method = "gsea",
test = "ks",
alternative = "greater",
min_size = 10)
filename <- file.path(args$outdir, paste0("overall", ".go.enrichments.txt"))
write_tsv(res, filename)
res <- terms_enrichment(dat = KO %>% select(-spec),
method = "gsea",
test = "ks",
alternative = "greater",
min_size = 10)
filename <- file.path(args$outdir, paste0("overall", ".ko.enrichments.txt"))
write_tsv(res, filename)
res <- terms_enrichment(dat = OG %>% select(-spec),
method = "gsea",
test = "ks",
alternative = "greater",
min_size = 10)
filename <- file.path(args$outdir, paste0("overall", ".og.enrichments.txt"))
write_tsv(res, filename)
res <- terms_enrichment(dat = BIGG %>% select(-spec),
method = "gsea",
test = "ks",
alternative = "greater",
min_size = 10)
filename <- file.path(args$outdir, paste0("overall", ".bigg.enrichments.txt"))
write_tsv(res, filename)
res <- terms_enrichment(dat = COG %>% select(-spec),
method = "gsea",
test = "ks",
alternative = "greater",
min_size = 10)
filename <- file.path(args$outdir, paste0("overall", ".cog.enrichments.txt"))
write_tsv(res, filename)
# # Try Rho GSEA
# RER %>%
# filter(FDR <= 0.01) %>%
# select(gene_id, terms = BiGG_reactions, score = Rho) %>%
# filter(!is.na(terms))
# terms_enrichment(dat = RER %>%
# filter(FDR <= 0.01) %>%
# select(gene_id, terms = GO_terms, score = Rho),
# method = "gsea",
# test = "ks",
# alternative = "two.sided",
# min_size = 2)
# terms_enrichment(dat = RER %>%
# filter(FDR <= 0.01) %>%
# select(gene_id, terms = KEGG_KOs, score = Rho),
# method = "gsea",
# test = "ks",
# alternative = "two.sided",
# min_size = 2)
# terms_enrichment(dat = RER %>%
# filter(FDR <= 0.01) %>%
# select(gene_id, terms = OGs, score = Rho),
# method = "gsea",
# test = "ks",
# alternative = "two.sided",
# min_size = 2)
# terms_enrichment(dat = RER %>%
# filter(FDR <= 0.01) %>%
# select(gene_id, terms = BiGG_reactions, score = Rho),
# method = "gsea",
# test = "ks",
# alternative = "two.sided",
# min_size = 2)
# terms_enrichment(dat = RER %>%
# filter(FDR <= 0.01) %>%
# select(gene_id, terms = COG_cat, score = Rho),
# method = "gsea",
# test = "ks",
# alternative = "two.sided",
# min_size = 2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.