#!/usr/bin/env Rscript
# Kraken pseudo cigars
# Kraken information ####
# "C"/"U": one letter code indicating that the sequence was either classified or unclassified.
# The sequence ID, obtained from the FASTA/FASTQ header.
# The taxonomy ID Kraken used to label the sequence; this is 0 if the sequence is unclassified.
# The length of the sequence in bp.
# A space-delimited list indicating the LCA mapping of each k-mer in the sequence. For example, "562:13 561:4 A:31 0:1 562:3" would indicate that:
# the first 13 k-mers mapped to taxonomy ID #562
# the next 4 k-mers mapped to taxonomy ID #561
# the next 31 k-mers contained an ambiguous nucleotide
# the next k-mer was not in the database
# the last 3 k-mers mapped to taxonomy ID #562
# Script ####
suppressPackageStartupMessages({
library(readr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(ggridges)
library(optparse)
library(purrr)
library(krakenreports)
})
option_list <- list(
make_option(c("-i", "--input"),
action="store",
default=system.file("extdata", "mapped.tsv", package = "krakenreports"),
help="Kraken TSV - example included with package. Run without input to test."),
make_option(c("-s", "--seqid"),
action="store",
default=NA,
help="Sequence ID to display only one"),
make_option(c("-o", "--output"),
action="store",
default=paste(Sys.Date(), "_krakenreports", sep = ""),
help="Output prefix"),
make_option(c("-p", "--paired"),
action="store_true",
default=FALSE,
help="Paired end data?"),
make_option(c("-l", "--nolog"),
action="store_false",
default=TRUE,
help="Store log?")
)
opt <- parse_args(OptionParser(option_list=option_list), positional_arguments = TRUE)
fileprefix <- tools::file_path_sans_ext(opt$options$output)
if(opt$options$nolog){
con <- file(paste0(fileprefix, ".log"))
sink(con, append=TRUE)
sink(con, append=TRUE, type="message")
}
mapped <- readKmerData(x = opt$options$input)
if(opt$options$paired){
x <-tidyPairedKmerData(mapped)
} else{
x <-tidyKmerData(mapped)
}
# CSV Reports ####
# Kmers per sequence
x %>%
ungroup() %>%
group_by(Status, SEQID, taxeng) %>%
summarise(Count = sum(kmers)) %>%
arrange(Status, SEQID, desc(Count)) %>%
select(Status, Sequence = SEQID, TaxonomicName = taxeng, Count, everything()) %>%
write_csv(paste0(fileprefix, "_perseq.csv"))
# All Kmers
x %>%
ungroup() %>%
group_by(taxeng) %>%
summarise(Count = sum(kmers)) %>%
arrange(desc(Count)) %>%
select(TaxonomicName = taxeng, Count) %>%
write_csv(paste0(fileprefix, "_allseqs.csv"))
# Plots ####
# Write Summary Plot
message(paste("Writing summary plot to", paste0(fileprefix, "-summary", ".png")))
pdf(paste0(fileprefix, "-summary", ".pdf"), width = 12, height = 8)
plotSummary(x)
dev.off()
# Write Individual Plots
message(paste("Writing plots to", paste0(fileprefix, ".pdf")))
pdf(paste0(fileprefix, ".pdf"))
if(is.na(opt$options$seqid)){
plotAllKmerCigars(x, lg = TRUE)
}else {
plotKmerCigar(x, opt$options$seqid)
}
dev.off()
if(opt$options$nolog){
sink()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.