################
# Input/Output
################
#' Coverage Info Text
#'
#' Creates a string with information on the coverage
#'
#' @param stats Data frame with coverage statistics.
#' @param selected.group Retrieve information for a subgroup of templates only.
#' @param ident An identifier for the coverage.
#' @return A string with information on the coverage.
#' @keywords internal
create.cvg.text <- function(stats, selected.group = NULL, ident = NULL) {
if (length(selected.group) == 0 || "all" %in% selected.group) {
selected.group <- "Total"
}
if (selected.group[1] == "Total") {
out.selected.group <- "" # for output: change id
} else {
out.selected.group <- selected.group
}
if (length(ident) == 0) { # identifier for coverage type
ident <- "Coverage"
}
idx <- which(stats$Group %in% selected.group)
if (length(idx) != 0) {
stats <- stats[idx, ]
# differentiate fw & rev cvg
text <- paste0(ident, ": ", stats$N_primer, " primers (", stats$N_primer_fw, " fw/", stats$N_primer_rev,
" rev) cover ", stats$Coverage, " ", out.selected.group, " template sequences.")
} else {
text <- "Templates not covered."
}
return(text)
}
#' Check for Report Dependencies.
#'
#' Checks whether the dependencies for rmarkdown::render() are fulfilled.
#'
#' @return A logical vector giving the dependency availability status.
#' @keywords internal
check_report_deps <- function() {
pandoc.available <- Sys.which("pandoc") != ""
latex.available <- Sys.which("pdflatex") != ""
out <- c("Pandoc" = pandoc.available, "LateX" = latex.available)
return(out)
}
#' Sanitiziation of Filename.
#'
#' Ensures that a filename is valid for the file system.
#'
#' @param path The path to the file to be sanitized, without file extension.
#' @param suffix The suffix (e.g. ".png") of a file.
#' @param sub_char The character used to replacing disallowed chars.
#' @return The sanitized filename
#' @keywords internal
sanitize_path <- function(path, suffix = '', sub.char = "_") {
if (grepl('[^~:_./\\[:alnum:]-]', path)) {
warning('Replaced special characters in path "', path, '" -> "',
path <- gsub('[^~:_./\\[:alnum:]-]', sub.char, path), '"')
}
# replace . with sub.char except ../ and ./
s = strsplit(path, '[/\\\\]')[[1L]]
i = (s != '.') & (s != '..') & grepl('\\.', s)
if (any(i)) {
s[i] = gsub('\\.', sub.char, s[i])
path = paste(s, collapse = '/')
warning('Dots in path replaced with", sub.char, "("', path, '")')
}
out <- paste0(path, suffix)
return(out)
}
#' Creation of a Filename for Reports.
#'
#' Creates the filename for reports.
#'
#' @param report.name The identifier for the report type.
#' @param sample.name The identifier of the sample that was analyzed.
#' @return A character vector.
#' @keywords internal
get_report_fname <- function(report.name, sample.name) {
date <- format(Sys.time(), "%Y-%m-%d")
out <- paste(report.name, "_", sample.name, "_", date, sep = "")
out <- sanitize_path(out, ".pdf")
return(out)
}
#' Renders an rmarkdown file using Pandoc.
#'
#' Creates a PDF report using rmarkdown and Pandoc by passing the specified
#' \code{params} to the markdown file given by \code{report_template} and
#' storing the PDF in \code{out.file}.
#'
#' @param params A list with parameters for the R markdown parser.
#' @param report_template A character vector giving the basename of the
#' Rmarkdown template to use for report creation.
#' @param out.file The filename of the report PDF to be created.
#' @return Creates a PDF in \code{out.file} if successful.
#' @keywords internal
render_report <- function(params, report_template, out.file) {
deps <- check_report_deps()
missing.deps <- names(deps)[which(!deps)]
if (length(missing.deps) != 0) {
msg <- paste("Cannot create PDF report. Dependencies are missing:\n",
paste(missing.deps, collapse = ","))
warning(msg)
return()
}
report.template.dir <- system.file("extdata", "report",
package = "openPrimeR")
# copy report file to temporary directory to make sure we can write
generation.dir <- tempdir() # dir where report is generated
dir.copy(report.template.dir, generation.dir, overwrite = TRUE)
tempReport <- file.path(generation.dir, report_template)
if (rmarkdown::pandoc_available()) {
dir.create(dirname(out.file), showWarnings = FALSE, recursive = TRUE)
out.file <- file.path(normalizePath(dirname(out.file)), basename(out.file))
res <- try(rmarkdown::render(tempReport, output_file = out.file,
params = params, envir = new.env(parent = globalenv()),
quiet = TRUE))
if (is(res, "try-error")) {
warning("No PDF file was produced, please check the console output and whether all Latex dependencies are installed (Texlive available?).")
}
} else {
msg <- paste0("Pandoc for rmarkdown is not available on your system.",
"Please install it first to generate a report.")
warning(msg)
return()
}
}
#' Read FASTA File.
#'
#' Reads the input FASTA file.
#'
#' @param fasta.file The path to a FASTA file.
#' @param NTs The allowed set of nucleotides.
#' @return List with vectors of chars.
#' @keywords internal
my.read.fasta <- function(fasta.file, NTs) {
# NTs: supported NTs
if (!file.exists(fasta.file)) {
stop(paste("The input fasta.file did not exist at: '",
fasta.file, "'", sep = ""))
}
seqs <- suppressWarnings(seqinr::read.fasta(fasta.file, forceDNAtolower = TRUE))
# replace empty chars
for (i in seq_along(seqs)) {
if (" " %in% seqs[[i]]) {
new.seq <- seqs[[i]][seqs[[i]] != " "]
seqs[[i]] <- seqinr::as.SeqFastadna(new.seq, attr(seqs[[i]], "name"), attr(seqs[[i]],
"Annot"))
}
}
# check whether the seqs are in the NT alphabet
sanity.check <- unlist(lapply(seqs, function(x) all(x %in% NTs)))
idx <- which(!sanity.check)
# message(idx)
if (length(idx) != 0) {
msg <- "Some sequences contained non-supported characters. Supported are the following characters: "
sup <- paste(NTs, collapse = ", ")
loc <- paste(sapply(idx, function(x) attr(seqs[[x]], "name")), collapse = ", ")
pre <- "The sequences with the following headers contained non-supported characters: "
msg <- paste(msg, sup, ". ", pre, loc, sep = "")
my.error("FastaAlphabetError", msg)
}
return(seqs)
}
#' Wrapper for the ggplot2::ggsave function.
#'
#' Saves a plot using ggplot2's ggsave function.
#'
#' @param filename The filename to store the plot.
#' @param plot The ggplot object.
#' @param ... Further arguments to the ggplot2 ggsave function.
#' @return Stores \code{p} in \code{fname}.
#' @keywords internal
my_ggsave <- function(filename, plot = ggplot2::last_plot(), ...) {
if (length(plot) != 0) {
check <- try(suppressMessages(ggsave(filename = filename, plot = plot, ...)))
if (is(check, "try-error")) {
# downgrade error to warning
warning(attr(check, "condition") )
}
}
return()
}
add_cvg_to_workbook <- function(cvg.matrix, wb, start.row, start.col, is.first.entry = FALSE) {
# wb workbook
# start.col The column to start writing into (counting from 1)
# position for row/column style separators:
# c <- seq(start.col, ncol(cvg.matrix) - 1)
# r <- seq(start.row, start.row + nrow(cvg.matrix) - 1)
if (is.first.entry) {
#r <- r + 1
openxlsx::writeData(wb, 1, cvg.matrix,
startCol = start.col, startRow = start.row,
rowNames = FALSE, colNames = TRUE, borders = "all",
borderColour = "#000000")
} else {
openxlsx::writeData(wb, 1, cvg.matrix,
startCol = start.col, startRow = start.row,
rowNames = FALSE, colNames = FALSE, borders = "all",
borderColour = "#000000")
}
# styles:
#bodyStyle <- createStyle(border="Bottom",
#borderColour = "#000000",
#borderStyle = "thick")
#r <- tail(r,1)
# doesnt work:
#addStyle(wb, sheet = 1, bodyStyle, rows = r, cols = c, gridExpand = TRUE)
#setColWidths(wb, 1, cols=1, widths = 21) ## set column width for row names column
return(wb)
}
#' @rdname Output
#' @name Output
#' @aliases create_coverage_xls
#' @return \code{create_coverage_xls} stores information on the
#' primer coverage in a spreadsheet.
#' @export
#' @examples
#'
#' data(Ippolito)
#' filename <- tempfile("cvg_overview", fileext = ".xls")
#' # Store coverage of a single primer in an XLS file:
#' my.primers <- primer.df[3,]
#' cvd <- unique(unlist(strsplit(my.primers$Covered_Seqs, split = ",")))
#' m <- match(cvd, template.df$Identifier)
#' my.templates <- template.df[m,]
#' create_coverage_xls(my.primers, my.templates, filename, settings)
create_coverage_xls <- function(primer.df, template.df, fname, settings) {
if (length(primer.df) == 0 || nrow(primer.df) == 0) {
return(NULL)
}
if (!is(settings, "DesignSettings")) {
stop("A DesignSettings object is required for settings.")
}
if (!is(primer.df, "Primers")) {
stop("Please input a 'Primers' object.")
}
if (!is(template.df, "Templates")) {
stop("Please input a 'Templates' object.")
}
if (!"primer_coverage" %in% colnames(primer.df)) {
warning("Please compute primer coverage first.")
return(NULL)
}
cvg.constraints <- cvg_constraints(settings)
constraints <- names(cvg.constraints)[names(cvg.constraints) %in% c("primer_efficiency", "annealing_DeltaG")]
constraints <- constraints[constraints %in% colnames(primer.df)]
if (length(constraints) > 1) {
warning("Only one constraint possible at a time. Selecting the first one.")
constraints <- constraints[1]
}
# initialize workbook:
wb <- openxlsx::createWorkbook()
openxlsx::addWorksheet(wb, primer.df$Run[1])
# create coverage data for individual mismatch settings:
mm <- max(unique(unlist(sapply(strsplit(c(primer.df$Nbr_of_mismatches_fw, primer.df$Nbr_of_mismatches_rev), split = ","), function(x) as.numeric(x)))))
mm <- c(0, seq_len(mm))
# add mismatch column to worksheet:
mm.column <- data.frame("Allowed_Mismatches" = unlist(lapply(mm, function(x) rep(x, nrow(primer.df)))))
openxlsx::writeData(wb, 1, mm.column , startCol = 1,
rowNames = FALSE)
#openxlsx::openXL(wb) # error already here: SET_VECTOR_ELT() can only be applied to a list, not a symbol
start.row <- 1
for (i in seq_along(mm)) {
# create coverage data for one mismatch setting
allowed.mm <- mm[i]
fw.mm <- lapply(strsplit(primer.df$Nbr_of_mismatches_fw, split = ","), as.numeric)
allowed.idx.fw <- lapply(fw.mm, function(x) which(x <= allowed.mm))
rev.mm <- lapply(strsplit(primer.df$Nbr_of_mismatches_fw, split = ","), as.numeric)
allowed.idx.rev <- lapply(fw.mm, function(x) which(x <= allowed.mm))
# select worst case inclusion
allowed.idx <- lapply(seq_len(nrow(primer.df)), function(x) {
if (primer.df$Direction[x] == "fw") {
allowed.idx.fw[[x]]
} else if (primer.df$Direction[x] == "rev") {
allowed.idx.rev[[x]]
} else { # both
intersect(allowed.idx.fw[[x]],allowed.idx.rev[[x]])
}
})
# update covered seqs for allowed nbr of mismatches:
cur.df <- primer.df
cvd.idx <- covered.seqs.to.idx(primer.df$Covered_Seqs, template.df)
cvd <- sapply(seq_along(allowed.idx), function(x) paste(template.df$Identifier[cvd.idx[[x]][allowed.idx[[x]]]], collapse = ","))
cur.t.df <- update_template_cvg(template.df, cur.df)
cur.df$Covered_Seqs <- cvd
# get coverage matrix for specific number of mismatches
if (length(constraints) == 0) {
cvg.matrix <- t(get.coverage.matrix(cur.df, cur.t.df))
} else {
# change entries to cvg constraint values: efficiency / deltaG of annealing
# re-write constraints: select only the current constraint events
split.vals <- strsplit(primer.df[, constraints], split = ",")
vals <- unlist(lapply(seq_along(split.vals), function(x) paste(split.vals[[x]][allowed.idx[[x]]], collapse = ",")))
cur.df[,constraints] <- vals
cvg.matrix <- t(get.coverage.matrix(cur.df, cur.t.df, constraints = constraints))
# overwrite previous 0/1 coverag entries with cvg constraint values
}
cnames <- c("Primer", colnames(cvg.matrix))
cvg.matrix <- cbind(Primer = rownames(cvg.matrix),
data.frame(cvg.matrix, stringsAsFactors = FALSE))
cvg.matrix$Primer <- as.character(cvg.matrix$Primer)
colnames(cvg.matrix) <- cnames
# add entries to notebook
wb <- add_cvg_to_workbook(cvg.matrix, wb, start.row = start.row, start.col = 2, is.first.entry = (i == 1))
if (i == 1) {
# first entry in workbook
start.row <- start.row + nrow(primer.df) + 1 # plus the header row
} else {
start.row <- start.row + nrow(primer.df)
}
}
# conditional formatting is applied when xls is created and based on the final values ...
# color according to cvg.matrix entries -> 1 covered, 0 not
coveredStyle <- openxlsx::createStyle(fontColour = "#000000", bgFill = "#00b050")
uncoveredStyle <- openxlsx::createStyle(fontColour = "#000000", bgFill = "#ff0000")
# row and column indices for style rules:
r <- seq_len(nrow(mm.column)) + 1
c <- seq_len(nrow(template.df)) + 2 # add 2 for primer and mismatch cols
if (length(constraints) == 0) {
match.rule <- "==1"
mismatch.rule <- "==0"
} else {
if (constraints == "primer_efficiency") {
match.rule <- paste0(">=", cvg.constraints[[constraints]]["min"])
mismatch.rule <- paste0("<", cvg.constraints[[constraints]]["min"])
} else { # annealing
match.rule <- paste0("<=", cvg.constraints[[constraints]]["max"])
mismatch.rule <- paste0(">", cvg.constraints[[constraints]]["max"])
}
}
openxlsx::conditionalFormatting(wb, 1, cols = c, rows = r, rule = match.rule, style = coveredStyle)
openxlsx::conditionalFormatting(wb, 1, cols = c, rows = r, rule = mismatch.rule, style = uncoveredStyle)
#openxlsx::openXL(wb) # open temporary version for testing!
openxlsx::saveWorkbook(wb, fname, overwrite = TRUE)
}
#' @rdname Output
#' @name Output
#' @aliases create_report
#' @return \code{create_report} Creates a PDF file summarizing the results
#' from analyzing one or multiple sets of primers.
#' @export
#' @include primers.R templates.R
#' @note
#' Creating the report requires the external programs Pandoc (http://pandoc.org)
#' and LaTeX (http://latex-project.org).
#' @examples
#'
#' setting.xml <- system.file("extdata", "settings",
#' "C_Taq_PCR_high_stringency.xml", package = "openPrimeR")
#' settings <- read_settings(setting.xml)
#' # Creation of a report for a single primer set
#' data(Ippolito)
#' out.file.single <- tempfile("evaluation_report", fileext = ".pdf")
#' create_report(primer.df, template.df, out.file.single, settings)
#' # Creation of a report for multiple primer sets
#' data(Comparison)
#' set.sizes <- sapply(primer.data, nrow)
#' sel.sets <- order(set.sizes)[1:2]
#' out.file.comp <- tempfile("comparison_report", fileext = ".pdf")
#' create_report(primer.data[sel.sets], template.data[sel.sets], out.file.comp, settings)
setGeneric("create_report",
function(primers, templates, fname, settings,
sample.name = NULL, used.settings = NULL, ...) {
standardGeneric("create_report")
})
#' Creation of a PDF report.
#'
#' Creates a PDF report for a set of primers.
#'
#' @param primers An evaluated \code{Primers} object.
#' @param templates A \code{Templates} object.
#' @param fname A character vector giving the file to store the report in.
#' @param settings A \code{DesignSettings} object.
#' @param sample.name An identifier for your analysis.
#' @param used.settings A named list (with fields "fw" and "rev") containing the forward/reverse used design settings.
#' @param required.cvg The required coverage ratio.
#' @return Creates a PDF file reporting on the input primers.
#' @keywords internal
#' @note
#' Creating the report requires the external programs Pandoc (http://pandoc.org)
#' and LaTeX (http://latex-project.org).
setMethod("create_report", c("Primers", "Templates"), function(primers, templates,
fname, settings, sample.name, used.settings, required.cvg = 1) {
mode.directionality <- get.analysis.mode(primers)
if (length(templates) == 0 || nrow(templates) == 0 ||
!is(templates, "Templates")) {
warning("No available templates for creating a report; returning.")
return()
}
if (length(primers) == 0 || nrow(primers) == 0 ||
!is(primers, "Primers")) {
warning("No available primers for creating a report; returning.")
return()
}
if (is.null(sample.name)) {
sample.name <- primers$Run[1]
}
if (!is(settings, "DesignSettings") ) {
stop("Settings are invalid.")
}
if (any(sapply(used.settings, function(x) !is(x, "DesignSettings")))) {
stop("used.settings are invalid.")
}
if (!"primer_coverage" %in% colnames(primers)) {
warning("Please compute primer coverage first.")
return()
}
if (!requireNamespace("pander")) {
warning("Cannot create the report without 'pander'.")
return()
}
# create param list for comparison report
params <- list("primers" = primers, "templates" = templates,
"direction" = mode.directionality, "sample" = sample.name,
"settings" = settings,
"used_settings" = used.settings,
"required_cvg" = required.cvg)
render_report(params, "report.Rmd", fname)
return()
})
#####
#' Creation of a PDF Report for Primer Comparison.
#'
#' Creates a PDF report for comparing multiple primers.
#'
#' @param primers A list with evaluated \code{Primers} objects.
#' @param templates A list with \code{Templates} objects.
#' @param fname A character vector giving the file to store the report in.
#' @param settings A \code{DesignSettings} object.
#' @param sample.name An identifier for your analysis.
#' @param used.settings A named list (with fields "fw" and "rev") containing the forward/reverse used design settings.
#' @return Creates a PDF file giving a report on the comparison of the input primers.
#' @keywords internal
#' @note
#' Creating the report requires the external programs Pandoc (http://pandoc.org)
#' and LateX (http://latex-project.org).
setMethod("create_report", c("list", "list"), function(primers, templates,
fname, settings,
sample.name = NULL,
used.settings = NULL) {
mode.directionality <- sapply(primers, function(x) get.analysis.mode(x))
# check type of primer and template data
if (length(primers) == 0 || length(templates) == 0) {
stop("No primer/template data was inputted.")
}
template.classes <- sapply(templates, function(x) class(x))
primer.classes <- sapply(primers, function(x) class(x))
if (any(template.classes != "Templates") || any(primer.classes != "Primers")) {
stop("Check types of primers/templates.")
}
if (is.null(sample.name)) {
# assume templates relate to the same sample
sample.name <- templates[[1]]$Run[1]
}
if (!is(settings, "DesignSettings") ) {
stop("Settings are invalid.")
}
# cvg available/evaluated set?
cvg.available <- sapply(primers, function(x)
"primer_coverage" %in% colnames(x))
if (!all(cvg.available)) {
warning("Please compute primer coverage first.")
return()
}
params <- list("primers" = primers, "templates" = templates,
"direction" = mode.directionality, "sample" = sample.name,
"settings" = settings)
render_report(params, "report_comparison.Rmd", fname)
return()
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.