#' Checks wether in the current Redis server there is an entry for gene pair
#' 'gene.pair'
#'
#' @param Character vector of length two holding members A and B of the gene
#' pair.
#'
#' @return Boolean. TRUE of and only if the alphabetically sorted gene
#' accessions 'A_B' appear in the Redis store.
#' @export
genePairInRedis <- function(gene.pair) {
g.pair <- paste(sort(gene.pair), collapse = "_")
!is.null(redisGet(g.pair))
}
#' Puts the integer value 1 into the Redis Server using the alphabetically
#' sorted gene accessions in 'gene.pair' as key.
#'
#' @param Character vector of length two holding members A and B of the gene
#' pair.
#'
#' @return The output of calling rredis::redisSet(...)
#' @export
putGenePairIntoRedis <- function(gene.pair) {
g.pair <- paste(sort(gene.pair), collapse = "_")
redisSet(g.pair, 1)
}
#' Runs the pipeline to compute the Ks values of gene pair 'x' with coding
#' sequences held in 'cds'. Temporary files are stored in 't.d'. Disclaimer:
#' This function will only work on *nix Operating Systems with 'codeml' and
#' 'tail' installed.
#'
#' @param gene.a A string holding the gene identifier as present in argument
#' \code{cds}.
#' @param gene.b A string holding the gene identifier as present in argument
#' \code{cds}.
#' @param cds an instance of \code{base::list} as generated by
#' \code{seqinr::read.fasta} holding the unaligned coding sequences of the
#' genes in 'x'. Default is \code{GeneFamilies::all.cds}.
#' @param t.d the directory to store the files in, default is tempdir()
#' appended with \code{paste( gene,a, gene.b, collapse='_' )}
#' @param codeml.call the call passed to system(...) in order to start codeml.
#' Default is 'codeml', use option 'GeneFamilies.codeml.call' to set another
#' default.
#'
#' @return A list of three named numeric values: 'Ka', 'Ks', and 'w'. The first
#' two are parsed from the codeml output file and the latter is computed as
#' \code{Ka/Ks}.
#' @references Yang, Z. and Nielsen, R. (2000) Mol. Biol. Evol., 17, 32-43.
#' @export
computeKsPipeline <- function(gene.a, gene.b, cds = all.cds, t.d = file.path(tempdir(),
paste(gene.a, gene.b, sep = "_")), codeml.call = getOption("GeneFamilies.codeml.call",
"codeml")) {
if (!file.exists(t.d))
dir.create(t.d, recursive = TRUE)
x <- c(gene.a, gene.b)
p.n <- paste(x, collapse = "_")
tryCatch({
genes.msa <- alignCodingSequencesPipeline(cds[c(gene.a, gene.b)], work.dir = t.d,
gene.group.name = p.n)
# Found at least a single invalid CDS?
if (length(genes.msa) == 2) {
pair.cds.msa.path <- file.path(t.d, paste(p.n, "_CDS_MSA.fasta", sep = ""))
pair.codeml.in.out <- renderCodemlInputs(x, pair.cds.msa.path)
orig.dir <- getwd()
setwd(t.d)
system(paste(codeml.call, basename(pair.codeml.in.out[["in"]])))
setwd(orig.dir)
codeml.out.ln <- system(paste("tail -1", pair.codeml.in.out[["out"]]),
intern = TRUE)
genes.Ka <- as.numeric(regmatches(codeml.out.ln, regexec("\\sdN\\s*=\\s*(\\d+\\.\\d+)",
codeml.out.ln))[[1]][[2]])
genes.Ks <- as.numeric(regmatches(codeml.out.ln, regexec("\\sdS\\s*=\\s*(\\d+\\.\\d+)",
codeml.out.ln))[[1]][[2]])
list(Ka = genes.Ka, Ks = genes.Ks, w = (genes.Ka/genes.Ks))
} else NA
}, error = function(e) {
message("ERROR: Computing Ks value of gene pair '", p.n, "' caused an error:\n",
e)
NA
})
}
#' Generates the tree and control file to run PAML's codeml in order to compute
#' the Ks of 'gene.pair'
#'
#' @param gene.pair a character of length 2 holding the identifiers of the
#' pair's genes.
#' @param cds.msa.path the file path to the gene pair's codon sequences
#' alignment
#' @param ks.out.path the file path to the gene pair's codeml output file,
#' defaut is generated from 'cds.msa.path' substituting '_CDS_MSA.fasta' with
#' '_codeml_out.txt'
#' @param cds.codeml.path the file path to the gene pair's codeml input file,
#' defaut is generated from 'cds.msa.path' substituting '_CDS_MSA.fasta' with
#' '_codeml_in.cnt'
#' @param tree.path the file path to the gene pair's tree file, defaut is
#' generated from 'cds.msa.path' substituting '_CDS_MSA.fasta' with
#' '_tree.newick'. The tree will be the trivial one: '(A,B);'.
#'
#' @return A list with two string entries: 'in' the path to the control file
#' used as input to PAML's codeml, and 'out' path to the output file generated
#' by PAML's codeml.
#' @export
renderCodemlInputs <- function(gene.pair, cds.msa.path, ks.out.path = sub("_CDS_MSA.fasta",
"_codeml_out.txt", cds.msa.path, fixed = TRUE), cds.codeml.path = sub("_CDS_MSA.fasta",
"_codeml_in.cnt", cds.msa.path, fixed = TRUE), tree.path = sub("_CDS_MSA.fasta",
"_tree.newick", cds.msa.path, fixed = TRUE)) {
writeLines(paste("(", gene.pair[[1]], ",", gene.pair[[2]], ");", sep = ""), con = tree.path)
brew(text = codeml.tmpl, output = cds.codeml.path)
list(`in` = cds.codeml.path, out = ks.out.path)
}
#' Template for brew to generate the control file used as input for PAML's
#' codeml.
#' @export
codeml.tmpl <- "seqfile = <%= cds.msa.path %>\n treefile = <%= tree.path %>\n outfile = <%= ks.out.path %>\n noisy = 0\n verbose = 0\n runmode = -2\n cleandata = 1\n seqtype = 1\n CodonFreq = 2\n model = 2\n NSsites = 0\n icode = 0\n Mgene = 0\n fix_kappa = 0\n kappa = 2\n fix_omega = 0\n omega = 1\n fix_alpha = 1\n alpha = .0\n Malpha = 0\n ncatG = 4\n clock = 0\n getSE = 0\nRateAncestor = 0\n method = 0"
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.