## Functions and wrappers to fold nucleotide sequences
#' Folds a vector of sequences using a system call to Vienna RNAfold.
#'
#' This function is a wrapper for RNAfold from the ViennaRNA package. It performs thermodynamic folding of the input and
#' estimates the minimum free energy (mfe) structure.
#' The ViennaRNA package needs to be installed on the system and present on the PATH
#'
#' @param seqs a character vector og DNAStringSet object with the sequences to fold
#' @export
#' @return a data frame with columns sec_structure (dot-bracket format) and mfe (kcal/mol)
#' @seealso \code{\link{RNAduplex}}
RNAfold = function(seqs){
if(file.exists('/etc/profile.d/zz_modules.sh')){
cmmnd <- '/pstore/data/ricc/RNAfoldR.sh'
}else{
cmmnd <-"RNAfold --noPS"
}
output <- system(cmmnd, input=c(seqs),intern = T)
output <- output[grepl('[0-9]', output)]
struc <- substr(output, 1, nchar(seqs))
energy <- substr(output, nchar(seqs)+1, nchar(output))
energy <- suppressWarnings(as.double(gsub(' |[)]|[()]', '', energy)))
data.frame(sec_structure= struc,
sec_energy = energy)
#note that the only way to keep track of the returned value is that the order
#is the same as the input sequences
}
#' Estimates the duplex structure of a sequence (oligo) pairing with a copy of itself.
#'
#' This function is a wrapper for RNAduplex from the ViennaRNA package. It
#' performs thermodynamic duplex folding of the input estimates the minimum free
#' energy (mfe) structure. The ViennaRNA package needs to be installed on the
#' system and present on the PATH
#'
#' @param seqs a character vector or DNAStringSet object with the sequences to
#' fold
#' @export
#' @importFrom Biostrings reverseComplement
#' @return a data frame with columns duplex_structure (dot-bracket format),
#' duplex_pos and duplex_energy (kcal/mol)
#' @seealso \code{\link{RNAfold}}
RNAduplex = function(seqs, targetseqs=NULL){
if(is.null(targetseqs)){
targetseqs <- seqs
}
if(file.exists('/etc/profile.d/zz_modules.sh')){
cmmnd <- '/pstore/data/ricc/RNAduplexR.sh'
}else{
cmmnd <-"RNAduplex"
}
input0 <- NA
input0[seq(1,by=2,length=length(seqs))] <- seqs
input0[seq(2,by=2,length=length(seqs))] <- targetseqs
tmp2 <- system(cmmnd,input = input0, intern = TRUE)
tmp2 <- strsplit(tmp2, "\\s+", perl = T)
# duplex_pos and duplex_energy
data.frame(
duplex_structure = sapply(tmp2, function(x) x[1]),
duplex_pos = gsub(',','-',sapply(
tmp2, function(x){
paste0(x[2:4], collapse = ' ')} )) ,
duplex_energy =
as.numeric(sapply(
tmp2, function(x){
gsub("\\(|\\)", "", tail(x, n=1) )
})
)
)
#note that the only way to keep track of the returned value is that the order
#is the same as the input sequences
}
#' Estimate nucleotide resolution transcript accessibilty for a transcript
#'
#' This function is the R version of RNAplfold from the ViennaRNA package (Tafer et al., 2008). It
#' evaluates accessibility (probability of being unpaired) for a transcript. The VienneRNA
#' package needs to be installed (version 2.0.7 from http://www.tbi.univie.ac.at/~ronny/RNA/vrna2_source.html).
#'
#' @param seq.char a character vector with the nucleotide sequence
#' @param L.in allow only pairs (i,j) with j-i <= L.in
#' @param W.in average pair probabilities over windows of size W.in.
#' @param u.in compute the mean probability that regions of length 1 to u.in are unpaired.
#' @keywords transcript accessibility
#' @export
RNAplfoldR <- function(seq.char, L.in=40, W.in=80, u.in=16) {
prefix <- paste0(sample(LETTERS[1:28],size = 10,replace = TRUE),
collapse = '')
seq.char <- as.character(seq.char)
if (nchar(seq.char)<3) {
stop("Your sequence need to be a character vector more than 3nt long.")
}
## check that the ViennaRNA command RNAplfold can be run
## if there is a version difference, warn
if(file.exists('/etc/profile.d/zz_modules.sh')){
system(paste("/pstore/data/ricc/RNAplfoldR.sh -L", L.in,
"-W", W.in,"-u", u.in,"-i ", prefix),
input = seq.char,
intern = TRUE)
}else{
cmmnd <- "RNAplfold -V"
cmmnd2 <- paste("RNAplfold -L",L.in,"-W",W.in,"-u",u.in)
a <- tryCatch(
{suppressMessages(system(cmmnd, intern=T,ignore.stderr = TRUE))},
error=function(cond) {return(NA)},
warning=function(cond) {return(NA)} #,
# finally={}
)
if (is.na(a)) {
stop("RNAplfold is missing, please install")
}
## set up the RNAplfold command and pipe the sequence data to it
system(paste0(cmmnd2,' --id-prefix ', prefix),
input=seq.char, intern=TRUE)
}
## read the plfold_lunp file outputted by RNAplfold and format it
acc.tx <- read.delim(paste0(prefix,'_0001_lunp'), as.is=T, skip=2, header=F, row.names=1)
acc.tx <- acc.tx[,colSums(is.na(acc.tx))!=nrow(acc.tx)]
colnames(acc.tx) <- 1:ncol(acc.tx)
file.remove(paste0(prefix,'_0001_lunp'))
file.remove(paste0(prefix,'_0001_dp.ps'))
return(acc.tx)
}
#' Find minimum free energy hybridisations of a long (target) and a short (query) RNA
#'
#' This function is the R version of RNAhybrid by Rehmsmeier et al., RNA, 10:1507-1517, 2004.
#' The hybridisation is performed in a kind of domain mode, ie. the short sequence is hybridised
#' to the best fitting parts of the long one. The command line tool RNAhybrid
#' needs to be installed (version 2.1 from http://bibiserv.techfak.uni-bielefeld.de/rnahybrid/dl_pre-page.html).
#' There is no I/O activity so this function can be easily parallelized.
#'
#' @param seq.target a character vector with the long nucleotide sequence
#' @param seq.query a character vector with the short nucleotide sequence
#' @param b maximal number of hits to show. Hits with increasing minimum free energy
#' (reminder: larger energies are worse) are shown untill there are no more hits.
#' Hits may only overlap at dangling bases (5' or 3' unpaired end of target).
#' @keywords rnahybrid mimumum free energy hybridization
#' @export
#' @examples
#' a <- RNAhybridR("AACCTTGG", "ACTG")
RNAhybridR <- function(seq.target, seq.query, b=1) {
if (nchar(seq.target)<250000) {
if(file.exists('/etc/profile.d/zz_modules.sh')){
cmmnd <- paste0('source /etc/profile.d/zz_modules.sh && ml RNAhybrid && ',
paste("RNAhybrid -b",b,"-c -s 3utr_worm",seq.target,seq.query))
}else{
cmmnd <- paste("RNAhybrid -b",b,"-c -s 3utr_worm",seq.target,seq.query)
}
tst <- system(cmmnd, intern = T)
tst <- strsplit(tst,":")[[1]]
mfe <- as.numeric(tst[5]) #kcal/mol
} else {
mfe <- NA
}
return(mfe)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.