#' eMIRNA Function for calculating Structural Pvalues and assess folding stability.
#'
#' \code{eMIRNA.Structural.Pvalues} Returns a data.frame and .csv file
#' with calculated Structural Pvalues.
#' @param file Path to FASTA file with predicted Candidate sequences to evaluate.
#'
#' @param prefix Desired name for Features .txt file output.
#'
#' @param iterate Number of iterations for shuffling sequences.
#'
#' @examples
#' eMIRNA.Predict("Candidates.FA", "Candidates", iterate=100)
#'
#' @export
eMIRNA.Structural.Pvalues <- function(file, prefix, iterate=100){
setwd("~/")
Dir0 <- "eMIRNA"
dir.create(file.path(Dir0), showWarnings=FALSE)
Dir <- "Prediction_Results"
setwd("~/eMIRNA/")
dir.create(file.path(Dir), showWarnings = FALSE)
workdir <- "~/eMIRNA/Feature_Results/"
setwd(workdir)
message("Calculating P-values")
command1 <- paste0("RNAfold --MEA -d2 -p --noPS -i ", file)
RNAfold1 <- system(command1, intern=TRUE)
command2 <- paste0("RNAfold --noPS -i ", file, " > RNAfold_pred.txt")
system(command2)
command2.1 <- "1_check_query_content.pl RNAfold_pred.txt RNAfold_pred_checked.txt"
system(command2.1)
command2.2 <- "2_get_stemloop.pl RNAfold_pred_checked.txt RNAfold_pred_stemloop.txt 5"
system(command2.2)
StemF <- "RNAfold_pred_stemloop.txt"
StemF <- unlist(lapply(StemF, readLines))
StemF.ID <- grep(">", StemF, value=TRUE)
StemF.ID <- gsub(">", "", StemF.ID)
StemF.ID <- strsplit(StemF.ID, "_")
StemF.ID <- lapply(StemF.ID, "[", -2)
StemF.ID.index <- as.numeric(unlist(lapply(StemF.ID, "[", -2)))
#ID
n <- length(RNAfold1)
ID <- RNAfold1[seq(1,n,7)]
ID <- gsub('>', '', ID)
ID <- ID[StemF.ID.index]
#Sequence
Sequence <- RNAfold1[seq(2,n,7)]
Sequence <- Sequence[StemF.ID.index]
#RNAfold Secondary Structure (SecondaryStrc)
SecondaryStrc1 <- RNAfold1[seq(3,n,7)]
SecondaryStrc1 <- SecondaryStrc1[StemF.ID.index]
SecondaryStrc <- gsub( " .*$", "", SecondaryStrc1)
#Minimum Freen Energy (MFE)
MFE <- as.numeric(regmatches(SecondaryStrc1,
gregexpr("(?>-)*[[:digit:]]+\\.*[[:digit:]]*",
SecondaryStrc1, perl=TRUE)))
#Pairing Probabilities Structure (PairProbStrc)
PairProbStrc1 <- RNAfold1[seq(4,n,7)]
PairProbStrc1 <- PairProbStrc1[StemF.ID.index]
PairProbStrc <- gsub( " .*$", "", PairProbStrc1)
#Ensemble Free Energy (EFE)
EFE <- as.numeric(regmatches(PairProbStrc1,
gregexpr("(?>-)*[[:digit:]]+\\.*[[:digit:]]*",
PairProbStrc1, perl=TRUE)))
#MFE P-value (MFE.Pval)
command3 <- paste0("fasta_ushuffle -n ", iterate, " -k 2 -s 1234 < ",
file, " >", prefix, "_iterated.fa")
seqs <- system(command3, intern=TRUE)
command4 <- paste0("RNAfold --MEA -d2 -p --noPS -i ", prefix, "_iterated.fa")
RNAfold2 <- system(command4, intern=TRUE)
n4 <- length(RNAfold2)
MFEit1 <- RNAfold2[seq(3,n4,7)]
MFEit <- as.numeric(regmatches(MFEit1,
gregexpr("(?>-)*[[:digit:]]+\\.*[[:digit:]]*",
MFEit1, perl=TRUE)))
N <- iterate
MFEit.along <- seq_along(MFEit)
MFEit.split <- split(MFEit, ceiling(MFEit.along/N))
R <- as.vector(sapply(Map(`<=`, MFEit.split, MFE), sum))
MFE.Pval <- (R / (N + 1))
#EFE P-value (EFE.Pval)
EFEit1 <- RNAfold2[seq(4,n4,7)]
EFEit <- as.numeric(regmatches(EFEit1,
gregexpr("(?>-)*[[:digit:]]+\\.*[[:digit:]]*",
EFEit1, perl=TRUE)))
EFEit.along <- seq_along(EFEit)
EFEit.split <- split(EFEit, ceiling(EFEit.along/N))
R2 <- as.vector(sapply(Map(`<=`, EFEit.split, EFE), sum))
EFE.Pval <- (R2 / (N + 1))
unlink("*.ps")
unlink("*.txt")
unlink("*.fa")
MFE.Pval <- round(MFE.Pval, 6)
EFE.Pval <- round(EFE.Pval, 6)
table <- as.data.frame(cbind(MFE.Pval, EFE.Pval))
rownames(table) <- ID
table <- table[order(table$MFE.Pval),]
final_table <- paste0("~/eMIRNA/Prediction_Results/", prefix, "_Structural_Pvalues.csv")
write.table(table, final_table, sep=",", quote=F, col.names=NA)
return(table)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.