R/eMIRNA.Structural.Pvalues.R

#' eMIRNA Function for calculating Structural Pvalues and assess folding stability.
#'
#' \code{eMIRNA.Structural.Pvalues} Returns a data.frame and .csv file
#'  with calculated Structural Pvalues.
#' @param  file Path to FASTA file with predicted Candidate sequences to evaluate.
#'
#' @param prefix Desired name for Features .txt file output.
#'
#' @param iterate Number of iterations for shuffling sequences.
#'
#' @examples
#' eMIRNA.Predict("Candidates.FA", "Candidates", iterate=100)
#'
#' @export


eMIRNA.Structural.Pvalues <- function(file, prefix, iterate=100){
  setwd("~/")
  Dir0 <- "eMIRNA"
  dir.create(file.path(Dir0), showWarnings=FALSE)
  Dir <- "Prediction_Results"
  setwd("~/eMIRNA/")
  dir.create(file.path(Dir), showWarnings = FALSE)
  workdir <- "~/eMIRNA/Feature_Results/"
  setwd(workdir)
  message("Calculating P-values")
  command1 <- paste0("RNAfold --MEA -d2 -p --noPS -i ", file)
  RNAfold1 <- system(command1, intern=TRUE)


  command2 <- paste0("RNAfold --noPS -i ", file, " > RNAfold_pred.txt")
  system(command2)
  command2.1 <- "1_check_query_content.pl RNAfold_pred.txt RNAfold_pred_checked.txt"
  system(command2.1)
  command2.2 <- "2_get_stemloop.pl RNAfold_pred_checked.txt RNAfold_pred_stemloop.txt 5"
  system(command2.2)


  StemF <- "RNAfold_pred_stemloop.txt"
  StemF <- unlist(lapply(StemF, readLines))
  StemF.ID <- grep(">", StemF, value=TRUE)
  StemF.ID <- gsub(">", "", StemF.ID)
  StemF.ID <- strsplit(StemF.ID, "_")
  StemF.ID <- lapply(StemF.ID, "[", -2)
  StemF.ID.index <- as.numeric(unlist(lapply(StemF.ID, "[", -2)))

  #ID
  n <- length(RNAfold1)
  ID <- RNAfold1[seq(1,n,7)]
  ID <- gsub('>', '', ID)
  ID <- ID[StemF.ID.index]

  #Sequence
  Sequence <- RNAfold1[seq(2,n,7)]
  Sequence <- Sequence[StemF.ID.index]


  #RNAfold Secondary Structure (SecondaryStrc)
  SecondaryStrc1 <- RNAfold1[seq(3,n,7)]
  SecondaryStrc1 <- SecondaryStrc1[StemF.ID.index]
  SecondaryStrc <- gsub( " .*$", "", SecondaryStrc1)


  #Minimum Freen Energy (MFE)
  MFE <- as.numeric(regmatches(SecondaryStrc1,
                               gregexpr("(?>-)*[[:digit:]]+\\.*[[:digit:]]*",
                                        SecondaryStrc1, perl=TRUE)))

  #Pairing Probabilities Structure (PairProbStrc)
  PairProbStrc1 <- RNAfold1[seq(4,n,7)]
  PairProbStrc1 <- PairProbStrc1[StemF.ID.index]
  PairProbStrc <- gsub( " .*$", "", PairProbStrc1)

  #Ensemble Free Energy (EFE)
  EFE <- as.numeric(regmatches(PairProbStrc1,
                               gregexpr("(?>-)*[[:digit:]]+\\.*[[:digit:]]*",
                                        PairProbStrc1, perl=TRUE)))


  #MFE P-value (MFE.Pval)
  command3 <- paste0("fasta_ushuffle -n ", iterate, " -k 2 -s 1234 < ",
                     file, " >", prefix, "_iterated.fa")
  seqs <- system(command3, intern=TRUE)

  command4 <- paste0("RNAfold --MEA -d2 -p --noPS -i ", prefix, "_iterated.fa")
  RNAfold2 <- system(command4, intern=TRUE)
  n4 <- length(RNAfold2)
  MFEit1 <- RNAfold2[seq(3,n4,7)]
  MFEit <- as.numeric(regmatches(MFEit1,
                                 gregexpr("(?>-)*[[:digit:]]+\\.*[[:digit:]]*",
                                          MFEit1, perl=TRUE)))
  N <- iterate
  MFEit.along <- seq_along(MFEit)
  MFEit.split <- split(MFEit, ceiling(MFEit.along/N))
  R <- as.vector(sapply(Map(`<=`, MFEit.split, MFE), sum))
  MFE.Pval <- (R / (N + 1))


  #EFE P-value (EFE.Pval)
  EFEit1 <- RNAfold2[seq(4,n4,7)]
  EFEit <- as.numeric(regmatches(EFEit1,
                                 gregexpr("(?>-)*[[:digit:]]+\\.*[[:digit:]]*",
                                          EFEit1, perl=TRUE)))

  EFEit.along <- seq_along(EFEit)
  EFEit.split <- split(EFEit, ceiling(EFEit.along/N))
  R2 <- as.vector(sapply(Map(`<=`, EFEit.split, EFE), sum))
  EFE.Pval <- (R2 / (N + 1))

  unlink("*.ps")
  unlink("*.txt")
  unlink("*.fa")

  MFE.Pval <- round(MFE.Pval, 6)
  EFE.Pval <- round(EFE.Pval, 6)
  table <- as.data.frame(cbind(MFE.Pval, EFE.Pval))
  rownames(table) <- ID
  table <- table[order(table$MFE.Pval),]

  final_table <- paste0("~/eMIRNA/Prediction_Results/", prefix, "_Structural_Pvalues.csv")
  write.table(table, final_table, sep=",", quote=F, col.names=NA)

  return(table)

}
emarmolsanchez/eMIRNA_Rmodules documentation built on May 14, 2019, 5 a.m.