# R/eMIRNA.Filter.by.Structure.R

#' eMIRNA Function for filtering sequences according to Secondary Folding Structure
#'
#' \code{eMIRNA.Filter.by.Structure} writes a filtered FASTA file containing only
#' those sequences having a hairpin-like Secondary Folding Structure, i.e. two
#' stems and one terminal loop. The output is saved in
#' \code{~/eMIRNA/FilterStructure_Results/}.
#'
#' @param file Path to FASTA file to filter. Each record must span exactly two
#'  lines: a header line followed by its sequence on a single line.
#'
#' @param prefix Prefix for the filtered FASTA output, which is written as
#'  \code{<prefix>_filter_nloop.fa}.
#'
#' @examples
#' eMIRNA.Filter.by.Structure("~/eMIRNA/FilterSize_Results/Candidates_filter_size.fa",
#'  "FASTA")
#'
#' @import Biobase
#' @export




eMIRNA.Filter.by.Structure <- function(file, prefix){
  # Keeps only the FASTA records whose predicted secondary structure contains
  # exactly one hairpin loop and writes them to
  # ~/eMIRNA/FilterStructure_Results/<prefix>_filter_nloop.fa.
  #
  # file:   path to a FASTA file with two lines per record (header, sequence).
  # prefix: prefix of the output file name.
  #
  # Requires the external `RNAfold` executable to be available on the PATH.
  # The caller's working directory is restored when the function exits.
  if (!is.character(file) || length(file) != 1L || is.na(file)) {
    stop("'file' must be a single path to a FASTA file", call. = FALSE)
  }

  out_dir <- path.expand("~/eMIRNA/FilterStructure_Results")
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  # Resolve the input BEFORE changing directory so that relative paths are
  # interpreted against the caller's working directory. Paths that only exist
  # relative to the results directory (the historical behaviour) still work.
  input_path <- path.expand(file)
  if (!file.exists(input_path) && file.exists(file.path(out_dir, file))) {
    input_path <- file.path(out_dir, file)
  }
  if (!file.exists(input_path)) {
    stop("FASTA file not found: ", file, call. = FALSE)
  }
  input_path <- normalizePath(input_path)

  if (!nzchar(Sys.which("RNAfold"))) {
    stop("The 'RNAfold' executable was not found on the PATH", call. = FALSE)
  }

  # RNAfold writes auxiliary files into the current directory, and the output
  # name is relative to it, so work inside the results directory and put the
  # caller's directory back afterwards (also on error).
  old_wd <- setwd(out_dir)
  on.exit(setwd(old_wd), add = TRUE)

  #Checking sequences and filtering by n loops
  message("Filtering sequences by Secondary Structure")
  fasta_lines <- readLines(input_path)
  n_lines <- length(fasta_lines)
  if (n_lines < 2L) {
    stop("No FASTA records found in: ", file, call. = FALSE)
  }
  # Odd lines are taken as headers, even lines as sequences.
  ids <- fasta_lines[seq(1, n_lines, 2)]
  seqs <- fasta_lines[seq(2, n_lines, 2)]

  # Quote the path so that spaces or shell metacharacters cannot break (or be
  # interpreted by) the command line.
  fold_out <- system2(
    "RNAfold",
    c("--MEA", "-d2", "-p", "--noPS", "-i", shQuote(input_path)),
    stdout = TRUE
  )
  n_out <- length(fold_out)
  if (n_out < 3L) {
    stop("RNAfold produced no structure output for: ", file, call. = FALSE)
  }

  # The output is read as 7-line records; line 3 of each record is used as the
  # dot-bracket structure (presumably the MFE structure followed by its
  # energy -- confirm against the installed RNAfold version).
  structure_lines <- fold_out[seq(3, n_out, 7)]
  # Drop everything after the first space, keeping only the dot-bracket string.
  structures <- gsub(" .*$", "", structure_lines)

  # Split at every "(" that is directly followed by dots and a ")", i.e. at the
  # opening of each hairpin loop; the number of pieces minus one is therefore
  # the number of hairpin loops in the structure.
  pieces <- strsplit(structures, "\\((?=(\\.+\\)))", perl = TRUE)
  n_loops <- lengths(pieces) - 1

  keep <- which(n_loops == 1)
  # Interleave headers and sequences back into FASTA line order.
  filtered <- c(rbind(ids[keep], seqs[keep]))
  out_name <- paste0(prefix, "_filter_nloop.fa")
  write(filtered, out_name)

  # Remove PostScript files left in the results directory (RNAfold may still
  # emit some, e.g. dot plots, despite --noPS).
  unlink("*.ps", recursive = TRUE)

}
# emarmolsanchez/eMIRNA_Rmodules documentation built on May 14, 2019, 5 a.m.