R/RRNA.r

Defines functions rotateV translate genCords circleCoord rotateS loopLength stemCords ct2coord validCT ct2knet backward forward makeCt loadCt pseudoKnot aptPlotCT transformFold bplfile RNAPlot alignCoord loadCoords genDB addBreaks coord2comp pairScores fuzzy2comp ct2ss fuzzy2ct

Documented in addBreaks alignCoord aptPlotCT backward bplfile circleCoord coord2comp ct2coord ct2knet ct2ss forward fuzzy2comp fuzzy2ct genCords genDB loadCoords loadCt loopLength makeCt pairScores pseudoKnot RNAPlot rotateS rotateV stemCords transformFold translate validCT

############################################################################
#    RRNA is a R package for plotting and manipulating RNA secondary       #
#    structures.                                                           #
############################################################################
#    Copyright (C) 2015 JP Bida                                            #
#                                                                          #
#    This program is free software: you can redistribute it and/or modify  #
#    it under the terms of the GNU General Public License as published by  #
#    the Free Software Foundation, either version 3 of the License, or     #
#    (at your option) any later version.                                   #
#                                                                          #
#    This program is distributed in the hope that it will be useful,       #
#    but WITHOUT ANY WARRANTY; without even the implied warranty of        #
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         #
#    GNU General Public License for more details.                          #
#                                                                          #
#    You should have received a copy of the GNU General Public License     #
#    along with this program.  If not, see <http://www.gnu.org/licenses/>  #
############################################################################


############################################################################
#
# FUNCTIONS
#
# loadFolds - Loads a coordinate file for an RNA structure into R with column
#            names used by other functions in the RNAPlot package
#
# alignCoord - Given a data frame containing multiple coordinate files alignCoord
#              takes the data frame, the id of the coordinate file being aligned to
#              the coordinate file that is going to be aligned, and a new origin. It
#              then translates
#
# RNAPlot   - Given fold data RRNAPlot is a generic ploting function to create
#              a plot of the secondary structure
#
# pseudoKnot - Given CT file data this function returns the positions of pseudoKnots
#
# loadCT     - Loads data from a CT file
#
# ct2Coord   - Creates secondary structure coordiant file from a CT file loaded
#              using loadCT independent of the RRNAFold program. This can be used
#              by RNAPlot function to plot secondary structures.
# ct2knet    - Creates file that can be used by knet
#
# bplfile    - Creates a bplfile from fold data
#
# makeCT     - Creates a CT file from a secondary structure in bracket form and a sequence
#
# All RNA can be represented by STEMs and Multi-Strand Loops
# Loop - NumberOfStrands,StrandsLengths

# Bulge as Loop - 2,1,3

# coord2comp - Splits RNA structure into self contained components used for RNA assembly
fuzzy2ct <- function(dat) {
  ## Read fuzzy_dat into c2 file
  ## Remove PseudoKnots
  ## Given pairs of numbers
  ## (1,4)(2,5)(3,6) etc.
  ## Identify the bracket representation
  ## [ ( { ] ( } ) )  )
  ## Such that a closing brackets for  open bracket m
  ## does not occur for before closing brackets for n
  ## when m < n

  ## Remove Minimal amount of pairs such that no pair
  ## doesn't contain every pair within its range

  ## PairID,ContainsOutOfRange,ContainsInRange,IsOutOfRange

  ## Basic - Loop through each pair, score,
  ## remove base pair occuring most outsides
  ## recalculate & repeat

  ## If same pos is going to multiple
  ## still works?


  ### Get Maximum Range
  names(dat) <- c("pdb", "model", "pos1", "pos2", "face1", "face2", "h-bonds")

  mx <- max(dat$pos1, dat$pos2)
  mn <- min(dat$pos1, dat$pos2)

  ss <- rep(".", mx)
  ### Find minimal pairs
  pairs <- pairScores(dat)
  ss[pairs$pos1] <- "("
  ss[pairs$pos2] <- ")"


  ### Output bracket notation

  ss <- paste(ss, collapse = "")
  ss
}

ct2ss <- function(dat, cleanup = TRUE) {
  ## Generate SS from CT file
  ## If CT file contains knots or multiple pairings
  ## cleanup=TRUE tries to generate the SS with most
  ## base pairs preserved

  ss <- ""
  if (cleanup) {
    mx <- max(dat$pos, dat$pos2)
    mn <- min(dat$pos, dat$pos2)
    dat$pos1 <- dat$bound
    dat <- dat[dat$pos1 > 0, ]
    dat <- dat[dat$pos2 > 0, ]
    ss <- rep(".", mx)
    ### Find minimal pairs
    pairs <- pairScores(dat)
    ss[pairs$pos1] <- "("
    ss[pairs$pos2] <- ")"


    ### Output bracket notation

    ss <- paste(ss, collapse = "")
  } else {
    ss <- rep(".", nrow(dat))
    for (i in 1:nrow(dat)) {
      if (dat$bound[i] != 0 & dat$bound[i] > dat$pos[i]) {
        ss[i] <- "("
      } else {
        if (dat$bound[i] != 0) {
          ss[i] <- ")"
        }
      }
    }
    ss <- paste(ss, collapse = "")
  }

  ss
}

fuzzy2comp <- function(dat) {
  sct <- fuzzy2ct(dat)
  ct <- makeCt(sct, paste(rep("A", nchar(sct)), collapse = ""))
  crd <- ct2coord(ct)
  comp <- coord2comp(crd)
  comp
}

pairScores <- function(PairDF) {

  ## Finds the minimum number of pairings to remove
  ## uses a greedy approach removing the maximum conflicts
  ## in each step and recalculating

  ## PairID,ContainsOutOfRange,ContainsInRange,IsOutOfRange

  ## Represent all pairs in (pos_lower,pos_upper)
  ## Remove duplciates,
  ## Remove any self-pairing
  PairDF <- PairDF[, names(PairDF) %in% c("pos1", "pos2")]
  PairDF2 <- data.frame(pos1 = PairDF$pos2, pos2 = PairDF$pos1)

  Pairs <- rbind(PairDF, PairDF2)
  Pairs <- Pairs[Pairs$pos1 < Pairs$pos2, ]
  Pairs <- unique(Pairs)

  total <- 1
  while (total > 0) {
    Pairs$ContainsOutOfRange <- 0
    Pairs$ContainsInRange <- 0
    Pairs$IsOutOfRange <- 0
    for (p in 1:nrow(Pairs)) {
      ## Only look at lower
      kContainsOutOfRange <- Pairs$pos1 > Pairs$pos1[p] & Pairs$pos1 < Pairs$pos2[p] & Pairs$pos2 > Pairs$pos2[p]
      kContainsInRange <- Pairs$pos1 > Pairs$pos1[p] & Pairs$pos1 < Pairs$pos2[p] & Pairs$pos2 < Pairs$pos2[p]
      Pairs$ContainsOutOfRange[p] <- length(Pairs$pos1[kContainsOutOfRange])
      Pairs$ContainsInRange[p] <- length(Pairs$pos1[kContainsInRange])
      Pairs$IsOutOfRange[kContainsOutOfRange] <- Pairs$IsOutOfRange[kContainsOutOfRange] + 1
    }
    total <- sum(Pairs$IsOutOfRange)
    if (total > 0) {
      ## Remove the maximum scoring base
      p <- Pairs$pos1[Pairs$ContainsOutOfRange == max(Pairs$ContainsOutOfRange)]
      Pairs <- Pairs[Pairs$pos1 != p[1], ]
    }
  }

  Pairs
}


coord2comp <- function(coord) {
  ## id1,id2,pos1a,pos1b,pos2a,pos2b
  ## Outputs list of components with junctions
  ## Overlapping base-pairs are labeled with letters
  ## For Each Group
  end <- max(coord$pos2)
  output <- list()
  w <- 1
  groups <- unique(coord$group)
  SubStructure <- rep(".", nrow(coord))
  for (g in groups) {
    GroupStructure <- SubStructure
    min_pos <- min(coord$pos2[coord$group == g])
    max_pos <- max(coord$pos2[coord$group == g])
    ## Add overlapping basepair and label it
    if (min_pos == 1 | max_pos == end) {
      comp <- coord[coord$group == g, ]
      GroupStructure[coord$group == g] <- "+"
      GroupStructure <- paste(GroupStructure, collapse = "")
    } else {
      temp <- coord
      temp$group[temp$pos2 == min_pos - 1] <- g
      temp$group[temp$pos2 == max_pos + 1] <- g
      comp <- temp[temp$group == g, ]
      GroupStructure[temp$group == g] <- "+"
      GroupStructure <- paste(GroupStructure, collapse = "")
    }
    d <- diff(comp$pos2)
    sequence <- addBreaks(d, comp$seq)
    sequence <- paste(sequence, collapse = "")
    structure <- rep(".", length(comp$seq))
    structure[comp$bound < comp$pos2 & comp$bound != 0] <- ")"
    structure[comp$bound > comp$pos2 & comp$bound != 0] <- "("
    structure <- addBreaks(d, structure)
    structure <- paste(structure, collapse = "")
    edges <- rep(".", length(comp$seq))
    coord_other <- coord
    for (go in groups) {
      if (go != g) {
        min_pos <- min(coord$pos2[coord$group == go])
        max_pos <- max(coord$pos2[coord$group == go])
        ## Add overlapping basepair and label it
        if (min_pos == 1 | max_pos == end) {
        } else {
          coord_other$group[coord_other$pos2 == min_pos - 1] <- go
          coord_other$group[coord_other$pos2 == max_pos + 1] <- go
        }
      }
    }
    edges <- coord_other$group[coord_other$pos2 %in% comp$pos2]
    edges <- addBreaks(d, edges)
    edges <- paste(edges, collapse = ",")
    out <- data.frame(group = g, sequence = sequence, structure = structure, edges = edges, substructure = GroupStructure)
    output[[w]] <- out
    w <- w + 1
  }

  output <- do.call(rbind, output)
  output
}

addBreaks <- function(d, structure) {
  breaks <- c()
  for (i in 1:length(d)) {
    if (d[i] != 1) {
      breaks <- c(breaks, structure[i], "-")
    } else {
      breaks <- c(breaks, structure[i])
    }
  }
  breaks <- c(breaks, structure[length(structure)])
  breaks
}

genDB <- function(map) {
  ## Creates Component Database
  ## breakpoint, closed_ends, rep, type
  ## .....b..... , true , 5b5,
  ## ...... , true ,  6 ,

  for (i in 1:nrow(map)) {
    ss <- strsplit(map$V2[i], "[BREAK]", fixed = TRUE)[[1]]
    lln <- nchar(ss)
  }
}

loadCoords <- function(filename) {
  ############################################################################
  ### Description:
  ############################################################################
  ###
  ### Reads a file in table format containing coordinates generated from the
  ### RRNAfold program for a single RNA secondary structure or multiple RNA
  ### secondary structures into a data frame with a standard naming convention.
  ###
  ############################################################################
  ### Usage:
  ############################################################################
  ###
  ### data=loadFolds(file)
  ###
  ############################################################################
  ### Arguments:
  ############################################################################
  ###
  ### file: the name of a file which the data are to be read from. Each row
  ###       of the table apears as one line in the file, with columns separated
  ###       by commas. This file is created by a program using the Vienna RNA
  ###       package.
  ###
  ############################################################################
  ### Details:
  ############################################################################
  ###
  ###  Example input file format:
  ###
  ###  0,158.534088,199.550888,G,0,-1
  ###  0,152.741776,194.100571,A,1,-1
  ###  0,149.307266,186.849899,A,2,-1
  ###  0,148.749847,178.776566,G,3,-1
  ###  0,151.196960,170.989944,C,4,59
  ###  0,141.412643,159.620361,U,5,58
  ###  0,131.628342,148.250793,U,6,57
  ###  0,121.844025,136.881210,A,7,56
  ###  0,112.059715,125.511642,C,8,55
  ###  0,102.275398,114.142059,A,9,54
  ###  0,89.142853,109.343330,A,10,-1
  ###  ....
  ###
  ###  There is no header on the input file. The columns are
  ###  ID,X,Y,SEQ,POS,BOUND
  ###
  ###  ID - A unique ID for a given fold in the file
  ###  X - X position of the NT in the secondary structure plot
  ###  Y - Y position of the NT in the secondary structure plot
  ###  SEQ - The nucleotide (A,G,U,C)
  ###  POS - The position of the NT in the sequence
  ###  BOUND - The position of the NT that the NT at POS is bound to
  ###
  ###  The RRNAfold program can be used to generate
  ###  <include github.com link here>
  ###  ###
  ###  Example:
  ###
  ###  Generate a data file using the RRNAfold program. Create the
  ###  input file input.dat containing a single sequence per line
  ###  in file.
  ###
  ###  input.dat
  ###
  ###  AGGGGCCCCUUUUAAAA
  ###  AGCCGCCCCUUUUAAAA
  ###  AGAGGCCCCUUUUAAAA
  ###  AGUUGCCCCUUUUAAAA
  ###
  ###  Run the RRNAfold program on the command line to generate the
  ###  output file out.dat
  ###
  ###  RRNAfold --input-file input.dat --output-file out.dat
  ###
  ###  Use the loadFolds program to load the table into an R data
  ###  frame.
  ###
  ###
  ###  >library(RRNA)
  ###  >dat=loadFolds("out.dat")
  data <- read.table(file = filename, sep = ",", header = FALSE)
  if (dim(data)[2] == 6) {
    names(data) <- c("id", "x", "y", "seq", "num", "bound")
  } else {
    if (dim(data)[2] == 7) {
      names(data) <- c("id", "x", "y", "seq", "num", "bound", "energy")
    } else {
      data <- NULL
      print(paste("The file ", filename, " does not have the correct number of columns", sep = ""))
      print(paste("Expected 6 or 7, found ", dim(data)[2], sep = ""))
    }
  }
  data
}

alignCoord <- function(data, n1, n2, x, y) {
  ############################################################################
  ### Description:
  ############################################################################
  ###
  ###  Given fold data from loadFolds align all secondary structure plots
  ###  at a particular nucleotide position. This translates the x1,y1 position of
  ###  the n1 nucleotide to the provided x,y coordinates and the roates the
  ###  secondary structure until y1=y2 for n1 and n2.
  ###
  ###  This allows you to plot multiple secondary structures with all of them
  ###  aligned to a given orientation.
  ###
  ###
  ############################################################################
  ### Usage:
  ############################################################################
  ### alignCoord(data,n1_pos,n2_pos,x0,y0)
  ### data=loadFolds(file)
  ### aligned=alignCoord(data,2,3,0,0)
  ###
  ############################################################################
  ### Arguments:
  ############################################################################
  ###
  ### data: fold data outputted from loadFolds
  ### n1_id: The position of the nucleotide that is translated to (x0,y0)
  ### n2_id: The position of the nucleotide in the sequence that is rotated
  ###        until its y coordinate is y0
  ### x0:    The x coordinate that n1 is translated to
  ### y0:    The y coordinate that n2 is translated to
  ###
  ############################################################################
  ### Details:
  ############################################################################
  ###
  ##  The RRNAfold program can be used to generate
  ##  <include github.com link here>
  ##  ###
  ###  Example:
  ###
  ###  Generate a data file using the RRNAfold program. Create the
  ###  input file input.dat containing a single sequence per line
  ###  in file.
  ###
  ###  input.dat
  ###
  ###  AGGGGCCCCUUUUAAAA
  ###  AGCCGCCCCUUUUAAAA
  ###  AGAGGCCCCUUUUAAAA
  ###  AGUUGCCCCUUUUAAAA
  ###
  ###  Run the RRNAfold program on the command line to generate the
  ###  output file out.dat
  ###
  ###  RRNAfold --input-file input.dat --output-file out.dat
  ###
  ###  Use the loadFolds program to load the table into an R data
  ###  frame.
  ###
  ###
  ###  >library(RRNA)
  ###  >dat=loadFolds("out.dat")
  ###  >aligned=alignCoord(dat,1,2,0,0)

  ids <- data$id
  ids <- table(ids)
  ids <- as.numeric(names(ids))

  ## Loop through all ids in the dataset ##
  for (i in ids) {
    tmp <- NULL
    # print(i)
    tmp <- data[data$id == i, ]
    x1 <- tmp$x[tmp$num == n1]
    x2 <- tmp$x[tmp$num == n2]

    y1 <- tmp$y[tmp$num == n1]
    y2 <- tmp$y[tmp$num == n1]

    ## Translate x1,y1 to x,y ##
    tmp$x <- tmp$x - (x1 - x)
    tmp$y <- tmp$y - (y1 - y)
    dy <- tmp$y[tmp$num == n2] - tmp$y[tmp$num == n1]
    dx <- tmp$x[tmp$num == n2] - tmp$x[tmp$num == n1]
    ang <- atan(abs(dy) / abs(dx))
    if (dx < 0 & dy < 0) {
      # print("2")
      ang <- ang + pi
    } else {
      if (dx < 0 & dy > 0) {
        # print("1")
        ang <- pi - ang
      } else {
        if (dx > 0 & dy < 0) {
          # print("3")
          ang <- 2 * pi - ang
        }
      }
    }

    ## Rotate x2,y2 around x1,y1 until y1==y2
    for (j in 1:length(tmp$x)) {
      pt <- rotateS(tmp$x[j], tmp$y[j], x, y, -ang)
      tmp$x[j] <- pt[1]
      tmp$y[j] <- pt[2]
    }
    data[data$id == i, ] <- tmp
  }
  data
}


RNAPlot <- function(data, ranges = 0, add = FALSE, hl = NULL, seqcols = NULL, seqTF = FALSE, labTF = FALSE, nt = FALSE, dp = 0.5, modspec = FALSE, modp = NULL, mod = NULL, modcol = NULL, tsize = 0.5, main = "", pointSize = 2, lineWd = 2) {
  ## dp-The distanc the text is place from the sides of the aptamer ###
  ## modspec - Modify Specific bases in the Aptamer ###
  ## modp- NT position
  ## mod -Modification to be done ###
  # 	1=put a box around the Label
  # 	2=put a circle around the Label
  # 	3=NT Change T->G
  # 	4=Add a solid colored circle at junction
  ## modcol - Color of the modification
  ############################################################################
  ### Description:
  ############################################################################
  ###
  ### Given fold data from load Plots this creates a new secondary structure
  ### plot
  ###
  ############################################################################
  ### Usage:
  ############################################################################
  ### alignCoord(data,n1_pos,n2_pos,x0,y0)
  ### data=loadFolds(file)
  ### RNAPlot(data[data$id==0,])
  ###
  ############################################################################
  ### Arguments:
  ############################################################################
  ###  data: fold data from loadFolds for a single RNA secondary structure
  ###  ranges: A data frame containing the ranges of sequence positions that
  ###          should be highlighted with given colors
  ###          Example:
  ###          data.frame(min=c(69,1,7),max=c(74,5,17),col=c(2,3,4),desc=c("Region 1","Region 2","Region 3"))
  ###
  ###          This highlights the sequence between 69-74, 1-5, and 7-17
  ###  add: Should the new plot be added to an existing plot TRUE/FALSE
  ###  hl: Takes an array of sequences and highlights them with seqcol
  ###  seqcols: Colors that should be used to highlight the sequences given in hl
  ###  seqTF: Unused argument included for backward compatibility 
  ###  labTF: Should a legend be plot TRUE/FALSE
  ###  nt: Plot the nucleotide sequence TRUE/FALSE
  ###  dp: Distance the nucleotide labels plotted using the nt option are from the
  ###      x,y coordinate of the nt position
  ###  modspec: TRUE/FALSE should specific positions be modified?
  ###  modp: Array of positions to be modified
  ###  modcol: Color of positions being modified by modp array
  ###  mod: the pch used for nucleotides being modifed by modp
  ###  tsize:Size of the nucleotide labels
  ###  main: Title used for legend
  ###  pointSize: Size of the pch used for ploting
  ###  lineWd: the line width used in the plot

  ############################################################################
  ### Details:
  ############################################################################
  ###
  ##  The RRNAfold program can be used to generate
  ##  <include github.com link here>
  ##  ###
  ###  Example:
  ###
  ###  Generate a data file using the RRNAfold program. Create the
  ###  input file input.dat containing a single sequence per line
  ###  in file.
  ###
  ###  input.dat
  ###
  ###  AGGGGCCCCUUUUAAAA
  ###  AGCCGCCCCUUUUAAAA
  ###  AGAGGCCCCUUUUAAAA
  ###  AGUUGCCCCUUUUAAAA
  ###
  ###  Run the RRNAfold program on the command line to generate the
  ###  output file out.dat
  ###
  ###  RRNAfold --input-file input.dat --output-file out.dat
  ###
  ###  Use the loadFolds program to load the table into an R data
  ###  frame.
  ###
  ###
  ###  >library(RRNA)
  ###  >dat=loadFolds("out.dat")
  ###  >aligned=alignCoord(dat,1,2,0,0)
  ###  >RNAPlot(aligned[aligned$id==0,])
  l <- 1
  if (seqTF) {
    #TODO: Update version and remove seqTF as argument in function
    # breaking backward compatibility
    #s <- data$seq
    #sequence <- paste(s, collapse = "")
  }
  if (is.null(dim(ranges)) & length(seqcols) < 1) {
    l <- 0
  }
  if (is.null(dim(ranges))) {
    ranges <- NULL
    print("default ranges used")
    ranges$min[1] <- 0
    ranges$max[1] <- dim(data)[1]
    ranges$desc[1] <- c("RNA Molecule")
    ranges$col[1] <- -1
    ranges <- as.data.frame(ranges)
  }
  ### Creating Highlighted data ###
  s2 <- dim(data)[1]
  if (length(hl) < 1) {
    print("no sequences to match")
  } else {
    hlranges <- NULL
    for (i in 1:length(hl)) {
      t <- strsplit(hl[i], "")
      s1 <- length(t[[1]])
      ## Create all sequences of length the correct size ##
      groups <- NULL
      if (s2 <= s1) {
        groups[[1]] <- as.character(data$seq)
      } else {
        for (j in 1:(s2 - s1)) {
          groups[[j]] <- as.character(data$seq[j:(j + (s1 - 1))])
        }
      }
      groups <- lapply(groups, paste, collapse = "")
      groups <- unlist(groups)
      ind <- c(1:length(groups))[groups == hl[i]]
      ind <- data$num[ind]
      print(ind)
      hlranges$min <- c(hlranges$min, ind)
      hlranges$max <- c(hlranges$max, (ind + (s1 - 1)))
      hlranges$desc <- c(hlranges$desc, rep(hl[i], length(ind)))
      hlranges$col <- c(hlranges$col, rep(seqcols[i], length(ind)))
      ## Search for first character ##
    }
    print(hlranges)
    hlranges <- as.data.frame(hlranges)
    ranges <- rbind(ranges, hlranges)
  }
  s1 <- max(c(data$x, data$y))
  s2 <- min(c(data$x, data$y))
  if (add == FALSE) {
    plot(data$x, data$y, type = "n", xlab = "", ylab = "", axes = FALSE, xlim = c(s2, s1), ylim = c(s2, s1))
    for (i in data$num) {
      if (data$bound[data$num == i] != -1) {
        lines(c(data$x[data$num == i], data$x[data$num == data$bound[data$num == i]]), c(data$y[data$num == i], data$y[data$num == (data$bound[data$num == i])]), lwd = lineWd)
      }
    }
  } else {
    if (add == TRUE) {
      points(data$x, data$y, type = "n")
      for (i in data$num) {
        if (data$bound[data$num == i] != -1) {
          lines(c(data$x[data$num == i], data$x[data$num == data$bound[data$num == i]]), c(data$y[data$num == i], data$y[data$num == (data$bound[data$num == i])]), lwd = lineWd)
        }
      }
    }
  }

  ## Adding Lines between the each point ###
  x <- data$x
  y <- data$y
  if (nt) {} else {
    lines(x, y, lw = lineWd)
    points(x, y, pch = 16, cex = pointSize)
  }
  if (l == 1) {
    if (labTF) {
      legend(s2[1], s1[1],
        legend = ranges$desc[ranges$col != 0],
        col = abs(ranges$col[ranges$col != 0]), bty = "n",
        lty = 1
      )
    }
  }
  ### Adding NT labels to each position ####
  if (nt == TRUE) {
    x1 <- data$x[1]
    x2 <- data$x[2]
    y1 <- data$y[1]
    y2 <- data$y[2]
    vx <- (x1 - x2)
    vy <- (y1 - y2)
    dist <- sqrt(vx^2 + vy^2)
    vx <- vx / dist
    vy <- vy / dist
    ## Place 5' inline with first bound ##
    text((data$x[1] + (vx * dp)), (data$y[1] + (vy * dp)), "5'", font = 2, cex = tsize)
    ## Place NT label perpedicular ##
    x3 <- (-1 * (y1 - y2))
    y3 <- (x1 - x2)
    vx <- x3 / dist
    vy <- y3 / dist
    text((data$x[1] + (vx * dp)), (data$y[1] + (vy * dp)), data$seq[1], cex = tsize, font = 2)
    ### Placing 3' End labels ###
    t1 <- length(data$x)
    t2 <- (length(data$x) - 1)
    x1 <- data$x[t1]
    x2 <- data$x[t2]
    y1 <- data$y[t1]
    y2 <- data$y[t2]
    vx <- (x1 - x2)
    vy <- (y1 - y2)
    dist <- sqrt(vx^2 + vy^2)
    vx <- vx / dist
    vy <- vy / dist
    ## Place 5' inline with first bound ##
    text((data$x[t1] + (vx * dp)), (data$y[t1] + (vy * dp)), "3'", cex = tsize, font = 2)
    ## Place NT label perpedicular ##
    x3 <- (-1 * (y1 - y2))
    y3 <- (x1 - x2)
    vx <- x3 / dist
    vy <- y3 / dist
    text((data$x[t1] - (vx * dp)), (data$y[t1] - (vy * dp)), data$seq[t1], font = 2, cex = tsize)
    ### Adding Specific Modifications ####


    for (i in 2:(length(data$x) - 1)) {
      x1 <- data$x[(i - 1)]
      x2 <- data$x[(i + 1)]
      y1 <- data$y[(i - 1)]
      y2 <- data$y[(i + 1)]
      ## Rotate 90 degrees and create unit vector ##
      x3 <- (-1 * (y1 - y2))
      y3 <- (x1 - x2)
      dist <- sqrt(x3^2 + y3^2)
      x3 <- x3 / dist
      y3 <- y3 / dist
      ## Add text to data$seq[i] ###
      text(data$x[i] + (x3 * dp), data$y[i] + (y3 * dp), data$seq[i], font = 2, cex = tsize)
    }
  }
  if (modspec == TRUE) {
    for (i in 1:length(modp)) {
      points(data$x[modp[i]], data$y[modp[i]], pch = mod[i], col = modcol[i])
    }
  }




  ### Highlighting Sequences #######
  for (i in 1:dim(ranges)[1]) {
    # print(i)
    keep <- data$num >= ranges$min[i] & data$num <= ranges$max[i]
    if (ranges$col[i] != 0) {
      if (ranges$col[i] == -1) {
        # points(data$x[keep],data$y[keep],col=1,pch=".")
      } else {
        if (ranges$col[i] == 3) {
          points(data$x[keep], data$y[keep], col = ranges$col[i], pch = "x")
        } else {
          points(data$x[keep], data$y[keep], col = ranges$col[i], pch = 19)
        }
      }
    }
  }
  title(main)
}



bplfile <- function(dat, name) {
  ##### Creates a bpl file of the RNA secondary structure #####
  ### Get sequnce and print on first line ###
  seq <- paste(dat$seq, collapse = "")
  fileo <- paste(">", name, sep = "")
  fileo <- paste(fileo, "\n", seq, "\n", sep = "")
  fileo <- paste(fileo, "\n$ list", sep = "")
  for (i in 1:length(dat$num)) {
    if (dat$bound[i] == -1) {} else {
      fileo <- paste(fileo, "\n", (dat$num[i] + 1), " ", (dat$bound[i] + 1))
    }
  }
  write(fileo, file = name)
}





transformFold <- function(dat, x0, y0, ang) {
  ### Given fold data from loadFolds this translates and rotates the folds ###

  x <- dat$x
  y <- dat$y
  new <- rotateV(x, y, x0, y0, ang)
  dat$x <- new$x
  dat$y <- new$y
  dat
}


aptPlotCT <- function(file, ranges = 0, add = FALSE, hl = NULL, seqcols = NULL, seqTF = FALSE, labTF = FALSE, nt = FALSE, dp = 0.5, modspec = FALSE, modp = NULL, mod = NULL, modcol = NULL, tsize = 0.5, main = "", pseudoTF = FALSE, pseudo_nums = NULL, ticks = NULL, ticksTF = FALSE) {
  ### Load CT file ###
  dat <- loadCt(file)
  ### Filter PseudoKnots ##
  pseudo <- pseudoKnot(dat)
  data <- ct2coord(pseudo[[2]])
  names(data)[1] <- "num"
  data <- as.data.frame(data)
  ## dp-The distanc the text is place from the sides of the aptamer ###
  ## modspec - Modify Specific bases in the Aptamer ###
  ## modp- NT position
  ## mod -Modification to be done ###
  # 	1=put a box around the Label
  # 	2=put a circle around the Label
  # 	3=NT Change T->G
  # 	4=Add a solid colored circle at junction
  ## modcol - Color of the modification
  l <- 1
  if (seqTF) {
    #TODO: Update major version and remove seqTF breaking
    #backward compatibility
    #s <- dat$seq
    #sequence <- paste(s, collapse = "")
  }
  if (ranges == 0 & length(seqcols) < 1) {
    l <- 0
  }
  if (ranges == 0) {
    ranges <- NULL
    print("default ranges used")
    ranges$min[1] <- 0
    ranges$max[1] <- dim(data)[1]
    ranges$desc[1] <- c("Aptamer")
    ranges$col[1] <- -1
    ranges <- as.data.frame(ranges)
  }
  ### Creating Highlighted data ###
  s2 <- dim(data)[1]
  if (length(hl) < 1) {
    print("no sequences to match")
  } else {
    hlranges <- NULL
    for (i in 1:length(hl)) {
      t <- strsplit(hl[i], "")
      s1 <- length(t[[1]])
      ## Create all sequences of length the correct size ##
      groups <- NULL
      if (s2 <= s1) {
        groups[[1]] <- as.character(data$seq)
      } else {
        for (j in 1:(s2 - s1)) {
          groups[[j]] <- as.character(data$seq[j:(j + (s1 - 1))])
        }
      }
      groups <- lapply(groups, paste, collapse = "")
      groups <- unlist(groups)
      ind <- c(1:length(groups))[groups == hl[i]]
      ind <- data$num[ind]
      print(ind)
      hlranges$min <- c(hlranges$min, ind)
      hlranges$max <- c(hlranges$max, (ind + (s1 - 1)))
      hlranges$desc <- c(hlranges$desc, rep(hl[i], length(ind)))
      hlranges$col <- c(hlranges$col, rep(seqcols[i], length(ind)))
      ## Search for first character ##
    }
    print(hlranges)
    hlranges <- as.data.frame(hlranges)
    ranges <- rbind(ranges, hlranges)
  }

  s1 <- max(c(data$x, data$y))
  s1x <- max(c(data$x)) + 3
  s1y <- max(c(data$y)) + 3
  s2 <- min(c(data$x, data$y))
  s2x <- min(c(data$x)) - 3
  s2y <- min(c(data$y)) - 3

  if (add == FALSE) {
    plot(data$x, data$y, type = "n", xlab = "", ylab = "", axes = FALSE, xlim = c(s2x, s1x), ylim = c(s2y, s1y))
  } else {
    if (add == TRUE) {
      points(data$x, data$y, type = "n")
    }
  }

  ## Adding Lines between the each point ###
  x <- data$x
  y <- data$y
  if (nt) {} else {
    lines(x, y, lw = 4)
  }
  if (l == 1) {
    if (labTF) {
      legend(s2[1], s1[1],
        legend = ranges$desc[ranges$col != 0],
        col = abs(ranges$col[ranges$col != 0]), bty = "n",
        lty = 1
      )
    }
  }
  ### Adding NT labels to each position ####

  if (nt == TRUE) {
    x1 <- data$x[1]
    x2 <- data$x[2]
    y1 <- data$y[1]
    y2 <- data$y[2]
    vx <- (x1 - x2)
    vy <- (y1 - y2)
    dist <- sqrt(vx^2 + vy^2)
    vx <- vx / dist
    vy <- vy / dist
    ## Place 5' inline with first bound ##
    text((data$x[1] + (vx * dp)), (data$y[1] + (vy * dp)), "5'", cex = tsize)
    ## Place NT label perpedicular ##
    x3 <- (-1 * (y1 - y2))
    y3 <- (x1 - x2)
    vx <- x3 / dist
    vy <- y3 / dist
    text((data$x[1] + (vx * dp)), (data$y[1] + (vy * dp)), data$seq[1], cex = tsize)
    ### Placing 3' End labels ###
    t1 <- length(data$x)
    t2 <- (length(data$x) - 1)
    x1 <- data$x[t1]
    x2 <- data$x[t2]
    y1 <- data$y[t1]
    y2 <- data$y[t2]
    vx <- (x1 - x2)
    vy <- (y1 - y2)
    dist <- sqrt(vx^2 + vy^2)
    vx <- vx / dist
    vy <- vy / dist
    ## Place 5' inline with first bound ##
    text((data$x[t1] + (vx * dp)), (data$y[t1] + (vy * dp)), "3'", cex = tsize)
    ## Place NT label perpedicular ##
    x3 <- (-1 * (y1 - y2))
    y3 <- (x1 - x2)
    vx <- x3 / dist
    vy <- y3 / dist
    text((data$x[t1] - (vx * dp)), (data$y[t1] - (vy * dp)), data$seq[t1], cex = tsize)
    ### Adding Specific Modifications ####
    if (modspec == TRUE) {
      for (i in 1:length(modp)) {
        if (mod[i] == 1) {
          ## Add box around text ##
        } else {
          if (mod[i] == 2) {
            ## Add circle around text ##
          } else {
            if (mod[i] == 3) {
              ## Show NT Change ##
            } else {
              if (mod[i] == 4) {
                ## Add Colored Pt ##
                points(data$x[modp[i]], data$y[modp[i]], pch = 1, col = modcol[i])
              }
            }
          }
        }
      }
    }


    for (i in 2:(length(data$x) - 1)) {
      x1 <- data$x[(i - 1)]
      x2 <- data$x[(i + 1)]
      y1 <- data$y[(i - 1)]
      y2 <- data$y[(i + 1)]
      ## Rotate 90 degrees and create unit vector ##
      x3 <- (-1 * (y1 - y2))
      y3 <- (x1 - x2)
      dist <- sqrt(x3^2 + y3^2)
      x3 <- x3 / dist
      y3 <- y3 / dist
      ## Add text to data$seq[i] ###
      text(data$x[i] + (x3 * dp), data$y[i] + (y3 * dp), data$seq[i], cex = tsize)
    }
  }

  if (ticksTF) {
    for (i in 2:(length(data$x) - 1)) {
      k <- data$num[i] == ticks
      t <- table(k)
      if (length(t) > 1) {
        x1 <- data$x[(i - 1)]
        x2 <- data$x[(i + 1)]
        y1 <- data$y[(i - 1)]
        y2 <- data$y[(i + 1)]
        ## Rotate 90 degrees and create unit vector ##
        x3 <- (-1 * (y1 - y2))
        y3 <- (x1 - x2)
        dist <- sqrt(x3^2 + y3^2)
        x3 <- x3 / dist
        y3 <- y3 / dist
        ## Add text to data$seq[i] ###
        points(c(data$x[i], (data$x[i] + x3 * dp)), c(data$y[i], (data$y[i] + y3 * dp)), type = "l")
        dp2 <- dp * 1.8
        text(data$x[i] + (x3 * dp2), data$y[i] + (y3 * dp2), data$pos[i], cex = tsize)
      }
    }
  }
  if (pseudoTF) {
    for (i in 2:(length(data$x) - 1)) {
      k <- data$num[i] == pseudo_nums
      t <- table(k)
      if (length(t) > 1) {
        x1 <- data$x[(i - 1)]
        x2 <- data$x[(i + 1)]
        y1 <- data$y[(i - 1)]
        y2 <- data$y[(i + 1)]
        ## Rotate 90 degrees and create unit vector ##
        x3 <- (-1 * (y1 - y2))
        y3 <- (x1 - x2)
        dist <- sqrt(x3^2 + y3^2)
        x3 <- x3 / dist
        y3 <- y3 / dist
        ## Add text to data$seq[i] ###
        text(data$x[i] + (x3 * dp), data$y[i] + (y3 * dp), data$seq[i], cex = tsize)
        print(c(data$x[i] + (x3 * dp), data$y[i] + (y3 * dp)))
      }
    }
  }



  ### Highlighting Sequences #######
  for (i in 1:dim(ranges)[1]) {
    # print(i)
    keep <- data$num >= ranges$min[i] & data$num <= ranges$max[i]
    if (ranges$col[i] != 0) {
      if (ranges$col[i] == -1) {
        # points(data$x[keep],data$y[keep],col=1,pch=".")
      } else {
        if (ranges$col[i] == 3) {
          points(data$x[keep], data$y[keep], col = ranges$col[i], pch = "x")
        } else {
          points(data$x[keep], data$y[keep], col = ranges$col[i], pch = 19)
        }
      }
    }
    for (i in data$num) {
      if (data$bound[data$num == i] != -1) {
        lines(c(data$x[data$num == i], data$x[data$num == data$bound[data$num == i]]), c(data$y[data$num == i], data$y[data$num == (data$bound[data$num == i])]))
        # vx<-data$x[data$num==i]-data$x[data$num==data$bound[data$num==i]]
        # vy<-data$y[data$num==i]-data$y[data$num==(data$bound[data$num==i])]
        # dist<-sqrt(vx^2+vy^2)
        # vx<-vx/dist
        # vy<-vy/dist
        # text((data$x[data$num==i]+(vx*dist*dp)),(data$y[data$num==i]+(vy*dist*dp)),data$seq[data$num==i])
      }
    }
  }
  title(main)
}

#### Converts .CT Files to coordinate files ####
# Input - pos,seq,before,after,bound2,pos
# Output - pos,seq,x,y
pseudoKnot <- function(ctDat) {
  ### Given a CT data it identifies pairing that are from pseudoknots ###
  ### removes the pseudo bps from input and returns the input dataset ###
  ### Add a new pseudo knot dataset                                   ###
  input <- ctDat
  pseudo <- NULL
  pk <- 1
  for (i in ctDat$pos) {
    bound <- ctDat$bound[ctDat$pos == i]
    if (bound != 0 & i < bound) {
      for (j in i:bound) {
        nbound <- ctDat$bound[ctDat$pos == j]
        if (nbound != 0) {
          if (nbound > bound | nbound < i) {
            input$bound[input$pos == j] <- 0
            input$bound[input$pos == nbound] <- 0
            pseudo$pos[pk] <- j
            pseudo$bound[pk] <- nbound
            pk <- pk + 1
          }
        }
      }
    }
  }
  out <- NULL
  pseudo <- as.data.frame(pseudo)
  psk <- unique(pseudo$pos)
  outpk <- NULL
  sk <- 1
  for (pk in psk) {
    outpk$pos[sk] <- pk
    bound <- pseudo$bound[pseudo$pos == pk]
    bound <- bound[1]
    outpk$bound[sk] <- bound
    sk <- sk + 1
  }

  if (length(psk) > 0) {
    #### Find stems in pseudo      ###
    stems <- NULL
    d1 <- diff(outpk$bound)
    d2 <- diff(outpk$pos)
    ind <- 1:
    length(d1)
    r1 <- ind[d1 != -1]
    r2 <- ind[d2 != 1]
    r <- c(r1, r2)
    r <- unique(r)
    r <- sort(r)
    s <- 1
    for (i in 1:length(r)) {
      tmp <- NULL
      if (i == 1) {
        tmp$pos <- outpk$pos[1:r[1]]
        tmp$bound <- outpk$bound[1:r[1]]
      } else {
        tmp$pos <- outpk$pos[(r[(i - 1)] + 1):r[i]]
        tmp$bound <- outpk$bound[(r[(i - 1)] + 1):r[i]]
      }
      stems[[s]] <- tmp
      s <- s + 1
    }
    l <- length(r)
    lp <- length(outpk$pos)
    tmp$pos <- outpk$pos[(r[l] + 1):lp]
    tmp$bound <- outpk$bound[(r[l] + 1):lp]
    stems[[s]] <- tmp
    #### Create Compatibility Table ##
    compm <- NULL
    m1t <- 1
    for (i in 1:s) {
      for (j in 1:s) {
        comp <- 0
        st1 <- stems[[i]]
        st2 <- stems[[j]]
        mb1 <- min(c(st1$bound))
        mp1 <- min(c(st1$pos))
        mb2 <- min(c(st2$bound))
        mp2 <- min(c(st2$pos))
        mm2 <- min(c(mp2, mb2))
        mx2 <- max(c(mp2, mb2))
        ### Incompat Checks ##
        print(paste(mb1, mp1, mm2, mx2))
        if (mp1 < mx2 & mp1 > mm2 & mb1 < mm2) {
          comp <- 1
        } else {
          if (mp1 < mx2 & mp1 > mm2 & mb1 > mx2) {
            comp <- 1
          } else {
            if (mb1 < mx2 & mb1 > mm2 & mp1 > mx2) {
              comp <- 1
            } else {
              if (mb1 < mx2 & mb1 > mm2 & mp1 < mm2) {
                comp <- 1
              } else {


              }
            }
          }
        }
        compm$i[m1t] <- i
        compm$j[m1t] <- j
        compm$c[m1t] <- comp
        m1t <- m1t + 1
      }
    }
    comp <- as.data.frame(compm)
    ### Find largest compatible set ###
    # For all i & j in the set they are all compatible
    # Find all pairwise compatible sets connected graphs
    mySets <- NULL
    l <- c(1:length(stems))
    #### Each stem is assignd to  a set ###
    mys <- 1
    while (length(l) > 0) {
      s1 <- l[1]
      s <- comp[comp$c == 0 & comp$i == s1, ]
      print(l)
      ### Remove all s$j from l ##
      v <- s$j[1]
      for (j in s$j) {
        v <- c(v, j)
        l <- l[l != j]
      }
      mySets[[mys]] <- v
      mys <- mys + 1
    }

    #### Find the biggest set and put it back in ###
    lst <- NULL
    for (i in 1:length(stems)) {
      tmp <- stems[[i]]
      tmp <- tmp$pos
      lst[i] <- length(tmp)
    }
    lst2 <- NULL
    for (i in 1:length(mySets)) {
      tmp <- mySets[[i]]
      tmp <- tmp[2:length(tmp)]
      lst2[i] <- sum(lst[tmp])
    }
    print(lst2)

    largest <- c(1:length(lst2))[lst2 == max(lst2)]
    largest <- mySets[[largest]]
    largest <- largest[2:length(largest)]

    print(largest)
    addBack <- NULL
    tmp <- stems[[largest[1]]]
    addBack$pos <- tmp$pos
    addBack$bound <- tmp$bound
    if (length(largest) > 1) {
      for (i in largest[2:length(largest)]) {
        tmp <- stems[[i]]
        addBack$pos <- c(addBack$pos, tmp$pos)
        addBack$bound <- c(addBack$bound, tmp$bound)
      }
    }
    for (pos in addBack$pos) {
      bnd <- addBack$bound[addBack$pos == pos]
      input$bound[input$pos == pos] <- bnd
      input$bound[input$pos == bnd] <- pos
    }
    addBack <- as.data.frame(addBack)
    outpk <- as.data.frame(outpk)

    for (pos in addBack$pos) {
      outpk <- outpk[outpk$pos != pos, ]
    }
  } else {
    outpk <- NULL
  }
  out[[1]] <- outpk
  out[[2]] <- input
  out
}

loadCt <- function(file) {
  input <- read.table(file = file, skip = 1)
  names(input) <- c("pos", "seq", "before", "after", "bound", "pos2")
  input$seq <- as.character(input$seq)
  input
}
makeCt <- function(struct, seq) {
  nts <- strsplit(seq, "")
  nts <- nts[[1]]
  stc <- strsplit(struct, "")
  stc <- stc[[1]]
  out <- NULL
  for (i in 1:length(stc)) {
    out$pos[i] <- i
    out$before[i] <- (i - 1)
    out$after[i] <- (i + 1)
    out$seq[i] <- nts[i]
    out$pos2[i] <- i
    if (stc[i] == ".") {
      out$bound[i] <- 0
    } else {
      if (stc[i] == ")") {
        b <- backward(stc, i)
        out$bound[i] <- b
      } else {
        if (stc[i] == "(") {
          b <- forward(stc, i)
          out$bound[i] <- b
        }
      }
    }
  }

  out <- as.data.frame(out)
  out
}

forward <- function(stc, i) {
  k <- 1
  go <- 1
  out <- 0
  if (stc[i] == "(") {
    i <- i + 1
    for (j in i:length(stc)) {
      if (go == 1) {
        if (stc[j] == ")") {
          k <- k - 1
          if (k == 0) {
            out <- j
            go <- 0
          }
        } else {
          if (stc[j] == "(") {
            k <- k + 1
          }
        }
      }
    }
  }
  out
}
backward <- function(stc, i) {
  k <- 1
  go <- 1
  out <- 0
  if (stc[i] == ")") {
    i <- i - 1
    for (j in i:1) {
      if (go == 1) {
        if (stc[j] == "(") {
          k <- k - 1
          if (k == 0) {
            out <- j
            go <- 0
          }
        } else {
          if (stc[j] == ")") {
            k <- k + 1
          }
        }
      }
    }
  }
  out
}



ct2knet <- function(file, ind = 0) {
  dat <- loadCt(file)
  out <- ""
  seq <- dat$seq[(ind + 1):length(dat$seq)]
  seq <- paste(seq, collapse = "")
  out <- paste(">", file, "\n", sep = "")
  out <- paste(out, seq, "\n\n", sep = "")
  out <- paste(out, "$ list\n", sep = "")
  for (i in 1:dim(dat)[1]) {
    if (dat$bound[i] != 0) {
      out <- paste(out, " ", (dat$pos[i] - ind), "   ", (dat$bound[i] - ind), "\n", sep = "")
    }
  }
  out
}

validCT <- function(ct) {
  messages <- list()
  w <- 1
  if (
    !("before" %in% names(ct)) ||
      !("after" %in% names(ct)) ||
      !("pos" %in% names(ct)) ||
      !("seq" %in% names(ct)) ||
      !("bound" %in% names(ct)) ||
      !("pos2" %in% names(ct))
  ) {
    messages[[w]] <- "CT File does not have correct column names"
    w <- w + 1
  }

  if (min(ct$pos) < 1) {
    messages[[w]] <- "CT File has pos value < 1"
    w <- w + 1
  }
  if (min(ct$pos2) < 1) {
    messages[[w]] <- "CT File has pos2 value < 1"
    w <- w + 1
  }
  if (TRUE %in% duplicated(ct$pos)) {
    messages[[w]] <- "CT File has multiple rows for nucleotide at same position."
    w <- w + 1
  }
  if (TRUE %in% duplicated(ct$pos2)) {
    messages[[w]] <- "CT File has duplicate pos2 values"
    w <- w + 1
  }
  if (TRUE %in% duplicated(ct$bound[ct$bound != 0])) {
    messages[[w]] <- "CT File has duplicate bound values"
    w <- w + 1
  }


  ## FAIL: Check equal open/closed pairs
  ss1 <- ct2ss(ct)
  ss2 <- ct2ss(ct, cleanup = FALSE)

  if (ss1 != ss2) {
    messages[[w]] <- paste("CT File has secondary structure incompatible with bracket notation.\nCleaned up:", ss1, "\nIn CT File:", ss2, sep = "")
    w <- w + 1
  }

  ## WARN: Check gap is >= 1nts
  if (min(abs(ct$pos - ct$bound)) < 2) {
    messages[[w]] <- "Gap between bound NTs is less than 2"
    w <- w + 1
  }

  messages
}

ct2coord <- function(input) {
  messages <- validCT(input)
  all <- NULL
  if (length(messages) == 0) {
    group <- 0
    firstrun <- 1
    ### First NT in the file is at 0,0 ####

    #### Create two fake base-pairs in the begining to make it work ###
    mnp <- min(input$pos)
    map <- max(input$pos)
    a1 <- map + 1
    a2 <- map + 2
    b1 <- mnp - 1
    b2 <- mnp - 2

    new1 <- NULL
    new1$pos[1] <- a1
    new1$before[1] <- (a1 - 1)
    new1$after[1] <- (a1 + 1)
    new1$seq[1] <- "A"
    new1$pos2[1] <- a1
    new1$bound[1] <- b1

    new2 <- NULL
    new2$pos[1] <- a2
    new2$before[1] <- (a2 - 1)
    new2$after[1] <- (a2 + 1)
    new2$seq[1] <- "A"
    new2$pos2[1] <- a2
    new2$bound[1] <- b2

    new3 <- NULL
    new3$pos[1] <- b1
    new3$before[1] <- (b1 - 1)
    new3$after[1] <- (b1 + 1)
    new3$seq[1] <- "A"
    new3$pos2[1] <- b1
    new3$bound[1] <- a1

    new4 <- NULL
    new4$pos[1] <- b2
    new4$before[1] <- (b2 - 1)
    new4$after[1] <- (b2 + 1)
    new4$seq[1] <- "A"
    new4$pos2[1] <- b2
    new4$bound[1] <- a2

    input <- rbind(new1, new2, input, new3, new4)
    input <- as.data.frame(input)
    mnp <- min(input$pos)

    output <- NULL
    nextNT <- input[input$pos == mnp, ]
    if (nextNT$bound != 0) {
      #### Set Coordinates to (0,0) and (0,1) #####
      output$pos[1] <- mnp
      output$x[1] <- 0
      output$y[1] <- 0
      output$pos[2] <- nextNT$bound
      output$x[2] <- 0
      output$y[2] <- 1
    }
    stems <- NULL
    stems[[1]] <- c(output$pos[1], output$pos[2])

    j <- 3
    prev <- mnp
    npos <- input$after[input$pos == mnp]
    nextNT <- input[input$pos == npos, ]
    mp <- max(input$pos)
    while (length(stems) > 0) {
      newstems <- NULL
      newloops <- NULL
      ns <- 1
      nl <- 1
      for (i in 1:length(stems)) {
        s1 <- stems[[i]]
        #### Use stemCoord to generate stem coordinates ###
        p1 <- s1[1]
        p2 <- s1[2]
        if (firstrun == 1) {
          x3 <- -1
          y3 <- 0
          firstrun <- 0
        } else {
          ### If prev.x < this.x p3=-1 ##
          prev <- input$before[input$pos == p1]
          x3 <- output$x[output$pos == prev]
          y3 <- output$y[output$pos == prev]
        }
        x1 <- output$x[output$pos == p1]
        y1 <- output$y[output$pos == p1]
        x2 <- output$x[output$pos == p2]
        y2 <- output$y[output$pos == p2]
        sout <- stemCords(input, p1, p2, x1[1], y1[1], x2[1], y2[1], x3[1], y3[1])
        sdat <- sout[[1]]
        sdat <- as.data.frame(sdat)
        sdat <- sdat[sdat$pos != p1, ]
        sdat <- sdat[sdat$pos != p2, ]
        l <- dim(sdat)[1]
        if (l > 0) {
          s <- length(output$pos)
          s <- s + 1
          for (sd in 1:length(sdat$pos)) {
            output$pos[s] <- sdat$pos[sd]
            output$x[s] <- sdat$x[sd]
            output$y[s] <- sdat$y[sd]
            output$group[s] <- group
            s <- s + 1
          }
          group <- group + 1
        }
        newloops[[nl]] <- sout[[2]]
        nl <- nl + 1
      }
      ##### Loop through all loop starts ###
      newstems <- NULL
      for (i in 1:length(newloops)) {
        lps <- newloops[[i]]
        lp1 <- loopLength(input, lps[1])
        #### Add stems to newstems ###
        if (length(lp1) > 1) {
          nstm <- lp1[[2]]
          for (nt in 1:length(nstm)) {
            newstems[[ns]] <- nstm[[nt]]
            ns <- ns + 1
          }
        }
        #### Get Coordinates for each position and add to output ###
        pp <- input$before[input$pos == lps[1]]
        lout <- genCords(lp1, lps[1], lps[2], output, 1)
        pt <- lp1[[1]]
        tout <- lout
        lpl <- length(pt$pos)
        pt$pos <- pt$pos[lpl:1]
        lout$pos <- pt$pos
        lout <- as.data.frame(lout)
        lout <- lout[lout$pos != lps[1], ]
        lout <- lout[lout$pos != lps[2], ]
        s <- length(output$pos)
        s <- s + 1
        for (lo in 1:length(lout$pos)) {
          output$pos[s] <- lout$pos[lo]
          output$x[s] <- lout$x[lo]
          output$y[s] <- lout$y[lo]
          output$group[s] <- group
          s <- s + 1
        }
        group <- group + 1
      }
      stems <- newstems
    }
    my <- min(output$y) - 5
    xy <- max(output$y) + 5
    mx <- min(output$x) - 5
    xx <- max(output$x) + 5
    # plot(output$x,output$y,type="n",ylim=c(my,xy),xlim=c(mx,xx))
    # for(i in output$pos){
    # 	b<-input$after[input$pos==i]
    # points(c(output$x[output$pos==i],output$x[output$pos==b]),c(output$y[output$pos==i],output$y[output$pos==b]),type="l")
    # text(output$x[output$pos==i],output$y[output$pos==i],i)
    # }
    output <- as.data.frame(output)
    input <- as.data.frame(input)
    all <- merge(output, input, by = "pos")
    names(all)[1] <- "num"
    all
    ### Remove first two and last two ##
    rmax <- dim(all)[1]
    rmax <- rmax - 2
    all <- all[3:rmax, ]
  } else {
    print(messages)
  }
  all
}

stemCords <- function(input, p1, p2, x1, y1, x2, y2, x3, y3) {
  ##### INput is a CT file
  ##### P1 is the position of the 5' end of the stem
  ##### P2 is the position of its bound partner
  ##### P3 is the orientation of the stem -1 or 1
  #### Given the first BasePair of a stem it fills in the rest of the stem until
  #### It comes to an unbound member

  output <- NULL
  output$pos[1] <- p1
  output$x[1] <- x1
  output$y[1] <- y1
  output$pos[2] <- p2
  output$x[2] <- x2
  output$y[2] <- y2
  ends <- -1
  #### Determine perpendicular vector ###
  vperp <- NULL
  vperp$x <- x3 - x1
  vperp$y <- y3 - y1
  v <- rotateS((x2 - x1), (y2 - y1), 0, 0, (pi / 2))
  m <- sqrt(v[1]^2 + v[2]^2) + sqrt(vperp$x^2 + vperp$y^2)
  ang <- acos((v[1] * vperp$x + v[2] * vperp$y) / m)
  if (ang < pi / 2) {
    v <- rotateS((x2 - x1), (y2 - y1), 0, 0, (-1 * pi / 2))
  }
  vect <- NULL
  vect$x <- v[1]
  vect$y <- v[2]
  nv <- sqrt((vect$x)^2 + (vect$y)^2)
  vect$x <- vect$x / nv
  vect$y <- vect$y / nv
  prev1 <- p1
  j <- 3
  mf <- length(input$pos) + 5
  mp <- max(input$pos)
  while (j < mf) {
    npos <- input$after[input$pos == prev1]
    if (npos < (mp + 1)) {
      nextNT <- input[input$pos == npos, ]
      if (dim(nextNT)[1] < 1) {
        j <- mf + 5
      } else {
        if (nextNT$bound == 0) {
          ##### Position is not bound done ####
          ## Return the last position as Ends #
          bound <- input$bound[input$pos == prev1]
          ends <- c(prev1, bound)
          j <- mf + 5
        } else {
          pbpos <- input$bound[input$pos == prev1]
          pbafter <- input$before[input$pos == pbpos]
          bpos <- input$bound[input$pos == npos]
          if (pbafter == bpos) {
            ### Check that prev bound is on the same chain ##
            output$pos[j] <- npos
            output$x[j] <- output$x[output$pos == prev1] + vect$x[1]
            output$y[j] <- output$y[output$pos == prev1] + vect$y[1]
            j <- j + 1
            if (j > 3) {
              pbpos <- input$bound[input$pos == prev1]
              bpos <- input$bound[input$pos == npos]
              output$pos[j] <- bpos
              output$x[j] <- output$x[output$pos == pbpos] + vect$x[1]
              output$y[j] <- output$y[output$pos == pbpos] + vect$y[1]
              j <- j + 1
            }
          } else {
            ends <- c(prev1, pbpos)
            j <- mf + 5
          }
          prev1 <- npos
          #################################################
        }
      }
    } else {
      j <- mf + 5
    }
    ### Second NT is at 0,1 ###
  }
  l <- NULL
  l[[1]] <- output
  l[[2]] <- ends
  l
}

loopLength <- function(input, start) {
  #### Determine how many NTs there are before another base-pair appears ####
  ### Loop Ends when Get to previous NT's bound or the last NT of the chain #
  stems <- NULL
  s <- 1
  first <- input[input$pos == start, ]
  mf <- length(input$pos)
  mp <- max(input$pos)
  j <- 0
  ntot <- 1
  output <- NULL
  output$pos[1] <- first$pos
  i <- 2
  nextNT <- NULL
  nextNT <- input[input$pos == first$pos, ]
  bound <- 1
  while (j < mf) {
    if (nextNT$bound != 0 & bound == 0) {
      npos <- nextNT$bound
      bound <- 1
      ### Save To Stems ###
      ### Determine vector toward outside of circle ##
      stems[[s]] <- c(nextNT$pos, npos)
      s <- s + 1
    } else {
      bound <- 0
      npos <- input$after[input$pos == nextNT$pos]
    }
    nextNT <- input[input$pos == npos, ]
    if (nextNT$pos == first$bound) {
      j <- mf
      output$pos[i] <- nextNT$pos
    } else {
      if (nextNT$pos == mp) {
        ntot <- ntot + 1
        j <- mf
        output$pos[i] <- nextNT$pos
      } else {
        ntot <- ntot + 1
        output$pos[i] <- nextNT$pos
        j <- j + 1
        i <- i + 1
      }
    }
  }
  ### Returns Pos, X,Y, foreach element in the loop ###
  out <- NULL
  out[[1]] <- output
  out[[2]] <- stems
  out
}

rotateS <- function(x2, y2, x0, y0, ang) {
  pt <- NULL
  x1 <- x2 - x0
  y1 <- y2 - y0
  pt[1] <- x1 * cos(ang) - y1 * sin(ang) + x0
  pt[2] <- y1 * cos(ang) + x1 * sin(ang) + y0
  pt
}

circleCoord <- function(n) {
  ang <- 2 * pi / (n)
  output <- NULL
  output$y[1] <- 0
  output$x[1] <- 1
  for (i in 1:(n - 1)) {
    v <- rotateS(1, 0, 0, 0, (i * ang))
    j <- i + 1
    output$x[j] <- v[1] + output$x[(j - 1)]
    output$y[j] <- v[2] + output$y[(j - 1)]
  }
  ### Rotate ##
  output
}


genCords <- function(loop, p1, p2, input, vn) {
  #### Given the loop coordinates for the first two points in generates all others ####
  dat <- loop[[1]]
  n <- length(dat$pos)
  ### Get Coordinates of p1,p2 ###
  c <- circleCoord(n)
  ### Translate p1 to 0,0 ###
  ### Calcualte angle between p2 and x-axis
  pt1 <- NULL
  pt2 <- NULL
  pt1$x <- input$x[input$pos == p1]
  pt1$y <- input$y[input$pos == p1]
  pt2$x <- input$x[input$pos == p2]
  pt2$y <- input$y[input$pos == p2]

  pt2 <- translate(pt2$x, pt2$y, pt1$x, pt1$y)
  ang <- atan2(pt2$y, pt2$x)
  ang
  ### Rotate Circ coordinates and translate ###
  if (vn == 1) {
    #### Flip over y=axis ###
    c$y <- -1 * c$y
  }
  c <- rotateV(c$x, c$y, 0, 0, ang)
  c <- translate(c$x, c$y, (-1 * pt1$x), (-1 * pt1$y))
  c
}

translate <- function(x1, y1, x2, y2) {
  out <- NULL
  out$x <- x1 - x2
  out$y <- y1 - y2
  out
}

rotateV <- function(x2, y2, x0, y0, ang) {
  ## Rotates and translates a vector ###
  pt <- NULL
  x1 <- x2 - x0
  y1 <- y2 - y0
  pt$x <- x1 * cos(ang) - y1 * sin(ang) + x0
  pt$y <- y1 * cos(ang) + x1 * sin(ang) + y0
  pt
}
jpbida/RRNA documentation built on April 28, 2024, 1:27 p.m.