############################################################################
# RRNA is a R package for plotting and manipulating RNA secondary #
# structures. #
############################################################################
# Copyright (C) 2015 JP Bida #
# #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program. If not, see <http://www.gnu.org/licenses/> #
############################################################################
############################################################################
#
# FUNCTIONS
#
# loadFolds - Loads a coordinate file for an RNA structure into R with column
# names used by other functions in the RNAPlot package
#
# alignCoord - Given a data frame containing multiple coordinate files alignCoord
# takes the data frame, the id of the coordinate file being aligned to
# the coordinate file that is going to be aligned, and a new origin. It
# then translates
#
# RNAPlot - Given fold data RRNAPlot is a generic ploting function to create
# a plot of the secondary structure
#
# pseudoKnot - Given CT file data this function returns the positions of pseudoKnots
#
# loadCT - Loads data from a CT file
#
# ct2Coord - Creates secondary structure coordiant file from a CT file loaded
# using loadCT independent of the RRNAFold program. This can be used
# by RNAPlot function to plot secondary structures.
# ct2knet - Creates file that can be used by knet
#
# bplfile - Creates a bplfile from fold data
#
# makeCT - Creates a CT file from a secondary structure in bracket form and a sequence
#
# All RNA can be represented by STEMs and Multi-Strand Loops
# Loop - NumberOfStrands,StrandsLengths
# Bulge as Loop - 2,1,3
# coord2comp - Splits RNA structure into self contained components used for RNA assembly
fuzzy2ct <- function(dat) {
## Read fuzzy_dat into c2 file
## Remove PseudoKnots
## Given pairs of numbers
## (1,4)(2,5)(3,6) etc.
## Identify the bracket representation
## [ ( { ] ( } ) ) )
## Such that a closing brackets for open bracket m
## does not occur for before closing brackets for n
## when m < n
## Remove Minimal amount of pairs such that no pair
## doesn't contain every pair within its range
## PairID,ContainsOutOfRange,ContainsInRange,IsOutOfRange
## Basic - Loop through each pair, score,
## remove base pair occuring most outsides
## recalculate & repeat
## If same pos is going to multiple
## still works?
### Get Maximum Range
names(dat) <- c("pdb", "model", "pos1", "pos2", "face1", "face2", "h-bonds")
mx <- max(dat$pos1, dat$pos2)
mn <- min(dat$pos1, dat$pos2)
ss <- rep(".", mx)
### Find minimal pairs
pairs <- pairScores(dat)
ss[pairs$pos1] <- "("
ss[pairs$pos2] <- ")"
### Output bracket notation
ss <- paste(ss, collapse = "")
ss
}
ct2ss <- function(dat, cleanup = TRUE) {
## Generate SS from CT file
## If CT file contains knots or multiple pairings
## cleanup=TRUE tries to generate the SS with most
## base pairs preserved
ss <- ""
if (cleanup) {
mx <- max(dat$pos, dat$pos2)
mn <- min(dat$pos, dat$pos2)
dat$pos1 <- dat$bound
dat <- dat[dat$pos1 > 0, ]
dat <- dat[dat$pos2 > 0, ]
ss <- rep(".", mx)
### Find minimal pairs
pairs <- pairScores(dat)
ss[pairs$pos1] <- "("
ss[pairs$pos2] <- ")"
### Output bracket notation
ss <- paste(ss, collapse = "")
} else {
ss <- rep(".", nrow(dat))
for (i in 1:nrow(dat)) {
if (dat$bound[i] != 0 & dat$bound[i] > dat$pos[i]) {
ss[i] <- "("
} else {
if (dat$bound[i] != 0) {
ss[i] <- ")"
}
}
}
ss <- paste(ss, collapse = "")
}
ss
}
fuzzy2comp <- function(dat) {
sct <- fuzzy2ct(dat)
ct <- makeCt(sct, paste(rep("A", nchar(sct)), collapse = ""))
crd <- ct2coord(ct)
comp <- coord2comp(crd)
comp
}
pairScores <- function(PairDF) {
## Finds the minimum number of pairings to remove
## uses a greedy approach removing the maximum conflicts
## in each step and recalculating
## PairID,ContainsOutOfRange,ContainsInRange,IsOutOfRange
## Represent all pairs in (pos_lower,pos_upper)
## Remove duplciates,
## Remove any self-pairing
PairDF <- PairDF[, names(PairDF) %in% c("pos1", "pos2")]
PairDF2 <- data.frame(pos1 = PairDF$pos2, pos2 = PairDF$pos1)
Pairs <- rbind(PairDF, PairDF2)
Pairs <- Pairs[Pairs$pos1 < Pairs$pos2, ]
Pairs <- unique(Pairs)
total <- 1
while (total > 0) {
Pairs$ContainsOutOfRange <- 0
Pairs$ContainsInRange <- 0
Pairs$IsOutOfRange <- 0
for (p in 1:nrow(Pairs)) {
## Only look at lower
kContainsOutOfRange <- Pairs$pos1 > Pairs$pos1[p] & Pairs$pos1 < Pairs$pos2[p] & Pairs$pos2 > Pairs$pos2[p]
kContainsInRange <- Pairs$pos1 > Pairs$pos1[p] & Pairs$pos1 < Pairs$pos2[p] & Pairs$pos2 < Pairs$pos2[p]
Pairs$ContainsOutOfRange[p] <- length(Pairs$pos1[kContainsOutOfRange])
Pairs$ContainsInRange[p] <- length(Pairs$pos1[kContainsInRange])
Pairs$IsOutOfRange[kContainsOutOfRange] <- Pairs$IsOutOfRange[kContainsOutOfRange] + 1
}
total <- sum(Pairs$IsOutOfRange)
if (total > 0) {
## Remove the maximum scoring base
p <- Pairs$pos1[Pairs$ContainsOutOfRange == max(Pairs$ContainsOutOfRange)]
Pairs <- Pairs[Pairs$pos1 != p[1], ]
}
}
Pairs
}
coord2comp <- function(coord) {
## id1,id2,pos1a,pos1b,pos2a,pos2b
## Outputs list of components with junctions
## Overlapping base-pairs are labeled with letters
## For Each Group
end <- max(coord$pos2)
output <- list()
w <- 1
groups <- unique(coord$group)
SubStructure <- rep(".", nrow(coord))
for (g in groups) {
GroupStructure <- SubStructure
min_pos <- min(coord$pos2[coord$group == g])
max_pos <- max(coord$pos2[coord$group == g])
## Add overlapping basepair and label it
if (min_pos == 1 | max_pos == end) {
comp <- coord[coord$group == g, ]
GroupStructure[coord$group == g] <- "+"
GroupStructure <- paste(GroupStructure, collapse = "")
} else {
temp <- coord
temp$group[temp$pos2 == min_pos - 1] <- g
temp$group[temp$pos2 == max_pos + 1] <- g
comp <- temp[temp$group == g, ]
GroupStructure[temp$group == g] <- "+"
GroupStructure <- paste(GroupStructure, collapse = "")
}
d <- diff(comp$pos2)
sequence <- addBreaks(d, comp$seq)
sequence <- paste(sequence, collapse = "")
structure <- rep(".", length(comp$seq))
structure[comp$bound < comp$pos2 & comp$bound != 0] <- ")"
structure[comp$bound > comp$pos2 & comp$bound != 0] <- "("
structure <- addBreaks(d, structure)
structure <- paste(structure, collapse = "")
edges <- rep(".", length(comp$seq))
coord_other <- coord
for (go in groups) {
if (go != g) {
min_pos <- min(coord$pos2[coord$group == go])
max_pos <- max(coord$pos2[coord$group == go])
## Add overlapping basepair and label it
if (min_pos == 1 | max_pos == end) {
} else {
coord_other$group[coord_other$pos2 == min_pos - 1] <- go
coord_other$group[coord_other$pos2 == max_pos + 1] <- go
}
}
}
edges <- coord_other$group[coord_other$pos2 %in% comp$pos2]
edges <- addBreaks(d, edges)
edges <- paste(edges, collapse = ",")
out <- data.frame(group = g, sequence = sequence, structure = structure, edges = edges, substructure = GroupStructure)
output[[w]] <- out
w <- w + 1
}
output <- do.call(rbind, output)
output
}
addBreaks <- function(d, structure) {
breaks <- c()
for (i in 1:length(d)) {
if (d[i] != 1) {
breaks <- c(breaks, structure[i], "-")
} else {
breaks <- c(breaks, structure[i])
}
}
breaks <- c(breaks, structure[length(structure)])
breaks
}
genDB <- function(map) {
## Creates Component Database
## breakpoint, closed_ends, rep, type
## .....b..... , true , 5b5,
## ...... , true , 6 ,
for (i in 1:nrow(map)) {
ss <- strsplit(map$V2[i], "[BREAK]", fixed = TRUE)[[1]]
lln <- nchar(ss)
}
}
loadCoords <- function(filename) {
############################################################################
### Description:
############################################################################
###
### Reads a file in table format containing coordinates generated from the
### RRNAfold program for a single RNA secondary structure or multiple RNA
### secondary structures into a data frame with a standard naming convention.
###
############################################################################
### Usage:
############################################################################
###
### data=loadFolds(file)
###
############################################################################
### Arguments:
############################################################################
###
### file: the name of a file which the data are to be read from. Each row
### of the table apears as one line in the file, with columns separated
### by commas. This file is created by a program using the Vienna RNA
### package.
###
############################################################################
### Details:
############################################################################
###
### Example input file format:
###
### 0,158.534088,199.550888,G,0,-1
### 0,152.741776,194.100571,A,1,-1
### 0,149.307266,186.849899,A,2,-1
### 0,148.749847,178.776566,G,3,-1
### 0,151.196960,170.989944,C,4,59
### 0,141.412643,159.620361,U,5,58
### 0,131.628342,148.250793,U,6,57
### 0,121.844025,136.881210,A,7,56
### 0,112.059715,125.511642,C,8,55
### 0,102.275398,114.142059,A,9,54
### 0,89.142853,109.343330,A,10,-1
### ....
###
### There is no header on the input file. The columns are
### ID,X,Y,SEQ,POS,BOUND
###
### ID - A unique ID for a given fold in the file
### X - X position of the NT in the secondary structure plot
### Y - Y position of the NT in the secondary structure plot
### SEQ - The nucleotide (A,G,U,C)
### POS - The position of the NT in the sequence
### BOUND - The position of the NT that the NT at POS is bound to
###
### The RRNAfold program can be used to generate
### <include github.com link here>
### ###
### Example:
###
### Generate a data file using the RRNAfold program. Create the
### input file input.dat containing a single sequence per line
### in file.
###
### input.dat
###
### AGGGGCCCCUUUUAAAA
### AGCCGCCCCUUUUAAAA
### AGAGGCCCCUUUUAAAA
### AGUUGCCCCUUUUAAAA
###
### Run the RRNAfold program on the command line to generate the
### output file out.dat
###
### RRNAfold --input-file input.dat --output-file out.dat
###
### Use the loadFolds program to load the table into an R data
### frame.
###
###
### >library(RRNA)
### >dat=loadFolds("out.dat")
data <- read.table(file = filename, sep = ",", header = FALSE)
if (dim(data)[2] == 6) {
names(data) <- c("id", "x", "y", "seq", "num", "bound")
} else {
if (dim(data)[2] == 7) {
names(data) <- c("id", "x", "y", "seq", "num", "bound", "energy")
} else {
data <- NULL
print(paste("The file ", filename, " does not have the correct number of columns", sep = ""))
print(paste("Expected 6 or 7, found ", dim(data)[2], sep = ""))
}
}
data
}
alignCoord <- function(data, n1, n2, x, y) {
############################################################################
### Description:
############################################################################
###
### Given fold data from loadFolds align all secondary structure plots
### at a particular nucleotide position. This translates the x1,y1 position of
### the n1 nucleotide to the provided x,y coordinates and the roates the
### secondary structure until y1=y2 for n1 and n2.
###
### This allows you to plot multiple secondary structures with all of them
### aligned to a given orientation.
###
###
############################################################################
### Usage:
############################################################################
### alignCoord(data,n1_pos,n2_pos,x0,y0)
### data=loadFolds(file)
### aligned=alignCoord(data,2,3,0,0)
###
############################################################################
### Arguments:
############################################################################
###
### data: fold data outputted from loadFolds
### n1_id: The position of the nucleotide that is translated to (x0,y0)
### n2_id: The position of the nucleotide in the sequence that is rotated
### until its y coordinate is y0
### x0: The x coordinate that n1 is translated to
### y0: The y coordinate that n2 is translated to
###
############################################################################
### Details:
############################################################################
###
## The RRNAfold program can be used to generate
## <include github.com link here>
## ###
### Example:
###
### Generate a data file using the RRNAfold program. Create the
### input file input.dat containing a single sequence per line
### in file.
###
### input.dat
###
### AGGGGCCCCUUUUAAAA
### AGCCGCCCCUUUUAAAA
### AGAGGCCCCUUUUAAAA
### AGUUGCCCCUUUUAAAA
###
### Run the RRNAfold program on the command line to generate the
### output file out.dat
###
### RRNAfold --input-file input.dat --output-file out.dat
###
### Use the loadFolds program to load the table into an R data
### frame.
###
###
### >library(RRNA)
### >dat=loadFolds("out.dat")
### >aligned=alignCoord(dat,1,2,0,0)
ids <- data$id
ids <- table(ids)
ids <- as.numeric(names(ids))
## Loop through all ids in the dataset ##
for (i in ids) {
tmp <- NULL
# print(i)
tmp <- data[data$id == i, ]
x1 <- tmp$x[tmp$num == n1]
x2 <- tmp$x[tmp$num == n2]
y1 <- tmp$y[tmp$num == n1]
y2 <- tmp$y[tmp$num == n1]
## Translate x1,y1 to x,y ##
tmp$x <- tmp$x - (x1 - x)
tmp$y <- tmp$y - (y1 - y)
dy <- tmp$y[tmp$num == n2] - tmp$y[tmp$num == n1]
dx <- tmp$x[tmp$num == n2] - tmp$x[tmp$num == n1]
ang <- atan(abs(dy) / abs(dx))
if (dx < 0 & dy < 0) {
# print("2")
ang <- ang + pi
} else {
if (dx < 0 & dy > 0) {
# print("1")
ang <- pi - ang
} else {
if (dx > 0 & dy < 0) {
# print("3")
ang <- 2 * pi - ang
}
}
}
## Rotate x2,y2 around x1,y1 until y1==y2
for (j in 1:length(tmp$x)) {
pt <- rotateS(tmp$x[j], tmp$y[j], x, y, -ang)
tmp$x[j] <- pt[1]
tmp$y[j] <- pt[2]
}
data[data$id == i, ] <- tmp
}
data
}
RNAPlot <- function(data, ranges = 0, add = FALSE, hl = NULL, seqcols = NULL, seqTF = FALSE, labTF = FALSE, nt = FALSE, dp = 0.5, modspec = FALSE, modp = NULL, mod = NULL, modcol = NULL, tsize = 0.5, main = "", pointSize = 2, lineWd = 2) {
## dp-The distanc the text is place from the sides of the aptamer ###
## modspec - Modify Specific bases in the Aptamer ###
## modp- NT position
## mod -Modification to be done ###
# 1=put a box around the Label
# 2=put a circle around the Label
# 3=NT Change T->G
# 4=Add a solid colored circle at junction
## modcol - Color of the modification
############################################################################
### Description:
############################################################################
###
### Given fold data from load Plots this creates a new secondary structure
### plot
###
############################################################################
### Usage:
############################################################################
### alignCoord(data,n1_pos,n2_pos,x0,y0)
### data=loadFolds(file)
### RNAPlot(data[data$id==0,])
###
############################################################################
### Arguments:
############################################################################
### data: fold data from loadFolds for a single RNA secondary structure
### ranges: A data frame containing the ranges of sequence positions that
### should be highlighted with given colors
### Example:
### data.frame(min=c(69,1,7),max=c(74,5,17),col=c(2,3,4),desc=c("Region 1","Region 2","Region 3"))
###
### This highlights the sequence between 69-74, 1-5, and 7-17
### add: Should the new plot be added to an existing plot TRUE/FALSE
### hl: Takes an array of sequences and highlights them with seqcol
### seqcols: Colors that should be used to highlight the sequences given in hl
### seqTF: Unused argument included for backward compatibility
### labTF: Should a legend be plot TRUE/FALSE
### nt: Plot the nucleotide sequence TRUE/FALSE
### dp: Distance the nucleotide labels plotted using the nt option are from the
### x,y coordinate of the nt position
### modspec: TRUE/FALSE should specific positions be modified?
### modp: Array of positions to be modified
### modcol: Color of positions being modified by modp array
### mod: the pch used for nucleotides being modifed by modp
### tsize:Size of the nucleotide labels
### main: Title used for legend
### pointSize: Size of the pch used for ploting
### lineWd: the line width used in the plot
############################################################################
### Details:
############################################################################
###
## The RRNAfold program can be used to generate
## <include github.com link here>
## ###
### Example:
###
### Generate a data file using the RRNAfold program. Create the
### input file input.dat containing a single sequence per line
### in file.
###
### input.dat
###
### AGGGGCCCCUUUUAAAA
### AGCCGCCCCUUUUAAAA
### AGAGGCCCCUUUUAAAA
### AGUUGCCCCUUUUAAAA
###
### Run the RRNAfold program on the command line to generate the
### output file out.dat
###
### RRNAfold --input-file input.dat --output-file out.dat
###
### Use the loadFolds program to load the table into an R data
### frame.
###
###
### >library(RRNA)
### >dat=loadFolds("out.dat")
### >aligned=alignCoord(dat,1,2,0,0)
### >RNAPlot(aligned[aligned$id==0,])
l <- 1
if (seqTF) {
#TODO: Update version and remove seqTF as argument in function
# breaking backward compatibility
#s <- data$seq
#sequence <- paste(s, collapse = "")
}
if (is.null(dim(ranges)) & length(seqcols) < 1) {
l <- 0
}
if (is.null(dim(ranges))) {
ranges <- NULL
print("default ranges used")
ranges$min[1] <- 0
ranges$max[1] <- dim(data)[1]
ranges$desc[1] <- c("RNA Molecule")
ranges$col[1] <- -1
ranges <- as.data.frame(ranges)
}
### Creating Highlighted data ###
s2 <- dim(data)[1]
if (length(hl) < 1) {
print("no sequences to match")
} else {
hlranges <- NULL
for (i in 1:length(hl)) {
t <- strsplit(hl[i], "")
s1 <- length(t[[1]])
## Create all sequences of length the correct size ##
groups <- NULL
if (s2 <= s1) {
groups[[1]] <- as.character(data$seq)
} else {
for (j in 1:(s2 - s1)) {
groups[[j]] <- as.character(data$seq[j:(j + (s1 - 1))])
}
}
groups <- lapply(groups, paste, collapse = "")
groups <- unlist(groups)
ind <- c(1:length(groups))[groups == hl[i]]
ind <- data$num[ind]
print(ind)
hlranges$min <- c(hlranges$min, ind)
hlranges$max <- c(hlranges$max, (ind + (s1 - 1)))
hlranges$desc <- c(hlranges$desc, rep(hl[i], length(ind)))
hlranges$col <- c(hlranges$col, rep(seqcols[i], length(ind)))
## Search for first character ##
}
print(hlranges)
hlranges <- as.data.frame(hlranges)
ranges <- rbind(ranges, hlranges)
}
s1 <- max(c(data$x, data$y))
s2 <- min(c(data$x, data$y))
if (add == FALSE) {
plot(data$x, data$y, type = "n", xlab = "", ylab = "", axes = FALSE, xlim = c(s2, s1), ylim = c(s2, s1))
for (i in data$num) {
if (data$bound[data$num == i] != -1) {
lines(c(data$x[data$num == i], data$x[data$num == data$bound[data$num == i]]), c(data$y[data$num == i], data$y[data$num == (data$bound[data$num == i])]), lwd = lineWd)
}
}
} else {
if (add == TRUE) {
points(data$x, data$y, type = "n")
for (i in data$num) {
if (data$bound[data$num == i] != -1) {
lines(c(data$x[data$num == i], data$x[data$num == data$bound[data$num == i]]), c(data$y[data$num == i], data$y[data$num == (data$bound[data$num == i])]), lwd = lineWd)
}
}
}
}
## Adding Lines between the each point ###
x <- data$x
y <- data$y
if (nt) {} else {
lines(x, y, lw = lineWd)
points(x, y, pch = 16, cex = pointSize)
}
if (l == 1) {
if (labTF) {
legend(s2[1], s1[1],
legend = ranges$desc[ranges$col != 0],
col = abs(ranges$col[ranges$col != 0]), bty = "n",
lty = 1
)
}
}
### Adding NT labels to each position ####
if (nt == TRUE) {
x1 <- data$x[1]
x2 <- data$x[2]
y1 <- data$y[1]
y2 <- data$y[2]
vx <- (x1 - x2)
vy <- (y1 - y2)
dist <- sqrt(vx^2 + vy^2)
vx <- vx / dist
vy <- vy / dist
## Place 5' inline with first bound ##
text((data$x[1] + (vx * dp)), (data$y[1] + (vy * dp)), "5'", font = 2, cex = tsize)
## Place NT label perpedicular ##
x3 <- (-1 * (y1 - y2))
y3 <- (x1 - x2)
vx <- x3 / dist
vy <- y3 / dist
text((data$x[1] + (vx * dp)), (data$y[1] + (vy * dp)), data$seq[1], cex = tsize, font = 2)
### Placing 3' End labels ###
t1 <- length(data$x)
t2 <- (length(data$x) - 1)
x1 <- data$x[t1]
x2 <- data$x[t2]
y1 <- data$y[t1]
y2 <- data$y[t2]
vx <- (x1 - x2)
vy <- (y1 - y2)
dist <- sqrt(vx^2 + vy^2)
vx <- vx / dist
vy <- vy / dist
## Place 5' inline with first bound ##
text((data$x[t1] + (vx * dp)), (data$y[t1] + (vy * dp)), "3'", cex = tsize, font = 2)
## Place NT label perpedicular ##
x3 <- (-1 * (y1 - y2))
y3 <- (x1 - x2)
vx <- x3 / dist
vy <- y3 / dist
text((data$x[t1] - (vx * dp)), (data$y[t1] - (vy * dp)), data$seq[t1], font = 2, cex = tsize)
### Adding Specific Modifications ####
for (i in 2:(length(data$x) - 1)) {
x1 <- data$x[(i - 1)]
x2 <- data$x[(i + 1)]
y1 <- data$y[(i - 1)]
y2 <- data$y[(i + 1)]
## Rotate 90 degrees and create unit vector ##
x3 <- (-1 * (y1 - y2))
y3 <- (x1 - x2)
dist <- sqrt(x3^2 + y3^2)
x3 <- x3 / dist
y3 <- y3 / dist
## Add text to data$seq[i] ###
text(data$x[i] + (x3 * dp), data$y[i] + (y3 * dp), data$seq[i], font = 2, cex = tsize)
}
}
if (modspec == TRUE) {
for (i in 1:length(modp)) {
points(data$x[modp[i]], data$y[modp[i]], pch = mod[i], col = modcol[i])
}
}
### Highlighting Sequences #######
for (i in 1:dim(ranges)[1]) {
# print(i)
keep <- data$num >= ranges$min[i] & data$num <= ranges$max[i]
if (ranges$col[i] != 0) {
if (ranges$col[i] == -1) {
# points(data$x[keep],data$y[keep],col=1,pch=".")
} else {
if (ranges$col[i] == 3) {
points(data$x[keep], data$y[keep], col = ranges$col[i], pch = "x")
} else {
points(data$x[keep], data$y[keep], col = ranges$col[i], pch = 19)
}
}
}
}
title(main)
}
bplfile <- function(dat, name) {
##### Creates a bpl file of the RNA secondary structure #####
### Get sequnce and print on first line ###
seq <- paste(dat$seq, collapse = "")
fileo <- paste(">", name, sep = "")
fileo <- paste(fileo, "\n", seq, "\n", sep = "")
fileo <- paste(fileo, "\n$ list", sep = "")
for (i in 1:length(dat$num)) {
if (dat$bound[i] == -1) {} else {
fileo <- paste(fileo, "\n", (dat$num[i] + 1), " ", (dat$bound[i] + 1))
}
}
write(fileo, file = name)
}
transformFold <- function(dat, x0, y0, ang) {
### Given fold data from loadFolds this translates and rotates the folds ###
x <- dat$x
y <- dat$y
new <- rotateV(x, y, x0, y0, ang)
dat$x <- new$x
dat$y <- new$y
dat
}
aptPlotCT <- function(file, ranges = 0, add = FALSE, hl = NULL, seqcols = NULL, seqTF = FALSE, labTF = FALSE, nt = FALSE, dp = 0.5, modspec = FALSE, modp = NULL, mod = NULL, modcol = NULL, tsize = 0.5, main = "", pseudoTF = FALSE, pseudo_nums = NULL, ticks = NULL, ticksTF = FALSE) {
### Load CT file ###
dat <- loadCt(file)
### Filter PseudoKnots ##
pseudo <- pseudoKnot(dat)
data <- ct2coord(pseudo[[2]])
names(data)[1] <- "num"
data <- as.data.frame(data)
## dp-The distanc the text is place from the sides of the aptamer ###
## modspec - Modify Specific bases in the Aptamer ###
## modp- NT position
## mod -Modification to be done ###
# 1=put a box around the Label
# 2=put a circle around the Label
# 3=NT Change T->G
# 4=Add a solid colored circle at junction
## modcol - Color of the modification
l <- 1
if (seqTF) {
#TODO: Update major version and remove seqTF breaking
#backward compatibility
#s <- dat$seq
#sequence <- paste(s, collapse = "")
}
if (ranges == 0 & length(seqcols) < 1) {
l <- 0
}
if (ranges == 0) {
ranges <- NULL
print("default ranges used")
ranges$min[1] <- 0
ranges$max[1] <- dim(data)[1]
ranges$desc[1] <- c("Aptamer")
ranges$col[1] <- -1
ranges <- as.data.frame(ranges)
}
### Creating Highlighted data ###
s2 <- dim(data)[1]
if (length(hl) < 1) {
print("no sequences to match")
} else {
hlranges <- NULL
for (i in 1:length(hl)) {
t <- strsplit(hl[i], "")
s1 <- length(t[[1]])
## Create all sequences of length the correct size ##
groups <- NULL
if (s2 <= s1) {
groups[[1]] <- as.character(data$seq)
} else {
for (j in 1:(s2 - s1)) {
groups[[j]] <- as.character(data$seq[j:(j + (s1 - 1))])
}
}
groups <- lapply(groups, paste, collapse = "")
groups <- unlist(groups)
ind <- c(1:length(groups))[groups == hl[i]]
ind <- data$num[ind]
print(ind)
hlranges$min <- c(hlranges$min, ind)
hlranges$max <- c(hlranges$max, (ind + (s1 - 1)))
hlranges$desc <- c(hlranges$desc, rep(hl[i], length(ind)))
hlranges$col <- c(hlranges$col, rep(seqcols[i], length(ind)))
## Search for first character ##
}
print(hlranges)
hlranges <- as.data.frame(hlranges)
ranges <- rbind(ranges, hlranges)
}
s1 <- max(c(data$x, data$y))
s1x <- max(c(data$x)) + 3
s1y <- max(c(data$y)) + 3
s2 <- min(c(data$x, data$y))
s2x <- min(c(data$x)) - 3
s2y <- min(c(data$y)) - 3
if (add == FALSE) {
plot(data$x, data$y, type = "n", xlab = "", ylab = "", axes = FALSE, xlim = c(s2x, s1x), ylim = c(s2y, s1y))
} else {
if (add == TRUE) {
points(data$x, data$y, type = "n")
}
}
## Adding Lines between the each point ###
x <- data$x
y <- data$y
if (nt) {} else {
lines(x, y, lw = 4)
}
if (l == 1) {
if (labTF) {
legend(s2[1], s1[1],
legend = ranges$desc[ranges$col != 0],
col = abs(ranges$col[ranges$col != 0]), bty = "n",
lty = 1
)
}
}
### Adding NT labels to each position ####
if (nt == TRUE) {
x1 <- data$x[1]
x2 <- data$x[2]
y1 <- data$y[1]
y2 <- data$y[2]
vx <- (x1 - x2)
vy <- (y1 - y2)
dist <- sqrt(vx^2 + vy^2)
vx <- vx / dist
vy <- vy / dist
## Place 5' inline with first bound ##
text((data$x[1] + (vx * dp)), (data$y[1] + (vy * dp)), "5'", cex = tsize)
## Place NT label perpedicular ##
x3 <- (-1 * (y1 - y2))
y3 <- (x1 - x2)
vx <- x3 / dist
vy <- y3 / dist
text((data$x[1] + (vx * dp)), (data$y[1] + (vy * dp)), data$seq[1], cex = tsize)
### Placing 3' End labels ###
t1 <- length(data$x)
t2 <- (length(data$x) - 1)
x1 <- data$x[t1]
x2 <- data$x[t2]
y1 <- data$y[t1]
y2 <- data$y[t2]
vx <- (x1 - x2)
vy <- (y1 - y2)
dist <- sqrt(vx^2 + vy^2)
vx <- vx / dist
vy <- vy / dist
## Place 5' inline with first bound ##
text((data$x[t1] + (vx * dp)), (data$y[t1] + (vy * dp)), "3'", cex = tsize)
## Place NT label perpedicular ##
x3 <- (-1 * (y1 - y2))
y3 <- (x1 - x2)
vx <- x3 / dist
vy <- y3 / dist
text((data$x[t1] - (vx * dp)), (data$y[t1] - (vy * dp)), data$seq[t1], cex = tsize)
### Adding Specific Modifications ####
if (modspec == TRUE) {
for (i in 1:length(modp)) {
if (mod[i] == 1) {
## Add box around text ##
} else {
if (mod[i] == 2) {
## Add circle around text ##
} else {
if (mod[i] == 3) {
## Show NT Change ##
} else {
if (mod[i] == 4) {
## Add Colored Pt ##
points(data$x[modp[i]], data$y[modp[i]], pch = 1, col = modcol[i])
}
}
}
}
}
}
for (i in 2:(length(data$x) - 1)) {
x1 <- data$x[(i - 1)]
x2 <- data$x[(i + 1)]
y1 <- data$y[(i - 1)]
y2 <- data$y[(i + 1)]
## Rotate 90 degrees and create unit vector ##
x3 <- (-1 * (y1 - y2))
y3 <- (x1 - x2)
dist <- sqrt(x3^2 + y3^2)
x3 <- x3 / dist
y3 <- y3 / dist
## Add text to data$seq[i] ###
text(data$x[i] + (x3 * dp), data$y[i] + (y3 * dp), data$seq[i], cex = tsize)
}
}
if (ticksTF) {
for (i in 2:(length(data$x) - 1)) {
k <- data$num[i] == ticks
t <- table(k)
if (length(t) > 1) {
x1 <- data$x[(i - 1)]
x2 <- data$x[(i + 1)]
y1 <- data$y[(i - 1)]
y2 <- data$y[(i + 1)]
## Rotate 90 degrees and create unit vector ##
x3 <- (-1 * (y1 - y2))
y3 <- (x1 - x2)
dist <- sqrt(x3^2 + y3^2)
x3 <- x3 / dist
y3 <- y3 / dist
## Add text to data$seq[i] ###
points(c(data$x[i], (data$x[i] + x3 * dp)), c(data$y[i], (data$y[i] + y3 * dp)), type = "l")
dp2 <- dp * 1.8
text(data$x[i] + (x3 * dp2), data$y[i] + (y3 * dp2), data$pos[i], cex = tsize)
}
}
}
if (pseudoTF) {
for (i in 2:(length(data$x) - 1)) {
k <- data$num[i] == pseudo_nums
t <- table(k)
if (length(t) > 1) {
x1 <- data$x[(i - 1)]
x2 <- data$x[(i + 1)]
y1 <- data$y[(i - 1)]
y2 <- data$y[(i + 1)]
## Rotate 90 degrees and create unit vector ##
x3 <- (-1 * (y1 - y2))
y3 <- (x1 - x2)
dist <- sqrt(x3^2 + y3^2)
x3 <- x3 / dist
y3 <- y3 / dist
## Add text to data$seq[i] ###
text(data$x[i] + (x3 * dp), data$y[i] + (y3 * dp), data$seq[i], cex = tsize)
print(c(data$x[i] + (x3 * dp), data$y[i] + (y3 * dp)))
}
}
}
### Highlighting Sequences #######
for (i in 1:dim(ranges)[1]) {
# print(i)
keep <- data$num >= ranges$min[i] & data$num <= ranges$max[i]
if (ranges$col[i] != 0) {
if (ranges$col[i] == -1) {
# points(data$x[keep],data$y[keep],col=1,pch=".")
} else {
if (ranges$col[i] == 3) {
points(data$x[keep], data$y[keep], col = ranges$col[i], pch = "x")
} else {
points(data$x[keep], data$y[keep], col = ranges$col[i], pch = 19)
}
}
}
for (i in data$num) {
if (data$bound[data$num == i] != -1) {
lines(c(data$x[data$num == i], data$x[data$num == data$bound[data$num == i]]), c(data$y[data$num == i], data$y[data$num == (data$bound[data$num == i])]))
# vx<-data$x[data$num==i]-data$x[data$num==data$bound[data$num==i]]
# vy<-data$y[data$num==i]-data$y[data$num==(data$bound[data$num==i])]
# dist<-sqrt(vx^2+vy^2)
# vx<-vx/dist
# vy<-vy/dist
# text((data$x[data$num==i]+(vx*dist*dp)),(data$y[data$num==i]+(vy*dist*dp)),data$seq[data$num==i])
}
}
}
title(main)
}
#### Converts .CT Files to coordinate files ####
# Input - pos,seq,before,after,bound2,pos
# Output - pos,seq,x,y
pseudoKnot <- function(ctDat) {
### Given a CT data it identifies pairing that are from pseudoknots ###
### removes the pseudo bps from input and returns the input dataset ###
### Add a new pseudo knot dataset ###
input <- ctDat
pseudo <- NULL
pk <- 1
for (i in ctDat$pos) {
bound <- ctDat$bound[ctDat$pos == i]
if (bound != 0 & i < bound) {
for (j in i:bound) {
nbound <- ctDat$bound[ctDat$pos == j]
if (nbound != 0) {
if (nbound > bound | nbound < i) {
input$bound[input$pos == j] <- 0
input$bound[input$pos == nbound] <- 0
pseudo$pos[pk] <- j
pseudo$bound[pk] <- nbound
pk <- pk + 1
}
}
}
}
}
out <- NULL
pseudo <- as.data.frame(pseudo)
psk <- unique(pseudo$pos)
outpk <- NULL
sk <- 1
for (pk in psk) {
outpk$pos[sk] <- pk
bound <- pseudo$bound[pseudo$pos == pk]
bound <- bound[1]
outpk$bound[sk] <- bound
sk <- sk + 1
}
if (length(psk) > 0) {
#### Find stems in pseudo ###
stems <- NULL
d1 <- diff(outpk$bound)
d2 <- diff(outpk$pos)
ind <- 1:
length(d1)
r1 <- ind[d1 != -1]
r2 <- ind[d2 != 1]
r <- c(r1, r2)
r <- unique(r)
r <- sort(r)
s <- 1
for (i in 1:length(r)) {
tmp <- NULL
if (i == 1) {
tmp$pos <- outpk$pos[1:r[1]]
tmp$bound <- outpk$bound[1:r[1]]
} else {
tmp$pos <- outpk$pos[(r[(i - 1)] + 1):r[i]]
tmp$bound <- outpk$bound[(r[(i - 1)] + 1):r[i]]
}
stems[[s]] <- tmp
s <- s + 1
}
l <- length(r)
lp <- length(outpk$pos)
tmp$pos <- outpk$pos[(r[l] + 1):lp]
tmp$bound <- outpk$bound[(r[l] + 1):lp]
stems[[s]] <- tmp
#### Create Compatibility Table ##
compm <- NULL
m1t <- 1
for (i in 1:s) {
for (j in 1:s) {
comp <- 0
st1 <- stems[[i]]
st2 <- stems[[j]]
mb1 <- min(c(st1$bound))
mp1 <- min(c(st1$pos))
mb2 <- min(c(st2$bound))
mp2 <- min(c(st2$pos))
mm2 <- min(c(mp2, mb2))
mx2 <- max(c(mp2, mb2))
### Incompat Checks ##
print(paste(mb1, mp1, mm2, mx2))
if (mp1 < mx2 & mp1 > mm2 & mb1 < mm2) {
comp <- 1
} else {
if (mp1 < mx2 & mp1 > mm2 & mb1 > mx2) {
comp <- 1
} else {
if (mb1 < mx2 & mb1 > mm2 & mp1 > mx2) {
comp <- 1
} else {
if (mb1 < mx2 & mb1 > mm2 & mp1 < mm2) {
comp <- 1
} else {
}
}
}
}
compm$i[m1t] <- i
compm$j[m1t] <- j
compm$c[m1t] <- comp
m1t <- m1t + 1
}
}
comp <- as.data.frame(compm)
### Find largest compatible set ###
# For all i & j in the set they are all compatible
# Find all pairwise compatible sets connected graphs
mySets <- NULL
l <- c(1:length(stems))
#### Each stem is assignd to a set ###
mys <- 1
while (length(l) > 0) {
s1 <- l[1]
s <- comp[comp$c == 0 & comp$i == s1, ]
print(l)
### Remove all s$j from l ##
v <- s$j[1]
for (j in s$j) {
v <- c(v, j)
l <- l[l != j]
}
mySets[[mys]] <- v
mys <- mys + 1
}
#### Find the biggest set and put it back in ###
lst <- NULL
for (i in 1:length(stems)) {
tmp <- stems[[i]]
tmp <- tmp$pos
lst[i] <- length(tmp)
}
lst2 <- NULL
for (i in 1:length(mySets)) {
tmp <- mySets[[i]]
tmp <- tmp[2:length(tmp)]
lst2[i] <- sum(lst[tmp])
}
print(lst2)
largest <- c(1:length(lst2))[lst2 == max(lst2)]
largest <- mySets[[largest]]
largest <- largest[2:length(largest)]
print(largest)
addBack <- NULL
tmp <- stems[[largest[1]]]
addBack$pos <- tmp$pos
addBack$bound <- tmp$bound
if (length(largest) > 1) {
for (i in largest[2:length(largest)]) {
tmp <- stems[[i]]
addBack$pos <- c(addBack$pos, tmp$pos)
addBack$bound <- c(addBack$bound, tmp$bound)
}
}
for (pos in addBack$pos) {
bnd <- addBack$bound[addBack$pos == pos]
input$bound[input$pos == pos] <- bnd
input$bound[input$pos == bnd] <- pos
}
addBack <- as.data.frame(addBack)
outpk <- as.data.frame(outpk)
for (pos in addBack$pos) {
outpk <- outpk[outpk$pos != pos, ]
}
} else {
outpk <- NULL
}
out[[1]] <- outpk
out[[2]] <- input
out
}
loadCt <- function(file) {
input <- read.table(file = file, skip = 1)
names(input) <- c("pos", "seq", "before", "after", "bound", "pos2")
input$seq <- as.character(input$seq)
input
}
makeCt <- function(struct, seq) {
nts <- strsplit(seq, "")
nts <- nts[[1]]
stc <- strsplit(struct, "")
stc <- stc[[1]]
out <- NULL
for (i in 1:length(stc)) {
out$pos[i] <- i
out$before[i] <- (i - 1)
out$after[i] <- (i + 1)
out$seq[i] <- nts[i]
out$pos2[i] <- i
if (stc[i] == ".") {
out$bound[i] <- 0
} else {
if (stc[i] == ")") {
b <- backward(stc, i)
out$bound[i] <- b
} else {
if (stc[i] == "(") {
b <- forward(stc, i)
out$bound[i] <- b
}
}
}
}
out <- as.data.frame(out)
out
}
forward <- function(stc, i) {
k <- 1
go <- 1
out <- 0
if (stc[i] == "(") {
i <- i + 1
for (j in i:length(stc)) {
if (go == 1) {
if (stc[j] == ")") {
k <- k - 1
if (k == 0) {
out <- j
go <- 0
}
} else {
if (stc[j] == "(") {
k <- k + 1
}
}
}
}
}
out
}
backward <- function(stc, i) {
k <- 1
go <- 1
out <- 0
if (stc[i] == ")") {
i <- i - 1
for (j in i:1) {
if (go == 1) {
if (stc[j] == "(") {
k <- k - 1
if (k == 0) {
out <- j
go <- 0
}
} else {
if (stc[j] == ")") {
k <- k + 1
}
}
}
}
}
out
}
ct2knet <- function(file, ind = 0) {
dat <- loadCt(file)
out <- ""
seq <- dat$seq[(ind + 1):length(dat$seq)]
seq <- paste(seq, collapse = "")
out <- paste(">", file, "\n", sep = "")
out <- paste(out, seq, "\n\n", sep = "")
out <- paste(out, "$ list\n", sep = "")
for (i in 1:dim(dat)[1]) {
if (dat$bound[i] != 0) {
out <- paste(out, " ", (dat$pos[i] - ind), " ", (dat$bound[i] - ind), "\n", sep = "")
}
}
out
}
validCT <- function(ct) {
messages <- list()
w <- 1
if (
!("before" %in% names(ct)) ||
!("after" %in% names(ct)) ||
!("pos" %in% names(ct)) ||
!("seq" %in% names(ct)) ||
!("bound" %in% names(ct)) ||
!("pos2" %in% names(ct))
) {
messages[[w]] <- "CT File does not have correct column names"
w <- w + 1
}
if (min(ct$pos) < 1) {
messages[[w]] <- "CT File has pos value < 1"
w <- w + 1
}
if (min(ct$pos2) < 1) {
messages[[w]] <- "CT File has pos2 value < 1"
w <- w + 1
}
if (TRUE %in% duplicated(ct$pos)) {
messages[[w]] <- "CT File has multiple rows for nucleotide at same position."
w <- w + 1
}
if (TRUE %in% duplicated(ct$pos2)) {
messages[[w]] <- "CT File has duplicate pos2 values"
w <- w + 1
}
if (TRUE %in% duplicated(ct$bound[ct$bound != 0])) {
messages[[w]] <- "CT File has duplicate bound values"
w <- w + 1
}
## FAIL: Check equal open/closed pairs
ss1 <- ct2ss(ct)
ss2 <- ct2ss(ct, cleanup = FALSE)
if (ss1 != ss2) {
messages[[w]] <- paste("CT File has secondary structure incompatible with bracket notation.\nCleaned up:", ss1, "\nIn CT File:", ss2, sep = "")
w <- w + 1
}
## WARN: Check gap is >= 1nts
if (min(abs(ct$pos - ct$bound)) < 2) {
messages[[w]] <- "Gap between bound NTs is less than 2"
w <- w + 1
}
messages
}
ct2coord <- function(input) {
messages <- validCT(input)
all <- NULL
if (length(messages) == 0) {
group <- 0
firstrun <- 1
### First NT in the file is at 0,0 ####
#### Create two fake base-pairs in the begining to make it work ###
mnp <- min(input$pos)
map <- max(input$pos)
a1 <- map + 1
a2 <- map + 2
b1 <- mnp - 1
b2 <- mnp - 2
new1 <- NULL
new1$pos[1] <- a1
new1$before[1] <- (a1 - 1)
new1$after[1] <- (a1 + 1)
new1$seq[1] <- "A"
new1$pos2[1] <- a1
new1$bound[1] <- b1
new2 <- NULL
new2$pos[1] <- a2
new2$before[1] <- (a2 - 1)
new2$after[1] <- (a2 + 1)
new2$seq[1] <- "A"
new2$pos2[1] <- a2
new2$bound[1] <- b2
new3 <- NULL
new3$pos[1] <- b1
new3$before[1] <- (b1 - 1)
new3$after[1] <- (b1 + 1)
new3$seq[1] <- "A"
new3$pos2[1] <- b1
new3$bound[1] <- a1
new4 <- NULL
new4$pos[1] <- b2
new4$before[1] <- (b2 - 1)
new4$after[1] <- (b2 + 1)
new4$seq[1] <- "A"
new4$pos2[1] <- b2
new4$bound[1] <- a2
input <- rbind(new1, new2, input, new3, new4)
input <- as.data.frame(input)
mnp <- min(input$pos)
output <- NULL
nextNT <- input[input$pos == mnp, ]
if (nextNT$bound != 0) {
#### Set Coordinates to (0,0) and (0,1) #####
output$pos[1] <- mnp
output$x[1] <- 0
output$y[1] <- 0
output$pos[2] <- nextNT$bound
output$x[2] <- 0
output$y[2] <- 1
}
stems <- NULL
stems[[1]] <- c(output$pos[1], output$pos[2])
j <- 3
prev <- mnp
npos <- input$after[input$pos == mnp]
nextNT <- input[input$pos == npos, ]
mp <- max(input$pos)
while (length(stems) > 0) {
newstems <- NULL
newloops <- NULL
ns <- 1
nl <- 1
for (i in 1:length(stems)) {
s1 <- stems[[i]]
#### Use stemCoord to generate stem coordinates ###
p1 <- s1[1]
p2 <- s1[2]
if (firstrun == 1) {
x3 <- -1
y3 <- 0
firstrun <- 0
} else {
### If prev.x < this.x p3=-1 ##
prev <- input$before[input$pos == p1]
x3 <- output$x[output$pos == prev]
y3 <- output$y[output$pos == prev]
}
x1 <- output$x[output$pos == p1]
y1 <- output$y[output$pos == p1]
x2 <- output$x[output$pos == p2]
y2 <- output$y[output$pos == p2]
sout <- stemCords(input, p1, p2, x1[1], y1[1], x2[1], y2[1], x3[1], y3[1])
sdat <- sout[[1]]
sdat <- as.data.frame(sdat)
sdat <- sdat[sdat$pos != p1, ]
sdat <- sdat[sdat$pos != p2, ]
l <- dim(sdat)[1]
if (l > 0) {
s <- length(output$pos)
s <- s + 1
for (sd in 1:length(sdat$pos)) {
output$pos[s] <- sdat$pos[sd]
output$x[s] <- sdat$x[sd]
output$y[s] <- sdat$y[sd]
output$group[s] <- group
s <- s + 1
}
group <- group + 1
}
newloops[[nl]] <- sout[[2]]
nl <- nl + 1
}
##### Loop through all loop starts ###
newstems <- NULL
for (i in 1:length(newloops)) {
lps <- newloops[[i]]
lp1 <- loopLength(input, lps[1])
#### Add stems to newstems ###
if (length(lp1) > 1) {
nstm <- lp1[[2]]
for (nt in 1:length(nstm)) {
newstems[[ns]] <- nstm[[nt]]
ns <- ns + 1
}
}
#### Get Coordinates for each position and add to output ###
pp <- input$before[input$pos == lps[1]]
lout <- genCords(lp1, lps[1], lps[2], output, 1)
pt <- lp1[[1]]
tout <- lout
lpl <- length(pt$pos)
pt$pos <- pt$pos[lpl:1]
lout$pos <- pt$pos
lout <- as.data.frame(lout)
lout <- lout[lout$pos != lps[1], ]
lout <- lout[lout$pos != lps[2], ]
s <- length(output$pos)
s <- s + 1
for (lo in 1:length(lout$pos)) {
output$pos[s] <- lout$pos[lo]
output$x[s] <- lout$x[lo]
output$y[s] <- lout$y[lo]
output$group[s] <- group
s <- s + 1
}
group <- group + 1
}
stems <- newstems
}
my <- min(output$y) - 5
xy <- max(output$y) + 5
mx <- min(output$x) - 5
xx <- max(output$x) + 5
# plot(output$x,output$y,type="n",ylim=c(my,xy),xlim=c(mx,xx))
# for(i in output$pos){
# b<-input$after[input$pos==i]
# points(c(output$x[output$pos==i],output$x[output$pos==b]),c(output$y[output$pos==i],output$y[output$pos==b]),type="l")
# text(output$x[output$pos==i],output$y[output$pos==i],i)
# }
output <- as.data.frame(output)
input <- as.data.frame(input)
all <- merge(output, input, by = "pos")
names(all)[1] <- "num"
all
### Remove first two and last two ##
rmax <- dim(all)[1]
rmax <- rmax - 2
all <- all[3:rmax, ]
} else {
print(messages)
}
all
}
stemCords <- function(input, p1, p2, x1, y1, x2, y2, x3, y3) {
##### INput is a CT file
##### P1 is the position of the 5' end of the stem
##### P2 is the position of its bound partner
##### P3 is the orientation of the stem -1 or 1
#### Given the first BasePair of a stem it fills in the rest of the stem until
#### It comes to an unbound member
output <- NULL
output$pos[1] <- p1
output$x[1] <- x1
output$y[1] <- y1
output$pos[2] <- p2
output$x[2] <- x2
output$y[2] <- y2
ends <- -1
#### Determine perpendicular vector ###
vperp <- NULL
vperp$x <- x3 - x1
vperp$y <- y3 - y1
v <- rotateS((x2 - x1), (y2 - y1), 0, 0, (pi / 2))
m <- sqrt(v[1]^2 + v[2]^2) + sqrt(vperp$x^2 + vperp$y^2)
ang <- acos((v[1] * vperp$x + v[2] * vperp$y) / m)
if (ang < pi / 2) {
v <- rotateS((x2 - x1), (y2 - y1), 0, 0, (-1 * pi / 2))
}
vect <- NULL
vect$x <- v[1]
vect$y <- v[2]
nv <- sqrt((vect$x)^2 + (vect$y)^2)
vect$x <- vect$x / nv
vect$y <- vect$y / nv
prev1 <- p1
j <- 3
mf <- length(input$pos) + 5
mp <- max(input$pos)
while (j < mf) {
npos <- input$after[input$pos == prev1]
if (npos < (mp + 1)) {
nextNT <- input[input$pos == npos, ]
if (dim(nextNT)[1] < 1) {
j <- mf + 5
} else {
if (nextNT$bound == 0) {
##### Position is not bound done ####
## Return the last position as Ends #
bound <- input$bound[input$pos == prev1]
ends <- c(prev1, bound)
j <- mf + 5
} else {
pbpos <- input$bound[input$pos == prev1]
pbafter <- input$before[input$pos == pbpos]
bpos <- input$bound[input$pos == npos]
if (pbafter == bpos) {
### Check that prev bound is on the same chain ##
output$pos[j] <- npos
output$x[j] <- output$x[output$pos == prev1] + vect$x[1]
output$y[j] <- output$y[output$pos == prev1] + vect$y[1]
j <- j + 1
if (j > 3) {
pbpos <- input$bound[input$pos == prev1]
bpos <- input$bound[input$pos == npos]
output$pos[j] <- bpos
output$x[j] <- output$x[output$pos == pbpos] + vect$x[1]
output$y[j] <- output$y[output$pos == pbpos] + vect$y[1]
j <- j + 1
}
} else {
ends <- c(prev1, pbpos)
j <- mf + 5
}
prev1 <- npos
#################################################
}
}
} else {
j <- mf + 5
}
### Second NT is at 0,1 ###
}
l <- NULL
l[[1]] <- output
l[[2]] <- ends
l
}
loopLength <- function(input, start) {
#### Determine how many NTs there are before another base-pair appears ####
### Loop Ends when Get to previous NT's bound or the last NT of the chain #
stems <- NULL
s <- 1
first <- input[input$pos == start, ]
mf <- length(input$pos)
mp <- max(input$pos)
j <- 0
ntot <- 1
output <- NULL
output$pos[1] <- first$pos
i <- 2
nextNT <- NULL
nextNT <- input[input$pos == first$pos, ]
bound <- 1
while (j < mf) {
if (nextNT$bound != 0 & bound == 0) {
npos <- nextNT$bound
bound <- 1
### Save To Stems ###
### Determine vector toward outside of circle ##
stems[[s]] <- c(nextNT$pos, npos)
s <- s + 1
} else {
bound <- 0
npos <- input$after[input$pos == nextNT$pos]
}
nextNT <- input[input$pos == npos, ]
if (nextNT$pos == first$bound) {
j <- mf
output$pos[i] <- nextNT$pos
} else {
if (nextNT$pos == mp) {
ntot <- ntot + 1
j <- mf
output$pos[i] <- nextNT$pos
} else {
ntot <- ntot + 1
output$pos[i] <- nextNT$pos
j <- j + 1
i <- i + 1
}
}
}
### Returns Pos, X,Y, foreach element in the loop ###
out <- NULL
out[[1]] <- output
out[[2]] <- stems
out
}
rotateS <- function(x2, y2, x0, y0, ang) {
pt <- NULL
x1 <- x2 - x0
y1 <- y2 - y0
pt[1] <- x1 * cos(ang) - y1 * sin(ang) + x0
pt[2] <- y1 * cos(ang) + x1 * sin(ang) + y0
pt
}
circleCoord <- function(n) {
ang <- 2 * pi / (n)
output <- NULL
output$y[1] <- 0
output$x[1] <- 1
for (i in 1:(n - 1)) {
v <- rotateS(1, 0, 0, 0, (i * ang))
j <- i + 1
output$x[j] <- v[1] + output$x[(j - 1)]
output$y[j] <- v[2] + output$y[(j - 1)]
}
### Rotate ##
output
}
genCords <- function(loop, p1, p2, input, vn) {
#### Given the loop coordinates for the first two points in generates all others ####
dat <- loop[[1]]
n <- length(dat$pos)
### Get Coordinates of p1,p2 ###
c <- circleCoord(n)
### Translate p1 to 0,0 ###
### Calcualte angle between p2 and x-axis
pt1 <- NULL
pt2 <- NULL
pt1$x <- input$x[input$pos == p1]
pt1$y <- input$y[input$pos == p1]
pt2$x <- input$x[input$pos == p2]
pt2$y <- input$y[input$pos == p2]
pt2 <- translate(pt2$x, pt2$y, pt1$x, pt1$y)
ang <- atan2(pt2$y, pt2$x)
ang
### Rotate Circ coordinates and translate ###
if (vn == 1) {
#### Flip over y=axis ###
c$y <- -1 * c$y
}
c <- rotateV(c$x, c$y, 0, 0, ang)
c <- translate(c$x, c$y, (-1 * pt1$x), (-1 * pt1$y))
c
}
translate <- function(x1, y1, x2, y2) {
out <- NULL
out$x <- x1 - x2
out$y <- y1 - y2
out
}
rotateV <- function(x2, y2, x0, y0, ang) {
## Rotates and translates a vector ###
pt <- NULL
x1 <- x2 - x0
y1 <- y2 - y0
pt$x <- x1 * cos(ang) - y1 * sin(ang) + x0
pt$y <- y1 * cos(ang) + x1 * sin(ang) + y0
pt
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.