#' Upper-/lower case representation of an LNA-oligo from sequence and modification
#'
#' Combines a nucleotide sequence and its per-position modification string into
#' a single string in which positions marked \code{D} in the modification are
#' written in lower case and all other positions in upper case. Both inputs are
#' upper-cased first, and every \code{U} in the sequence is written as \code{T}.
#'
#' @param line.in a character vector of length 2: the first element is the
#' sequence (e.g. ACGTGTTT), the second element is the modification
#' (e.g. LLLDDDLL).
#' @return A single character string with the merged representation.
#' @keywords LNA representation
#' @export
#' @examples
#' seqmod2uplow(c("ACGTGTTT","LLLDDDLL"))
#' @seealso \code{\link{seqmod2plus}}
seqmod2uplow <- function(line.in) {
  # Case-normalise both elements so lower-case input is accepted as well.
  upper <- toupper(line.in)

  # Replace U by T on the whole sequence, then split into single characters.
  bases <- strsplit(gsub("U", "T", upper[1], fixed = TRUE), "")[[1]]

  # TRUE at every position whose modification code is D.
  is_dna <- strsplit(upper[2], "")[[1]] == "D"

  # D positions become lower case; everything else stays upper case.
  bases[is_dna] <- tolower(bases[is_dna])

  paste(bases, collapse = "")
}
#' Plus (+) representation of an LNA-oligo from sequence and modification
#'
#' Combines a nucleotide sequence and its per-position modification string into
#' a single upper-case string in which every position whose modification is
#' not \code{D} is prefixed with \code{+}. Both inputs are upper-cased first,
#' and every \code{U} in the sequence is written as \code{T}.
#'
#' @param line.in a character vector of length 2: the first element is the
#' sequence (e.g. ACGTGTTT), the second element is the modification
#' (e.g. LLLDDDLL).
#' @return A single character string with the merged representation.
#' @keywords LNA representation
#' @export
#' @examples
#' seqmod2plus(c("ACGTGTTT","LLLDDDLL"))
#' @seealso \code{\link{seqmod2uplow}}
seqmod2plus <- function(line.in) {
  # Case-normalise both elements so lower-case input is accepted as well.
  upper <- toupper(line.in)

  # Replace U by T on the whole sequence, then split into single characters.
  bases <- strsplit(gsub("U", "T", upper[1], fixed = TRUE), "")[[1]]

  # TRUE at every position whose modification code is anything but D.
  is_modified <- !(strsplit(upper[2], "")[[1]] == "D")

  # Mark the non-D positions with a leading plus sign.
  bases[is_modified] <- paste0("+", bases[is_modified])

  paste(bases, collapse = "")
}
#' Construct the upper-/lower case representation of LNA-oligos by flank-input
#'
#' This function uses input on flank lengths to construct a upper-/lower case
#' representation of a gapmer: the first \code{leftflank} and the last
#' \code{rightflank} characters are written in upper case, the characters in
#' between in lower case.
#'
#' If the two flanks together are longer than the sequence, the left flank
#' takes precedence and the right flank covers only the remaining characters,
#' so the result always has the same number of characters as the input.
#'
#' @param o character string (the sequence) to be converted to gapmer format.
#' A character vector is processed element-wise.
#' @param leftflank size of left LNA-flank.
#' @param rightflank size of right LNA flank.
#' @return A character vector of the same length as \code{o}, carrying the
#' names of \code{o}.
#' @export
#' @examples
#' sapply(c("ACGTGTTT","TCCGGAAT"),gapmerize)
#' gapmerize("ACGT")  # flanks overlap: whole sequence is upper case
#' @seealso \code{\link{seqmod2uplow}}
gapmerize <- function(o, leftflank = 3, rightflank = 3) {
  len <- nchar(o)

  # Clamp the flank boundaries so that left, middle and right never overlap;
  # otherwise short sequences would get characters duplicated in the output.
  left_end <- pmin(leftflank, len)
  right_start <- pmax(len - rightflank + 1, left_end + 1)

  left <- substr(o, 1, left_end)
  middle <- substr(o, left_end + 1, right_start - 1)
  right <- substr(o, right_start, len)

  gapmer <- paste0(toupper(left), tolower(middle), toupper(right))
  names(gapmer) <- names(o)
  gapmer
}
#' Another version of seqmod2uplow
#'
#' Builds the upper-/lower case representation for several oligos at once.
#' Each sequence is upper-cased, and every position whose modification
#' character equals \code{"D"} is then written in lower case. The modification
#' strings are compared as given (they are not case-normalised).
#'
#' @param seqs character vector (or list) of sequences.
#' @param mods character vector (or list) of modification strings, one per
#' element of \code{seqs} and of the same number of characters as the
#' corresponding sequence.
#' @return A character vector with one merged string per element of
#' \code{seqs}; \code{character(0)} for empty input. An error is raised when a
#' sequence and its modification string differ in length.
#' @export
seqdesign2camel <- function(seqs, mods) {
  seqs <- toupper(seqs)

  # Preallocate the result; seq_along() makes empty input a no-op instead of
  # iterating over c(1, 0) and failing on seqs[[1]].
  camelcases <- character(length(seqs))
  for (i in seq_along(seqs)) {
    seq <- seqs[[i]]
    mod <- mods[[i]]
    if (nchar(seq) != nchar(mod)) {
      stop(paste(seq, "and", mod, "not of equal length, at",i))
    }
    dna <- strsplit(mod, "")[[1]] == "D"
    seq.split <- strsplit(seq, "")[[1]]
    seq.split[dna] <- tolower(seq.split[dna])
    camelcases[i] <- paste(seq.split, collapse = "")
  }
  camelcases
}
#' Pythia database representation of an LNA-oligo
#'
#' Merges sequence, modification and backbone of a LNA-oligo into the
#' representation used in the Pythia database: one
#' \code{<modification><base>:<backbone>} unit per position, with the units
#' joined by \code{;}.
#'
#' The sequence is lower-cased and \code{e} is written as \code{mc}. The
#' modification codes D, L and M are written as \code{dna}, \code{oxy} and
#' \code{moe}; any other code is passed through unchanged. \code{oxyc} is
#' always written as \code{oxymc}.
#'
#' @param line.in a character vector of length 3 where the first element is the
#' sequence (e.g. ACGTGTTT), the second element the modification
#' (e.g. LLLDDDLL), and the final element is the backbone ("SSOSSOSH"). Notice
#' all elements have same length. The backbone elements are between
#' nucleotides and always have H in the end.
#' @return A single character string in Pythia notation.
#' @keywords LNA representation
#' @export
#' @examples
#' a <- seqmod2pythia( c("acgtacgtee", "LLDDLDLDDD", "SSOSSOOSSH"))
#' @seealso \code{\link{pythia2seqmod}}
seqmod2pythia <- function(line.in) {
  # One character per position: bases lower case, codes upper case.
  bases <- strsplit(tolower(line.in[1]), "")[[1]]
  sugars <- strsplit(toupper(line.in[2]), "")[[1]]
  links <- strsplit(toupper(line.in[3]), "")[[1]]

  # e is spelled "mc" in Pythia notation.
  bases[which(bases == "e")] <- "mc"

  # Translate the one-letter sugar codes; unknown codes are left untouched.
  pythia_names <- c(D = "dna", L = "oxy", M = "moe")
  known <- which(sugars %in% names(pythia_names))
  sugars[known] <- pythia_names[sugars[known]]

  # Glue sugar and base together first, then attach the backbone.
  nucleotides <- paste0(sugars, bases)
  units <- paste(nucleotides, links, sep = ":")
  joined <- paste(units, collapse = ";")

  # With LNA the C is always the methylated version, so oxyc becomes oxymc.
  gsub("oxyc", "oxymc", joined, fixed = TRUE)
}
#' Split the Pythia database representation of an LNA-oligo
#'
#' Takes a string in the notation used in the Pythia database and returns its
#' sequence, modification and backbone as three separate strings. The input is
#' lower-cased before parsing.
#'
#' Each \code{;}-separated unit is read as
#' \code{<modification><base>:<backbone>}, where the modification is the first
#' three characters (\code{oxy}, \code{dna} or \code{moe}, reported as L, D or
#' M). \code{oxymc} is read as \code{oxyc}, and any remaining \code{mc} is
#' reported as \code{e}.
#'
#' @param line.in a character vector of length 1 with something like
#' oxya:S;oxymc:S;dnag:O;dnat:S;oxya:S;dnac:O;oxyg:O;dnat:S;dnamc:S;dnamc:H.
#' @return A character vector of length 3: the lower-case sequence, the
#' upper-case modification string and the upper-case backbone string.
#' @keywords LNA representation
#' @export
#' @examples
#' a <- pythia2seqmod("oxya:S;oxymc:S;dnag:O;dnat:S;oxya:S;dnac:O;oxyg:O;dnat:S;dnamc:S;dnamc:H")
#' @seealso \code{\link{seqmod2pythia}}
pythia2seqmod <- function(line.in) {
  # Break the lower-cased input into units, and each unit at the colon.
  units <- strsplit(strsplit(tolower(line.in), ";")[[1]], ":")
  nucleotide <- vapply(units, function(u) u[1], character(1))
  backbone <- vapply(units, function(u) u[2], character(1))

  # LNA C is stored as oxymc but is reported as a plain C, not as E.
  nucleotide <- gsub("oxymc", "oxyc", nucleotide)

  # Characters 1-3 name the modification, the rest names the base.
  sugar <- lapply(
    substr(nucleotide, 1, 3),
    function(tag) switch(tag, oxy = "L", dna = "D", moe = "M")
  )
  base <- gsub("mc", "e", substr(nucleotide, 4, 10))

  # Sequence stays lower case; modification and backbone are upper case.
  c(
    paste(base, collapse = ""),
    toupper(paste(sugar, collapse = "")),
    toupper(paste(backbone, collapse = ""))
  )
}
#' Filter LNA-oligos and construct the upper-/lower case representation based on modification-input
#'
#' This function filters LNA-oligos based on sequence, sugar modification and backbone and returns
#' the upper-/lower case representation. Only oligos suitable for the oligo-predictors are accepted.
#' All nucleobases other than A, C, G, T, U and E, sugar modifications other than L and D and backbones other
#' than S (phosphorothioate) are excluded and result in NA.
#'
#' In accepted oligos U is written as T and E as C; positions with
#' modification D are lower case, all others upper case. The input is
#' upper-cased before it is checked.
#'
#' @param line.in a character vector of length 3 where the first element is the sequence
#' (e.g. ACGTGTTT), the second element the modification (e.g. LLLDDDLL) and the third
#' is the backbone (e.g. SSSSSSSS). An optional fourth element is checked as well when
#' supplied: it must consist of the letter X only, otherwise the oligo is excluded
#' (this check is meant to filter out stereo-defined oligos).
#' @return A single character string with the upper-/lower case representation, or
#' \code{NA} when the oligo does not pass the filter.
#' @keywords LNA representation
#' @export
#' @examples
#' seqmod2uplow.strict(c("ACGTGTTT","LLLDDDLL","SSSSSSSS"))
#' seqmod2uplow.strict(c("ACGTGTTT","LLLDDDLL","SSSSSSSS","XXXXXXXX"))
#' @seealso \code{\link{seqmod2uplow}} \code{\link{seqmod2plus}}
#'
seqmod2uplow.strict <- function(line.in) {
  line.in <- toupper(line.in)

  # Check whether line is compliant
  checks <- c(
    grepl("^[ACGTEU]+$", line.in[1]), # only nucleobases A, C, G, T, U and E
    grepl("^[LD]+$", line.in[2]),     # only sugar modifications L and D
    grepl("^[S]+$", line.in[3])       # only backbone S
  )
  # The fourth element is optional. Testing it unconditionally would turn
  # every documented length-3 input into NA, because grepl() on the missing
  # element is FALSE.
  if (length(line.in) >= 4) {
    checks <- c(checks, grepl("^[X]+$", line.in[4])) # filter out stereo defined
  }

  # Set non-compliant lines as NA
  if (!all(checks)) {
    return(NA)
  }

  # Determine upper-lower case notation for compliant lines
  tf.dna <- strsplit(line.in[2], "")[[1]] == "D"
  ol.in <- strsplit(line.in[1], "")[[1]]
  ol.in[ol.in == "U"] <- "T"
  ol.in[ol.in == "E"] <- "C"
  ol.in[tf.dna] <- tolower(ol.in[tf.dna])
  paste(ol.in, collapse = "")
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.