calc.dh.ds <- function(oligos, targets=NULL, parameters.in=NULL, parallel=F, cores=NA) {
### function that makes the complement of the oligo
### needed when target=NULL so that target = complement.internal(oligo)
###
complement.internal <- function(x) {
x <- tolower(x)
x <- gsub("a","T",x)
x <- gsub("t","A",x)
x <- gsub("c","G",x)
x <- gsub("g","C",x)
return(tolower(x))
}
### function that makes the reverse of a character vector,
### needed for checking self-complementarity of oligos for symmetry correction
###
rev.internal <- function(x) {
x <- paste(rev(strsplit(x,"")[[1]]),collapse="")
return(x)
}
### function that calculates dh and ds for a single entry
### x is a character vector of length 2 (oligo seq, 5'-3', target seq, 3'-5')
###
calc.single.dh.ds <- function(x) {
### function to split character vector into two-letter units (dinucleotides)
split.dnt <- function(char.in) {
n <- nchar(char.in)
char.out <- substring(char.in,1:(n-1),2:n)
}
### split oligo and target, combine
oligo.split <- split.dnt(x[1])
target.split <- split.dnt(x[2])
comb.split <- paste(oligo.split,target.split,sep="|") #these now match rownames in thermopar (parameters.in)
### get flanks (the first and last nucleotide, these also contribute to dh and ds in addition to their contribution as part of dinucleotides)
n <- nchar(x[1])
oligo.flanks <- paste("flank",c(substr(x[1],1,1),substr(x[1],n,n)),sep=".")
target.flanks <- paste("flank",c(substr(x[2],1,1),substr(x[2],n,n)),sep=".")
comb.flanks <- paste(oligo.flanks,target.flanks,sep="|") #these now match rownames in thermopar (parameters.in)
### include symmetri correction in case oligo is self-complementary
olg.revcomp <- rev.internal(complement.internal(x[1]))
if (olg.revcomp==x[1]) {
comb.split <- c(comb.split,"symmetry")
}
### lookup
out.df <- na.omit(parameters.in[c(comb.split, comb.flanks),])
if (nrow(out.df)!=(n+1) & nrow(out.df)!=(n+2)) { #are there n-1 dinucleotides and 2 nucleotide flanks (n+1) and perhaps symmetry (n+2)??
stop("Some elements in the oligo could not be found in the lookup table (thermopar)")
}
out.dh <- sum(out.df$enthalpy.cal.mol)
out.ds <- sum(out.df$entropy.cal.mol.K)
out <- c(dh=out.dh, ds=out.ds)
return(out)
}
### check input
###
## check thermodynamic parameters
if (is.null(parameters.in)) { #if function input has not been changed from default
## If thermopar can not be found in the global environment, load it
if (sum(c("thermopar")%in%ls(globalenv()))!=1) {
data(thermopar, package="affinity")
}
parameters.in <- thermopar
}
## check oligos
l.oligo <- length(oligos)
if (sum(!is.na(oligos))!=l.oligo) {
stop("Some entries in oligo are NA")
}
if (sum(oligos!="")!=l.oligo) {
stop("Some entries in oligo are empty")
}
uni.nt <- unique(unlist(strsplit(oligos,"")))
if (length(uni.nt)!=sum( uni.nt%in%c("A","T","C","G","a","t","c","g"))) { #important, only these letters are allowed!!
stop("Some characters in the oligo are not A,T,C,G,a,t,c,g")
}
## check targets
## make target sequences if they are not input
if (is.null(targets[1])) {
targets <- complement.internal(oligos)
} else {
if (sum(nchar(oligos)==nchar(targets))!=length(oligos)) {
stop("oligo lengths and target lengths do not match")
}
}
### do the actual calculations
###
if (parallel) {
## set number of cores (if NA, set to maximal as detected)
if (is.na(cores)) {
no.cores <- detectCores()
} else {
no.cores <- cores
}
options(warn = -1)
## make the number no.cores of parallel R sessions
cl <- suppressPackageStartupMessages(makeCluster(no.cores))
## export the functions and objects needed in the spawned processes to make calc.single.dh.ds run
clusterExport(cl=cl, varlist=c("complement.internal","rev.internal", "parameters.in"), envir=environment())
## do the parallel computation, and terminate processes
out <- parRapply(cl=cl, x=cbind(oligos,targets), FUN=calc.single.dh.ds)
stopCluster(cl)
## format out similar to when not doing parallel
out <- as.data.frame(matrix(out,ncol=2, byrow=T))
colnames(out) <- c("dh","ds")
options(warn=0)
} else {
## call the function to calculate dh and ds on each oligo (and target)
out <- as.data.frame(t(apply(cbind(oligos,targets),1,calc.single.dh.ds)))
colnames(out) <- c("dh","ds")
}
return(out)
}
calc.dg <- function(dh, ds, temp.c.in=37) {
temp.k <- temp.c.in+273.15
dg <- dh - temp.k*ds
return(dg)
}
calc.dg.internal <- function(dh.ds.in, temp.c.in=37) {
dg <- calc.dg(dh=dh.ds.in$dh,ds=dh.ds.in$ds, temp.c.in=temp.c.in)
return(dg)
}
calc.tm.internal <- function(dh.ds.in, conc.total=1e-5) {
R <- 1.986 #cal/(mol*K)
tm <- calc.tm(dh=dh.ds.in$dh,ds=dh.ds.in$ds,conc.oligo=conc.total/2, conc.rna=conc.total/2)
return(tm)
}
calc.tm <- function(dh, ds, conc.oligo=5e-4, conc.rna=5e-4) {
R <- 1.986 #cal/(mol*K)
if (conc.oligo>conc.rna) {
tm <- dh/(ds + R*log(conc.oligo - conc.rna/2))
} else {
tm <- dh/(ds + R*log(conc.rna - conc.oligo/2))
}
tm <- as.numeric(tm - 273.15)
return(tm)
}
calc.po.dtm <- function(oligos, conc.total=1e-5, parallel=F, cores=NA) {
dhds.asdna <- calc.dh.ds(oligo=tolower(oligos), parallel=parallel, cores=cores)
tm.asdna <- calc.tm.internal(dh.ds.in=dhds.asdna, conc.total=conc.total)
l <- nchar(oligos)-1
tm.sub <- tm.asdna/l
dtm <- -0.4762 * tm.sub + 0.789
dtm[dtm>0] <- 0
dtm[dtm< -2] <- -2
dtm <- dtm * l
return(dtm)
}
calc.k <- function(dg, temp.c.in) {
R <- 1.986 #cal/(mol*K)
temp.k <- temp.c.in+273.15
k.out <- exp(-dg/(R*temp.k))
return(k.out)
}
calc.k.internal <- function(dh.ds.in, temp.c.in=37) {
dg <- calc.dg.internal(dh.ds.in=dh.ds.in, temp.c.in=temp.c.in)
k.out <- calc.k(dg=dg, temp.c.in=temp.c.in)
return(k.out)
}
calculate.affinity <- function(oligos, conc.total=1e-5, temp.c.in=37, parallel=F, cores=NA) {
## make sure oligos is a character vector
oligos <- as.character(oligos)
## are there any NAs?
tf <- is.na(oligos)
## if not all entries are NAs...
if (sum(tf)!=length(tf)) {
oligos.full <- oligos
oligos <- na.omit(oligos)
vecna <- rep(NA, length(oligos.full))
y <- data.frame(tm=vecna, tm.unadj=vecna, dg=vecna, dh=vecna, ds=vecna, k=vecna)
## calculate enthaply and entropy
dhds.out <- calc.dh.ds(oligo=oligos, parallel=parallel, cores=cores)
## calculate free energy
dg.out <- calc.dg.internal(dh.ds.in=dhds.out, temp.c.in=temp.c.in)
## calculate melting temperature without adjustment for phosphorothiate link
tm.out <- calc.tm.internal(dh.ds.in=dhds.out, conc.total=conc.total)
## calculate tm adjusted for phosphorothiate link
tm.adj.out <- tm.out + calc.po.dtm(oligos=oligos, conc.total=conc.total, parallel=parallel, cores=cores)
## calculate equilibrium constant at temp.c.in degree celcius
k.out <- calc.k.internal(dh.ds.in=dhds.out, temp.c.in=temp.c.in)
y[!tf,] <- data.frame(tm=signif(tm.adj.out,3), tm.unadj=signif(tm.out,3), dg=dg.out, dh=dhds.out$dh, ds=dhds.out$ds, k=signif(k.out,3))
rownames(y) <- 1:nrow(y)
} else {
vecna <- rep(NA, sum(tf))
y <- data.frame(tm=vecna, tm.unadj=vecna, dg=vecna, dh=vecna, ds=vecna, k=vecna)
rownames(y) <- 1:nrow(y)
}
## collect all calculations and return
return(y)
}
calculate.rnaseh.activity <- function(oligos.lna) {
data(rnaseh_param, package="affinity")
states.di <- paste(rep(c("a","c","g","t"),each=4),rep(c("a","c","g","t"),4), sep="")
states.tri <- paste(rep(states.di, each=4), rep(c("a","c","g","t"),16), sep="")
states.quad <- paste(rep(states.tri, each=4), rep(c("a","c","g","t"),64), sep="")
split.oligo.quad <- function(x) {
l <- nchar(x)
a <- table(factor(substring(x, 1:(l - 3), 4:l),
levels = states.quad))
return(a)
}
oligos.lna <- as.character(oligos.lna)
## are there any NAs?
tf <- is.na(oligos.lna)
## if not all entries are NAs...
if (sum(tf)!=length(tf)) {
oligos.full <- oligos.lna
oligos <- na.omit(oligos.lna)
count.quad.all <- t(sapply(as.character(oligos), split.oligo.quad))
num <- as.numeric(round(count.quad.all%*%vec.quad[colnames(count.quad.all)]+vec.quad["(Intercept)"],0))
num[num<0] <- 0
num[num>100] <- 100
num.all <- rep(NA, length(oligos.full))
num.all[!tf] <- num
num <- num.all
} else {
num <- rep(NA, sum(tf))
}
return(num)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.