R/affinity.R

calc.dh.ds <- function(oligos, targets=NULL, parameters.in=NULL, parallel=F, cores=NA) {
  
  ### function that makes the complement of the oligo
  ### needed when target=NULL so that target = complement.internal(oligo)
  ###
  complement.internal <- function(x) {
    x <- tolower(x)
    x <- gsub("a","T",x)
    x <- gsub("t","A",x)
    x <- gsub("c","G",x)
    x <- gsub("g","C",x)
    return(tolower(x))
  }
  
  ### function that makes the reverse of a character vector,
  ### needed for checking self-complementarity of oligos for symmetry correction
  ###
  rev.internal <- function(x) {
    x <- paste(rev(strsplit(x,"")[[1]]),collapse="")
    return(x)
  }
  
  ### function that calculates dh and ds for a single entry
  ### x is a character vector of length 2 (oligo seq, 5'-3', target seq, 3'-5') 
  ###
  calc.single.dh.ds <- function(x) {
    ### function to split character vector into two-letter units (dinucleotides)
    split.dnt <- function(char.in) {
      n <- nchar(char.in)
      char.out <- substring(char.in,1:(n-1),2:n)
    }
    ### split oligo and target, combine
    oligo.split <- split.dnt(x[1])
    target.split <- split.dnt(x[2])
    comb.split <- paste(oligo.split,target.split,sep="|") #these now match rownames in thermopar (parameters.in)
    
    ### get flanks (the first and last nucleotide, these also contribute to dh and ds in addition to their contribution as part of dinucleotides)
    n <- nchar(x[1])
    oligo.flanks <- paste("flank",c(substr(x[1],1,1),substr(x[1],n,n)),sep=".")
    target.flanks <- paste("flank",c(substr(x[2],1,1),substr(x[2],n,n)),sep=".")
    comb.flanks <-  paste(oligo.flanks,target.flanks,sep="|") #these now match rownames in thermopar (parameters.in)
    
    ### include symmetri correction in case oligo is self-complementary
    olg.revcomp <- rev.internal(complement.internal(x[1]))
    if (olg.revcomp==x[1]) {
      comb.split <- c(comb.split,"symmetry")
    }
    
    ### lookup
    out.df <- na.omit(parameters.in[c(comb.split, comb.flanks),])
    if (nrow(out.df)!=(n+1) & nrow(out.df)!=(n+2)) {  #are there n-1 dinucleotides and 2 nucleotide flanks (n+1) and perhaps symmetry (n+2)??
      stop("Some elements in the oligo could not be found in the lookup table (thermopar)")
    }
    out.dh <- sum(out.df$enthalpy.cal.mol)
    out.ds <- sum(out.df$entropy.cal.mol.K)
    out <- c(dh=out.dh, ds=out.ds)
    return(out)
  }
  
  ### check input
  ### 
  
  ## check thermodynamic parameters
  if (is.null(parameters.in)) {   #if function input has not been changed from default
    ## If thermopar can not be found in the global environment, load it
    if (sum(c("thermopar")%in%ls(globalenv()))!=1) {
      data(thermopar, package="affinity")
    }
    parameters.in <- thermopar
  }
  
  ## check oligos
  l.oligo <- length(oligos)
  if (sum(!is.na(oligos))!=l.oligo) {
    stop("Some entries in oligo are NA")
  }
  if (sum(oligos!="")!=l.oligo) {
    stop("Some entries in oligo are empty")
  }
  uni.nt <- unique(unlist(strsplit(oligos,"")))
  if (length(uni.nt)!=sum( uni.nt%in%c("A","T","C","G","a","t","c","g"))) { #important, only these letters are allowed!!
    stop("Some characters in the oligo are not A,T,C,G,a,t,c,g")
  }
  
  ## check targets
  ## make target sequences if they are not input
  if (is.null(targets[1])) {
    targets <- complement.internal(oligos)
  } else {
    if (sum(nchar(oligos)==nchar(targets))!=length(oligos)) {
      stop("oligo lengths and target lengths do not match")
    }
  }
  
  ### do the actual calculations
  ### 
  if (parallel) {
    ## set number of cores (if NA, set to maximal as detected) 
    if (is.na(cores)) {
      no.cores <- detectCores()
    } else {
      no.cores <- cores
    }
    
    options(warn = -1)
    ## make the number no.cores of parallel R sessions
    cl <- suppressPackageStartupMessages(makeCluster(no.cores))
    
    ## export the functions and objects needed in the spawned processes to make calc.single.dh.ds run
    clusterExport(cl=cl, varlist=c("complement.internal","rev.internal", "parameters.in"), envir=environment())
    
    ## do the parallel computation, and terminate processes
    out <- parRapply(cl=cl, x=cbind(oligos,targets), FUN=calc.single.dh.ds)
    stopCluster(cl)
    
    ## format out similar to when not doing parallel
    out <- as.data.frame(matrix(out,ncol=2, byrow=T))
    colnames(out) <- c("dh","ds")
    options(warn=0)
    
  } else {
    ##  call the function to calculate dh and ds on each oligo (and target)
    out <- as.data.frame(t(apply(cbind(oligos,targets),1,calc.single.dh.ds)))
    colnames(out) <- c("dh","ds")
  }
  return(out)
}



calc.dg <- function(dh, ds, temp.c.in=37) {
  temp.k <- temp.c.in+273.15
  dg <- dh - temp.k*ds
  return(dg)
}

calc.dg.internal <- function(dh.ds.in, temp.c.in=37) {
  dg <- calc.dg(dh=dh.ds.in$dh,ds=dh.ds.in$ds, temp.c.in=temp.c.in)
  return(dg)
}

calc.tm.internal <- function(dh.ds.in, conc.total=1e-5) {
  R <- 1.986  #cal/(mol*K)
  tm <- calc.tm(dh=dh.ds.in$dh,ds=dh.ds.in$ds,conc.oligo=conc.total/2, conc.rna=conc.total/2)
  return(tm)
}

calc.tm <- function(dh, ds, conc.oligo=5e-4, conc.rna=5e-4) {
  R <- 1.986 #cal/(mol*K)
  if (conc.oligo>conc.rna) {
    tm <- dh/(ds + R*log(conc.oligo - conc.rna/2))
  } else {
    tm <- dh/(ds + R*log(conc.rna - conc.oligo/2))
  }
  tm <- as.numeric(tm - 273.15)
  return(tm)
}

calc.po.dtm <- function(oligos, conc.total=1e-5, parallel=F, cores=NA) {
  dhds.asdna <- calc.dh.ds(oligo=tolower(oligos), parallel=parallel, cores=cores) 
  tm.asdna <- calc.tm.internal(dh.ds.in=dhds.asdna, conc.total=conc.total)
  l <- nchar(oligos)-1
  tm.sub <- tm.asdna/l
  dtm <- -0.4762 * tm.sub + 0.789
  dtm[dtm>0] <- 0
  dtm[dtm< -2] <- -2
  dtm <- dtm * l
  return(dtm)
}

calc.k <- function(dg, temp.c.in) {
  R <- 1.986 #cal/(mol*K)
  temp.k <- temp.c.in+273.15
  k.out <- exp(-dg/(R*temp.k))
  return(k.out)
}

calc.k.internal <- function(dh.ds.in, temp.c.in=37) {
  dg <- calc.dg.internal(dh.ds.in=dh.ds.in, temp.c.in=temp.c.in)
  k.out <- calc.k(dg=dg, temp.c.in=temp.c.in)
  return(k.out)
}

calculate.affinity <- function(oligos, conc.total=1e-5, temp.c.in=37, parallel=F, cores=NA) {
  ## make sure oligos is a character vector 
  oligos <- as.character(oligos)
  
  ## are there any NAs?
  tf <- is.na(oligos)
  
  ## if not all entries are NAs...
  if (sum(tf)!=length(tf)) {
    oligos.full <- oligos
    oligos <- na.omit(oligos)
    
    vecna <- rep(NA, length(oligos.full))
    y <- data.frame(tm=vecna, tm.unadj=vecna, dg=vecna, dh=vecna, ds=vecna, k=vecna)
      
    ## calculate enthaply and entropy
    dhds.out <- calc.dh.ds(oligo=oligos, parallel=parallel, cores=cores)
    
    ## calculate free energy
    dg.out <- calc.dg.internal(dh.ds.in=dhds.out, temp.c.in=temp.c.in)
    
    ## calculate melting temperature without adjustment for phosphorothiate link
    tm.out <- calc.tm.internal(dh.ds.in=dhds.out, conc.total=conc.total)
    
    ## calculate tm adjusted for phosphorothiate link
    tm.adj.out <- tm.out + calc.po.dtm(oligos=oligos, conc.total=conc.total, parallel=parallel, cores=cores)
    
    ## calculate equilibrium constant at temp.c.in degree celcius
    k.out <- calc.k.internal(dh.ds.in=dhds.out, temp.c.in=temp.c.in)
    y[!tf,] <- data.frame(tm=signif(tm.adj.out,3), tm.unadj=signif(tm.out,3), dg=dg.out, dh=dhds.out$dh, ds=dhds.out$ds, k=signif(k.out,3))
    rownames(y) <- 1:nrow(y)
  } else {
    vecna <- rep(NA, sum(tf))
    y <- data.frame(tm=vecna, tm.unadj=vecna, dg=vecna, dh=vecna, ds=vecna, k=vecna)
    rownames(y) <- 1:nrow(y)
  }
  
  ## collect all calculations and return
  return(y)
}

calculate.rnaseh.activity <- function(oligos.lna) {
  data(rnaseh_param, package="affinity")
  states.di <- paste(rep(c("a","c","g","t"),each=4),rep(c("a","c","g","t"),4), sep="") 
  states.tri <- paste(rep(states.di, each=4), rep(c("a","c","g","t"),16), sep="")
  states.quad <- paste(rep(states.tri, each=4), rep(c("a","c","g","t"),64), sep="")
  
  split.oligo.quad <- function(x) {
    l <- nchar(x)
    a <- table(factor(substring(x, 1:(l - 3), 4:l), 
                      levels = states.quad))
    return(a)
  }
  
  oligos.lna <- as.character(oligos.lna)
  
  ## are there any NAs?
  tf <- is.na(oligos.lna)
  
  ## if not all entries are NAs...
  if (sum(tf)!=length(tf)) {
    oligos.full <- oligos.lna
    oligos <- na.omit(oligos.lna)
  
    count.quad.all <- t(sapply(as.character(oligos), split.oligo.quad))
  
    num <- as.numeric(round(count.quad.all%*%vec.quad[colnames(count.quad.all)]+vec.quad["(Intercept)"],0))
    num[num<0] <- 0
    num[num>100] <- 100
    
    num.all <- rep(NA, length(oligos.full))
    num.all[!tf] <- num
    num <- num.all
  } else {
    num <- rep(NA, sum(tf))
  }
  return(num)
}
Santaris/affinity documentation built on May 9, 2019, 12:43 p.m.