R/findit.R

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# CHNOSZ/findit.R
# find the minimum or maximum of a objective 
# (eg cv, richness, shannon)
# as a function the specified chemical potentials
# 20100831 jmd

findit <- function(lims=list(), objective="CV", niter=NULL, iprotein=NULL, plot.it=TRUE,
  T=25, P="Psat", res=NULL, labcex=0.6, loga2=NULL,
  loga.balance=0, rat=NULL, balance=NULL, normalize=FALSE) {
  # the lims list has the limits of the arguments to affinity()
  # we iteratively move toward a higher/lower value of the objective
  # within these limits
  
  # fun stuff: when running on either side of 100 deg C,
  # set P=1 to force an extreme of some functions near that temperature
  
  # the number of dimensions
  nd <- length(lims)

  # get protein lengths if iprotein is given
  if(!is.null(iprotein)) pl <- protein.length(iprotein)

  # how many gradations in each dimension we can timely 
  # consider goes down with the number of dimensions
  if(is.null(res)) res <- c(128,64,16,8,6,4,4)[nd]
  if(is.null(niter)) niter <- c(4,6,6,8,12,12,12)[nd]
  # the size fraction of the new intervals after each iteration
  # we can zoom in more quickly if the gradations are smaller
  if(is.null(rat)) {
    rat <- 0.95
    if(res > 4) rat <- 0.9
    if(res > 8) rat <- 0.8
    if(res > 16) rat <- 0.7
  }

  # the initial values of the guesses (if midpoint==FALSE)
  basis <- get("thermo")$basis

  # a hack so that we can use pH as a variable
  if("pH" %in% names(lims)) {
    iH <- match("H+",rownames(basis))
    if(length(iH) > 0) {
      rownames(basis)[iH] <- "pH"
      basis$logact[iH] <- -basis$logact[iH]
    } else(stop("pH is a requested variable but H+ is not in the basis"))
  }

  # function to get the current limits of a variable
  limfun <- function(lim,curr,i) {
    if(i==1) mylims <- lim
    else {
      # in the ith loop we consider intervals
      # of rat^(i-1) of the ranges specified at start
      message(paste("optimal:",round(curr,4),"",""), appendLF=FALSE)
      range <- range(lim)
      int <- abs(diff(range)) * rat^(i-1)
      mylims <- c(curr-int/2, curr+int/2)
      # don't go beyond the starting limits
      if(any(mylims < min(range))) mylims <- mylims + (min(range) - min(mylims))
      if(any(mylims > max(range))) mylims <- mylims - (max(mylims) - max(range))
      # reverse the order if the axis is reversed
      if(diff(lim) < 0) mylims <- rev(mylims)
    }
    message(paste("new limits:",round(mylims[1],4),round(mylims[2],4)))
    return(mylims)
  }

  # place to put the output values
  teststat <- numeric()
  out <- vector("list",length(lims))
  names(out) <- names(lims)
  lolim <- out
  hilim <- out
  outlims <- lims

  # loop for the specified number of iterations
  # (todo: loop until an error threshhold is reached)
  for(i in 1:niter) {
    message(paste("\n###### findit: iteration",i,"of",niter))
    # to generate the argument list for affinity
    # with the variables specified in lims
    aargs <- list()
    for(j in 1:length(lims)) {
      if(names(lims)[j] %in% rownames(basis)) {
        ibasis <- match(names(lims)[j],rownames(basis))
        message(paste("###",rownames(basis)[ibasis],""), appendLF=FALSE)
        # the starting search limits for this species
        lim <- lims[[j]]
        # center the search interval on the current values
        curr <- basis$logact[ibasis]
        # update the argument list with the intervals
        myarg <- list(c(limfun(lim,curr,i),res))
        names(myarg) <- rownames(basis)[ibasis]
        aargs <- c(aargs,myarg)
      } else if(names(lims[j]) %in% c("T","P")) {
        message(paste("###",names(lims[j]),""), appendLF=FALSE)
        if(names(lims[j])=="T") {
          lim <- lims$T
          curr <- T
        } else {
          lim <- lims$P
          curr <- P
        }
        myarg <- list(c(limfun(lim,curr,i),res))
        names(myarg) <- names(lims[j])
        aargs <- c(aargs,myarg)
      } else warning(paste("findit: ignoring",names(lims[j]),"which is not a basis species, T or P"))
    }
    # we include single values of T and P in the arguments
    # if their limits weren't given
    if(!"T" %in% names(lims)) aargs <- c(aargs,list(T=T))
    if(!"P" %in% names(lims)) aargs <- c(aargs,list(P=P))
    # include iprotein if given
    if(!is.null(iprotein)) {
      aargs <- c(aargs,list(iprotein=iprotein))
    }

    # now calculate the affinities
    a <- do.call(affinity,aargs)
    # then calculate the values of the objective function
    e <- equilibrate(a, balance=balance, loga.balance=loga.balance, normalize=normalize)
    dd <- revisit(e, objective, loga2=loga2, plot.it=FALSE)$H
    # coordinates of the extreme value (take only the first set of coords)
    iopt <- optimal.index(dd, objective)[1,, drop=FALSE]
    # the extreme value
    teststat <- c(teststat,dd[iopt])

    # loop to update the current parameters
    for(j in 1:length(lims)) {
      # the current limits for this variable
      # we ignore T and P if they are not variable:
      mylims <- aargs[[j]]
      # the increments used
      myinc <- seq(mylims[1],mylims[2],length.out=mylims[3])
      # the value of the variable at the extreme of the function
      myval <- myinc[iopt[j]]
      # update the basis table, T or P
      if(names(lims)[j] %in% rownames(basis)) {
        ibasis <- match(names(lims)[j],rownames(basis))
        basis$logact[ibasis] <- myval
        basis(rownames(basis)[ibasis],myval)
      } else if(names(lims)[j]=="T") {
        T <- myval
      } else if(names(lims)[j]=="P") {
        P <- myval
      }
      # update the output list
      out[[j]] <- c(out[[j]],myval)
      lolim[[j]] <- c(lolim[[j]],mylims[1])
      hilim[[j]] <- c(hilim[[j]],mylims[2])
      outlims[[j]] <- mylims
    }

    # are we making a plot?
    if(plot.it) {
      # add our search lines and extreme points
      # to the plot
      if(nd==1) {
        if(i==1) revisit(e,objective,loga2,xlim=lims[[1]])
        # on a 1-D diagram we add vertical lines to show our search limits
        abline(v=outlims[[1]][1:2])
        lines(myinc,dd)
        points(myval,dd[iopt])
      } else if(nd==2) {
        if(i==1) revisit(e,objective,loga2,xlim=lims[[1]],ylim=lims[[2]],labcex=labcex)
        else {
          # on a 2-D diagram we add a box around our search limits
          # and an updated map for this region
          ol1 <- outlims[[1]]
          ol2 <- outlims[[2]]
          rect(ol1[1],ol2[1],ol1[2],ol2[2],border=par("fg"),col="white")
          revisit(e,objective,loga2,xlim=lims[[1]],ylim=lims[[2]],add=TRUE,labcex=labcex)
        }
        text(out[[1]],out[[2]])
        points(out[[1]],out[[2]],cex=2)
      } else {
        if(i==1) add <- FALSE else add <- TRUE
        # we only draw a 2-D diagram even if we're optimizing
        # for more than two variables
        # paint an empty box over the search area
        if(i > 1) {
          ol1 <- outlims[[1]]
          ol2 <- outlims[[2]]
          rect(ol1[1],ol2[1],ol1[2],ol2[2],border=par("fg"),col="white")
        }
        # we have to extract slices for dimensions greater than two
        # we extract the third dimension until only two remain
        for(j in 3:nd) {
          # ai[j] - which slice in this dimension has the extremum
          for(k in 1:length(e$loga.equil)) e$loga.equil[[k]] <- slice(e$loga.equil[[k]],3,iopt[j])
        }
        # now make the plot
        revisit(e,objective,loga2,xlim=lims[[1]],ylim=lims[[2]],add=add,labcex=labcex)
        # indicate the location of the extremum
        text(out[[1]],out[[2]])
        points(out[[1]],out[[2]],cex=2)
      }
      # are we making a legend?
      # it looks something like
      #  n     qqr     O2     H2O   CO2   NH3
      #  1   0.993 -85.21  -6.378 2.032 -6.93
      # ...
      # 12   0.997 -85.26  -6.488 3.111 -6.50
    }
  }  # end main loop
  # build return list: values of chemical variables and test statistic
  teststat <- list(teststat)
  names(teststat) <- objective
  value <- c(out,teststat)
  # build return list: limits at each step
  out <- list(value=value,lolim=lolim,hilim=hilim)
  class(out) <- "findit"
  return(out)
}

# to plot results from findit()
plot_findit <- function(x,which=NULL,mar=c(3.5,5,2,2),xlab="iteration",...) {
  # show the values of the test statistic and of the
  # chemical variables at each iteration
  l <- length(x$value)
  if(is.null(which)) which <- 1:l
  l <- length(which)
  opar <- par(mfrow=c(l,1),mar=mar) 
  for(i in which) {
    niter <- length(x$value[[i]])
    ylab <- names(x$value)[i]
    if(ylab %in% c(rownames(get("thermo")$basis),"T","P","pH","Eh")) ylab <- axis.label(ylab)
    # the values
    plot(1:niter,x$value[[i]],xlab=xlab,ylab=ylab,...)
    lines(1:niter,x$value[[i]])
    # the search intervals
    # (not available for the test statistic)
    if(i!=length(x$value)) {
      lines(1:niter,x$lolim[[i]],lty=2)
      lines(1:niter,x$hilim[[i]],lty=2)
    }
  }
  par(opar)
}

Questions? Problems? Suggestions? or email at ian@mutexlabs.com.

Please suggest features or report bugs with the GitHub issue tracker.

All documentation is copyright its authors; we didn't write any of that.