R/info.R

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# CHNOSZ/info.R
# search database for species names or formulas
# and retrieve thermodynamic properties of species
# 20061024 extraced from species.R jmd
# 20120507 code rewrite and split into info.[character,approx,numeric];
#   these functions expect arguments of length 1; 
#   info() handles longer arguments

info <- function(species=NULL, state=NULL, check.it=TRUE) {
  ## return information for one or more species in thermo$obigt
  ## if no species are requested, summarize the available data  20101129
  thermo <- get("thermo")
  if(is.null(species)) {
    message("info: 'species' is NULL; summarizing information about thermodynamic data...")
    message(paste("thermo$obigt has", nrow(thermo$obigt[thermo$obigt$state=="aq", ]), "aqueous,",
      nrow(thermo$obigt), "total species"))
    message(paste("number of literature sources: ", nrow(thermo$refs), ", elements: ",
      nrow(thermo$element), ", buffers: ", length(unique(thermo$buffers$name)), sep=""))
    message(paste("number of proteins in thermo$protein is", nrow(thermo$protein), "from",
      length(unique(thermo$protein$organism)), "organisms"))
    # print information about SGD.csv, ECO.csv, HUM.csv
    more.aa(organism="Sce")
    more.aa(organism="Eco")
    #pdata.aa(organism="HUM")
    # print information about yeastgfp.csv
    yeastgfp()
    return()
  }
  ## run info.numeric or info.character depending on the input type
  if(is.numeric(species)) {
    out <- lapply(species, info.numeric, check.it)
    # if we different states the column names could be different
    if(length(unique(unlist(lapply(out, names)))) > ncol(thermo$obigt)) {
      # make them the same as thermo$obigt
      out <- lapply(out, function(row) {
        colnames(row) <- colnames(thermo$obigt); return(row)
      }) 
    }
    # turn the list into a data frame
    out <- do.call(rbind, out)
  } else {
    # state and species should be same length
    if(!is.null(state)) {
      lmax <- max(length(species), length(state))
      state <- rep(state, length.out=lmax)
      species <- rep(species, length.out=lmax)
    }
    # loop over the species
    out <- sapply(seq_along(species), function(i) {
      # first look for exact match
      ispecies <- info.character(species[i], state[i])
      # if no exact match and it's not a protein, show approximate matches (side effect of info.approx)
      if(identical(ispecies, NA) & !grepl("_", species[i])) ispecies.notused <- info.approx(species[i], state[i])
      # do not accept multiple matches
      if(length(ispecies) > 1) ispecies <- NA
      return(ispecies)
    })
  }
  ## all done!
  return(out)
}

### unexported functions ###

info.text <- function(ispecies) {
  # a textual description of species name, formula, source, e.g.
  # CO2 [CO2(aq)] (SSW01, SHS89, 11.Oct.07)
  this <- get("thermo")$obigt[ispecies, ]
  sourcetext <- this$ref1
  ref2 <- this$ref2
  if(!is.na(ref2)) sourcetext <- paste(sourcetext, ref2, sep=", ")
  date <- this$date
  if(!is.na(date)) sourcetext <- paste(sourcetext, date, sep=", ")
  out <- paste(this$name, " [", this$formula, "(", this$state, ")", "] (", sourcetext, ")", sep="")
  return(out)
}

info.character <- function(species, state=NULL, check.protein=TRUE) {
  # returns the rownumbers of thermo$obigt having an exact match of 'species' to
  # thermo$obigt$[species|abbrv|formula] or NA otherwise
  # a match to thermo$obigt$state is also required if 'state' is not NULL
  # (first occurence of a match to species is returned otherwise)
  thermo <- get("thermo")
  # all matches for the species
  matches.species <- thermo$obigt$name==species | 
    thermo$obigt$abbrv==species | thermo$obigt$formula==species
  # since thermo$obigt$abbrv contains NAs, convert NA results to FALSE
  matches.species[is.na(matches.species)] <- FALSE
  # turn it in to no match if it's a protein in the wrong state
  ip <- pinfo(species)
  if(any(matches.species) & !is.na(ip) & !is.null(state)) {
    matches.state <- matches.species & grepl(state, thermo$obigt$state)
    if(!any(matches.state)) matches.species <- FALSE
  }
  # no match, not available
  if(!any(matches.species)) {
    # unless it's a protein
    if(check.protein) {
      # did we find a protein? add its properties to obigt
      if(!is.na(ip)) {
        # here we use a default state from thermo$opt$state
        if(is.null(state)) state <- thermo$opt$state
        # add up protein properties
        eos <- protein.obigt(ip, state=state)
        # the real assignment work 
        nrows <- suppressMessages(mod.obigt(eos))
        thermo <- get("thermo", "CHNOSZ")
        matches.species <- rep(FALSE, nrows)
        matches.species[nrows] <- TRUE
      } else return(NA)
    } else return(NA)
  }
  # do we demand a particular state
  if(!is.null(state)) {
    # special treatment for H2O: aq retrieves the liq
    if(species %in% c("H2O", "water") & state=="aq") state <- "liq"
    # the matches for both species and state
    matches.state <- matches.species & grepl(state, get("thermo")$obigt$state)
    if(!any(matches.state)) {
      # the requested state is not available for this species
      available.states <- thermo$obigt$state[matches.species]
      if(length(available.states)==1) a.s.verb <- "is" else a.s.verb <- "are"
      a.s.text <- paste("'", available.states, "'", sep="", collapse=" ")
      message("info.character: requested state '", state, "' for ", species, 
        " but only ", a.s.text, " ", a.s.verb, " available")
      return(NA)
    }
    matches.species <- matches.state
  }
  # all of the species that match
  ispecies <- which(matches.species)
  # we return only the first species that matches
  # unless they are 'cr1' 'cr2' etc. and we requested state 'cr'
  if(identical(state, "cr")) ispecies.out <- ispecies
  else ispecies.out <- ispecies[1]
  # let user know if there is more than one state for this species
  if(length(ispecies) > length(ispecies.out)) {
    ispecies.other <- ispecies[!ispecies %in% ispecies.out]
    othertext <- paste(thermo$obigt$state[ispecies.other], collapse=", ")
    message("info.character: found ", species, "(", thermo$obigt$state[ispecies.out], 
      "), also available in ", othertext)
  }
  return(ispecies.out)
}

info.numeric <- function(ispecies, check.it=TRUE) {
  # from a numeric species index in 'ispecies' return the 
  # thermodynamic properties and equations-of-state parameters
  thermo <- get("thermo")
  # if we're called with NA, return an empty row
  if(is.na(ispecies)) {
    this <- thermo$obigt[1,]
    this[] <- NA
    return(this)
  }
  this <- thermo$obigt[ispecies,]
  # species indices must be in range
  ispeciesmax <- nrow(thermo$obigt)
  if(ispecies > ispeciesmax | ispecies < 1) 
    stop(paste("species index", ispecies, "not found in thermo$obigt\n"))
  # remove scaling factors on EOS parameters depending on state
  # use new obigt2eos function here
  this <- obigt2eos(this, this$state)
  # identify any missing GHS values
  naGHS <- which(is.na(this[8:10]))
  # a missing one of G, H or S can cause problems for subcrt calculations at high T
  if(length(naGHS)==1) {
    # calculate a single missing one of G, H, or S from the others
    GHS <- as.numeric(GHS(as.character(this$formula), G=this[,8], H=this[,9], S=this[,10]))
    message("info.numeric: ", colnames(this)[8:10][naGHS], " of ",
      this$name, "(", this$state, ") is NA; set to ", round(GHS[naGHS],2))
    this[, naGHS+7] <- GHS[naGHS]
  } 
  # now perform consistency checks for GHS and EOS parameters if check.it=TRUE
  if(check.it) {
    # check GHS if they were all present
    if(length(naGHS)==0) calcG <- checkGHS(this)
    # check tabulated heat capacities against EOS parameters
    calcCp <- checkEOS(this, this$state, "Cp")
    # fill in NA heat capacity
    if(!is.na(calcCp) & is.na(this$Cp)) {
      message("info.numeric: Cp of ", this$name, "(", this$state, ") is NA; set by EOS parameters to ", round(calcCp, 2))
      this$Cp <- as.numeric(calcCp)
    }
    # check tabulated volumes - only for aq (HKF equation)
    if(identical(this$state, "aq")) {
      calcV <- checkEOS(this, this$state, "V")
      # fill in NA volume
      if(!is.na(calcV) & is.na(this$V)) {
        message("info.numeric: V of ", this$name, "(", this$state, ") is NA; set by EOS parameters to ", round(calcV, 2))
        this$V <- as.numeric(calcV)
      }
    }
  } # done checking
  # all done!
  return(this)
}

info.approx <- function(species, state=NULL) {
  # returns species indices that have an approximate match of 'species'
  # to thermo$obigt$[name|abbrv|formula], 
  # possibly restricted to a given state
  thermo <- get("thermo")
  if(!is.null(state)) this <- thermo$obigt[thermo$obigt$state==state, ]
  else this <- thermo$obigt
  # only look for fairly close matches
  max.distance <- 0.1
  approx.name <- agrep(species, this$name, max.distance)
  approx.abbrv <- agrep(species, this$abbrv, max.distance)
  approx.formula <- agrep(species, this$formula, max.distance)
  approx.species <- unique(c(approx.name, approx.abbrv, approx.formula))
  if(!is.na(approx.species[1])) {
    # show the names of the species
    if(length(approx.species)==1) {
      message("info.approx: '", species, "' is similar to ", info.text(approx.species))
    } else {
      napprox.max <- 100
      exttext <- ":"
      if(length(approx.species) > napprox.max) exttext <- paste(" (showing first ", napprox.max, ")", sep="")
      message("info.approx: '", species, "' is ambiguous; has approximate matches to ", 
        length(approx.species), " species", exttext)
      printout <- capture.output(print(thermo$obigt$name[head(approx.species, napprox.max)]))
      message(paste(printout, collapse="\n"))
    }
    return(approx.species)
  }
  # if we got here there were no approximate matches
  message("info.approx: '", species, "' has no approximate matches")
  return(NA)
}

Questions? Problems? Suggestions? or email at ian@mutexlabs.com.

Please suggest features or report bugs with the GitHub issue tracker.

All documentation is copyright its authors; we didn't write any of that.