JMDplots: Plots from Papers by Jeffrey M. Dick

Documented in utogig1

# JMDplots/utogig.R
# Plots for perspective paper on geochemical biology
# 20200418 jmd first version (review of Bison Pool and amino acid synthesis [AS98])
# 20210516 Add methanogen tree
# 20210727 Add nH2O-Zc overview plot (deleted)
# 20220220 Make biological hierarchy diagram (deleted)
# 20220420 Move to JMDplots

# Create bold axis labels
Zclab <- quote(bolditalic(Z)[bold(C)])
Tlab <- quote(bolditalic(T)~bold("("*degree*C*")"))
Alab <- quote(bold("Affinity"~(kJ~"(mol C)"^{-1})))
Aresiduelab <- quote(bold("Affinity"~(kJ~"(mol residue)"^{-1})))
Toptlab <- quote(bolditalic(T)[bold(opt)]~bold("("*degree*C*")"))
logaH2maxlab <- quote(bold(log)~bolditalic(a)*bold(H[2])[bold("for maximum activity")])
logalab <- quote(bold(log)~bolditalic(a))
logaH2lab <- quote(bold(log)~bolditalic(a)*bold(H[2]))
logaNH4lab <- quote(bold(log)~bolditalic(a)*bold(NH[4]^"+"))
nH2Olab <- quote(bolditalic(n)*bold(H[2]*O))

# Identify species used in Shock & Canovas (2010)
# Used in Energy() and calc_logaH2_intermediate()
specieslist <- list(
  "C1 and C2" = c("CH4", "methanol", "formaldehyde", "CO", "formic acid",
           "ethane", "ethylene", "ethyne", "ethanol", "acetaldehyde", "acetic acid", "glycolic acid", "oxalic acid"),
  Acid = c("formic acid", "acetic acid", "propanoic acid", "n-hexanoic acid", "n-nonanoic acid", "n-dodecanoic acid"),
  "Amino acid" = c("glycine", "alanine", "valine", "leucine", "aspartic acid", "asparagine", "glutamic acid", "glutamine", "serine", "proline", "phenylalanine", "tryptophan"),
  Sugar = c("ribose", "ribulose", "deoxyribose", "glucose"),
  Nucleobase = c("adenine", "guanine", "thymine", "cytosine"),
  # TCA cycle metabolites added 20211110
  "TCA cycle" = c("pyruvate", "oxaloacetate-2", "citrate-3", "cis-aconitate-3", "isocitrate-3", "a-ketoglutarate-2", "succinate-2", "fumarate-2", "malate-2")
)

# Get seawater from Amend & Shock (1998)
Seawater.AS98 <- data.frame(T = 18, CO2 = log10(1e-4), H2 = log10(2e-9), pH = -log10(5e-9), "NH4+" = log10(5e-8), H2S = log10(1e-15), check.names = FALSE)

# Make semi-transparent colors 20220223
col4 <- adjustcolor(4, alpha.f = 0.69)
col8 <- adjustcolor(8, alpha.f = 0.69)
col2 <- adjustcolor(2, alpha.f = 0.69)

# Read methanogen tree
methanogen_tree <- read.tree(system.file("extdata/utogig/methanogen_tree.txt", package = "JMDplots"))
# Match species names without underscore used for labeling the tree
methanogens <- gsub("_", " ", methanogen_tree$tip.label)
# Indices of Class I and Class II methanogens
iI <- 20:36
iII <- 1:19
# Assign point symbols and colors for methanogens
mcol <- mbg <- mpch <- rep(NA, length(methanogens))
# Open red circle for Class I
mpch[iI] <- 21
mbg[iI] <- "transparent"
mcol[iI] <- col2
# Filled red circle for Methanocaldococcus 20220220
mbg[grepl("Methanocaldococcus", methanogens)] <- col2
# Grey-filled red circle for outliers
iout <- methanogens %in% c("Methanothermobacter marburgensis", "Methanothermobacter thermautotrophicus", "Methanopyrus kandleri")
mbg[iout] <- col8
mcol[iout] <- col2
# Filled blue square for Class II
mpch[iII] <- 22
mbg[iII] <- col4
mcol[iII] <- col4

## To make methanogen_AA.csv
## Read RefSeq amino acid compositions
## NOTE: taxids are in 'organism' column, species names are in 'ref' column
#refseq <- read.csv(system.file("RefDB/RefSeq_206/genome_AA.csv.xz", package = "JMDplots"), as.is = TRUE)
#irefseq <- match(methanogens, refseq$ref)
## Amino acid compositions of reference proteomes of methanogens
#methanogen_AA <- refseq[irefseq, ]
#write.csv(methanogen_AA, "methanogen_AA.csv", row.names = FALSE, quote = FALSE)

# Chemical analysis of reference proteomes of methanogens reveals adaptation to redox conditions 20210516
utogig1 <- function(pdf = FALSE) {

  if(pdf) pdf("Figure_1.pdf", width = 12, height = 8)
  layout(matrix(1:2, nrow = 1), widths = c(1.5, 0.7))
  par(mar = c(2, 0, 1, 32))

  # Read and rotate the tree (Class I at bottom) 20220629
  methanogen_tree <- read.tree(system.file("extdata/utogig/methanogen_tree.txt", package = "JMDplots"))
  labels <- rev(methanogen_tree$tip.label)
  methanogen_tree <- rotateConstr(methanogen_tree, labels)
  # Make the tree with colors to distinguish Class I and Class II
  edge.color <- c(rep(2, 37), rep(4, 37))
  pp <- plot.phylo(methanogen_tree, root.edge = TRUE, edge.color = edge.color, show.tip.label = FALSE, rotate.tree = 90)
  # Add tip labels 20220531
  labels <- gsub("_", " ", labels)
  FStxt <- "sp. FS406-22"
  iFS <- grep(FStxt, labels, fixed = TRUE)
  labels <- gsub(FStxt, "", labels, fixed = TRUE)
  dy <- -0.25
  text(3100, 1:36 + dy, labels, font = 3, adj = c(0, 0), xpd = NA)
  text(4170, iFS + dy, hyphen.in.pdf(FStxt), adj = c(0, 0), xpd = NA)

  # Calculate Zc from protein formulas
  methanogen_AA <- read.csv(system.file("extdata/utogig/methanogen_AA.csv", package = "JMDplots"))
  Zc <- ZC(protein.formula(methanogen_AA))

  # Add labels
  text(0, 32, "Class II", font = 2, adj = 0, cex = 1.2)
  text(0, 7, "Class I", font = 2, adj = 0, cex = 1.2)
  text(3200, -2.4, Zclab, adj = c(0, 0), xpd = NA)
  label.figure("(a)", cex = 1.6, font = 2, yfrac = 0.97)
  # Setup a new plot in the empty space between the tree and names
  par(new = TRUE)
  par(mar = c(2, 9, 1, 18))
  plot(range(Zc), c(1, length(Zc)), type = "n", ylab = "", axes = FALSE)
  axis(1, at = seq(-0.25, -0.15, 0.01), labels = FALSE, tcl = -0.3)
  axis(1, at = c(-0.25, -0.2, -0.15))
  axis(3, at = seq(-0.25, -0.15, 0.01), labels = FALSE, tcl = -0.3)
  # Plot guidelines 20220405
  abline(v = c(-0.25, -0.20, -0.15), lty = 2, col = 8)
  # Plot lines from Zc to organism name
  for(i in 1:36) lines(c(rev(Zc)[i], -0.145), c(i, i), lty = 3, col = "gray40")

  # Plot the Zc
  points(Zc, rev(c(iII, iI)), pch = mpch, col = mcol, bg = mbg, cex = 1.2)

  # Zc-Topt plot 20220224
  par(mar = c(9, 2, 7, 1))
  # Read Topt
  dat <- read.csv(system.file("extdata/utogig/Topt.csv", package = "JMDplots"))
  # Append Zc column
  methanogen_AA <- read.csv(system.file("extdata/utogig/methanogen_AA.csv", package = "JMDplots"))
  Zc <- ZC(protein.formula(methanogen_AA))
  dat$Zc <- Zc[match(dat$species, methanogens)]
  # Use par(xpd = NA) to show the y-axis label 20220401
  par(xpd = NA)
  plot(dat$Topt, dat$Zc, pch = mpch, col = mcol, bg = mbg, xlab = Toptlab, ylab = Zclab, ylim = c(-0.26, -0.14))
  par(xpd = FALSE)
  text(50, -0.24, "Class I", font = 2)
  text(40, -0.145, "Class II", font = 2)
  # Uncomment this to check the alignment of separate text items below
  #text(85, -0.168, "High-Zc\nthermophiles\n(Class I)", col = 2)
  text(85, -0.163, bquote(.(hyphen.in.pdf("High-"))*italic(Z)[C]))
  text(85, -0.1705, "thermophiles\n(Class I)")
  text(80, -0.257, "Methanocaldococcus", font = 3)
  # Add convex hulls 20220401
  dat2 <- dat[iII, ]
  i2 <- chull(dat2$Topt, dat2$Zc)
  polygon(dat2$Topt[i2], dat2$Zc[i2], col = "#90CAF960", border = NA)
  dat1 <- dat[setdiff(iI, which(iout)), ]
  i1 <- chull(dat1$Topt, dat1$Zc)
  polygon(dat1$Topt[i1], dat1$Zc[i1], col = "#E5737360", border = NA)
  label.figure("(b)", cex = 1.6, font = 2, yfrac = 0.85, xfrac = -0.05)

  if(pdf) dev.off()

}

# Relative stabilities of organic compounds depend on redox conditions 20211109
utogig2 <- function(pdf = FALSE, logact = -3) {

  if(missing(logact)) {
    if(pdf) pdf("Figure_2.pdf", width = 7, height = 6)
    mat <- matrix(c(1,1,2,2,3,3,4, 0,5,5,5,5,0,0), byrow = TRUE, nrow = 2)
    layout(mat, heights = c(1, 0.7), widths = c(1.1,1,1,1,1,1,1))
    # Activity of H2 to be used with logact = -3
    # (Second plot has zero slope)
    logaH2s <- c(-5, -10.262, -15)
  } else {
    if(pdf) pdf("Figure_S3.pdf", width = 7, height = 3.5)
    mat <- matrix(c(1,1,2,2,3,3,4), byrow = TRUE, nrow = 1)
    layout(mat, widths = c(1.1,1,1,1,1,1,1))
    # Activity of H2 to be used with logact = -6
    # (Second plot has zero slope)
    logaH2s <- c(-5, -10.351, -15)
  }
  par(mar = c(4, 4, 0.5, 1), mgp = c(2.7, 1, 0))

  # Set temperature
  T <- 25
  # Use seawater composition from Amend & Shock (1998)
  # NOTE: H2S is present but is not in any formation reactions 20220403
  basis(c("CO2", "H2", "NH4+", "H2O", "H2S", "H+"), c(Seawater.AS98$CO2, Seawater.AS98$H2, Seawater.AS98$"NH4+", 0, Seawater.AS98$H2S, -Seawater.AS98$pH))
  # Position of slope text
  slopey <- c(-112, -160, -50)
  # Place to store p-values
  pvals <- numeric()

  # Loop over logaH2 activities
  for(ilogaH2 in c(1, 2, 3)) {

    logaH2 <- logaH2s[ilogaH2]
    basis("H2", logaH2)

    # Start plot
    xlim <- c(-4, 3)
    ylim <- c(-310, 100)
    if(ilogaH2 == 1) ylab <- Alab
    if(ilogaH2 > 1) {
      ylab <- ""
      par(mar = c(4, 3, 0.5, 1))
    }

    plot(xlim, ylim, type = "n", xlab = Zclab, ylab = ylab, xaxt = "n")
    axis(1, gap.axis = 0)
    abline(h = 0, lty = 4)

    pchs <- c(21:25, 20)
    cols <- adjustcolor(c(7, 5, 8, 5, 3, 1), alpha.f = 0.69)

    # Loop over groups of species
    # Start with n-carboxylic acids (i = 2)
    AC_all <- Zc_all <- numeric()
    for(i in c(2, 1, 3, 4, 5, 6)) {

      # Load species
      myspecies <- specieslist[[i]]
      species(myspecies, logact)
      # Calculate affinities
      a <- affinity(T = T)
      A <- unlist(a$values)
      # Convert to Gibbs energy (J/mol)
      TK <- convert(T, "K")
      A <- -convert(A, "G", TK)
      # Convert to kJ/mol
      A <- A / 1000
      # Divide A by number of carbon
      nC <- sapply(makeup(info(myspecies)), "[", "C")
      AC <- A / nC
      # Calculate carbon oxidation state
      Zc <- ZC(info(myspecies))
      # Adjust point size for n-carboxylic acids
      cex <- 1
      if(i == 2) cex <- 1.5
      if(i < 6) points(Zc, AC, pch = pchs[i], bg = cols[i], cex = cex)
      else points(Zc, AC, pch = pchs[i], col = cols[i], cex = cex)
      AC_all <- c(AC_all, AC)
      Zc_all <- c(Zc_all, Zc)

    }

    # Calculate linear fit
    thislm <- lm(AC_all ~ Zc_all)
    x <- range(Zc_all)
    y <- predict.lm(thislm, data.frame(Zc_all = x))
    # Plot linear fit
    lines(x, y, lty = 2, lwd = 2, col = 8)
    # Add legend with slope
    slope <- thislm$coefficients[[2]]
    word <- "Slope"
    if(ilogaH2 == 1) {
      word <- "linear fit"
      text(-1.53, -95, "Slope of")
    }
    slopetxt <- bquote(.(word) == .(formatC(slope, digits = 3, format = "f")))
    if(round(slope, 3) == 0) slopetxt <- "Slope = 0"
    text(-0.5, slopey[ilogaH2], slopetxt)
    # Show R2 20220915
    R2 <- summary(thislm)$r.squared
    R2_txt <- bquote(italic(R)^2 == .(formatC(R2, digits = 2, format = "f")))
    text(-0.5, slopey[ilogaH2] - 20, R2_txt)
    # Calculate p-value 20220915
    pvals <- c(pvals, cor.test(AC_all, Zc_all)$p.value)

    # Add title: logaH2
    Htxt <- bquote(bold(log~bolditalic(a)*H[2] == .(logaH2)))
    title(Htxt, line = -1, cex.main = 1)
    if(ilogaH2 == 3) {
      # Add oxidation-intermediate-reduction arrow 20220221
      par(xpd = NA)
      x1 <- -21.5
      x2 <- 0
      dx <- 3.2
      rect(x1 - dx, -317, x2 + dx, -227, col = "white", lty = 3)
      x1.1 <- x1 + 6
      x2.1 <- x2 - 6
      arrows(x1.1, -300, x1, -300, col = "#D50000", length = 0.1, lwd = 2)
      arrows(x2.1, -300, x2, -300, col = "#366BFB", length = 0.1, lwd = 2)
      lines(c(x1.1, x2.1), c(-300, -300), col = 8, lwd = 2)
      # Uncomment this to check the alignment of separate text items below
      #text(x1, -288, "Reducing conditions:\nReduced compounds\nare relatively stable", adj = c(0.5, 0), col = 2)
      text(x1 + 1, -248, "Reducing conditions:", adj = c(0.5, 0), font = 3)
      text(x1 + 1, -288, "Reduced compounds are\nmore stable than oxidized ones", adj = c(0.5, 0))
      #text(x2, -288, "Oxidizing conditions:\nOxidized compounds\nare relatively stable", adj = c(0.5, 0), col = 2)
      text(x2 - 1, -248, "Oxidizing conditions:", adj = c(0.5, 0), font = 3)
      text(x2 - 1, -288, "Oxidized compounds are\nmore stable than reduced ones", adj = c(0.5, 0))
      #text(mean(c(x1, x2)), -288, "Intermediate conditions:\nReduced and oxidized compounds\nare approximately equally stable", adj = c(0.5, 0), col = 2)
      text(mean(c(x1, x2)), -248, "Intermediate conditions:", adj = c(0.5, 0), font = 3)
      text(mean(c(x1, x2)), -288, "Reduced and oxidized compounds\nare approximately equally stable", adj = c(0.5, 0))
      par(xpd = FALSE)
    }
    if(ilogaH2 == 1) {
      if(missing(logact)) {
        text(-3.1, 72, quote(CH[4]))
        label.figure("(a)", cex = 1.5, font = 2, yfrac = 0.97)
      } else {
        text(-3.1, 90, quote(CH[4]))
      }
    }

  }

  # Add legend 20220407
  plot.new()
  par(mar = c(0, 0, 0, 0))
  par(xpd = NA)
  legend <- paste0(" ", names(specieslist))
  legend(-0.8, 0.7, legend, pch = pchs, pt.cex = c(1, 1.5, 1, 1, 1, 1),
         pt.bg = cols, col = c(1, 1, 1, 1, 1, cols[6]), bty = "n", y.intersp = 1.2)
  dT <- describe.property("T", T)
  dP <- describe.property("P", 1)
  dbasis <- describe.basis(ibasis = c(1, 3, 6))
  legend("topright", legend = c(dT, dP, dbasis), bty = "n", y.intersp = 1.2)
  par(xpd = FALSE)

  if(missing(logact)) {
    # Plot intermediate logaH2 20220401
    intermediate_logaH2(logact = logact)
    # Add Psat legend 20220629
    legend("bottomright", legend = quote(bolditalic(P)[bold(sat)]), bty = "n")
    label.figure("(b)", cex = 1.5, font = 2)
  }

  if(pdf) dev.off()

  # Print p-values
  if(missing(logact)) {
    print("p-values for (a):")
  } else {
    print("p-values:")
  }
  print(signif(pvals, 2))

}

# Thermodynamic model for methanogen niche partitioning 20220402
utogig3 <- function(pdf = FALSE) {

  if(pdf) pdf("Figure_3.pdf", width = 7, height = 5)
  mat <- matrix(c(1,1,2,2,3,3,4, 5,5,5,6,6,6,0), nrow = 2, byrow = TRUE)
  layout(mat, widths = c(1.2,1,1,1,1,1,1.2))
  par(mar = c(4, 4, 0.5, 1), mgp = c(2.7, 1, 0))

  # Define basis species and temperature
  basis(c("CO2", "H2", "NH4+", "H2O", "H2S", "H+"), c(Seawater.AS98$CO2, Seawater.AS98$H2, Seawater.AS98$"NH4+", 0, Seawater.AS98$H2S, -Seawater.AS98$pH))
  T <- 25

  # Calculate Zc of methanogen proteomes excluding high-Zc outliers
  methanogen_AA <- read.csv(system.file("extdata/utogig/methanogen_AA.csv", package = "JMDplots"))
  Zc <- ZC(protein.formula(methanogen_AA[!iout, ]))
  # Add proteins to CHNOSZ
  ip <- add.protein(methanogen_AA[!iout, ])
  pl <- protein.length(ip)

  # Create combinations between Class I and II methanogens
  # Class I methanogens excluding high-Zc outliers
  iIin <- setdiff(1:length(ip), iII)
  ncomb <- length(iIin) * length(iII)
  i1 <- rep(iIin, length.out = ncomb)
  i2 <- rep(iII, each = length(iIin))

  ## Plot A: affinities at specified logaH2
  logaH2s <- c(-5, -7.467, -10)
  for(i in 1:3) {
    if(i == 1) ylab <- Aresiduelab
    if(i > 1) {
      ylab <- ""
      par(mar = c(4, 2.5, 0.5, 1))
    }
    basis("H2", logaH2s[i])
    # Calculate affinities
    a <- affinity(iprotein = ip, T = T)
    A <- unlist(a$values)
    # Convert to Gibbs energy (J/mol)
    TK <- convert(T, "K")
    A <- -convert(A, "G", TK)
    # Convert to kJ/mol
    A <- A / 1000
    # Calculate affinity per residue
    Ares <- A / pl
    plot(Zc, Ares, pch = mpch[!iout], col = mcol[!iout], bg = mbg[!iout], xlab = Zclab, ylab = ylab)
    # Percentage of combinations that favor Class I over Class II
    p1 <- round(sum(sign(Ares[i1] - Ares[i2]) == 1) / ncomb * 100)
    legend <- bquote(.(p1)*"%"~italic(A)[I] > italic(A)[II])
    if(i < 3) inset <- c(-0.1, 0) else inset <- c(0.05, 0)
    legend("bottomleft", legend = legend, bty = "n", inset = inset)
    if(i == 1) legend("bottomleft", c("Pairwise affinity", "differences:", ""), inset = c(-0.1, 0), bty = "n")
    # Add legend: logaH2
    Htxt <- bquote(bold(log~bolditalic(a)*H[2] == .(logaH2s[i])))
    if(i > 1) legend.x <- "topleft" else legend.x <- "top"
    legend(legend.x, legend = Htxt, bty = "n")
    if(i==1) label.figure("(a)", cex = 1.5, font = 2, yfrac = 0.96, xfrac = 0.06)
  }

  # Add legend 20220403
  plot.new()
  par(mar = c(0, 0, 0, 0))
  par(xpd = NA)
  legend <- c("Class II", "Class I")
  legend(-0.6, 0.50, legend = legend, pch = c(22, 21), col = c(col4, col2), pt.bg = c(col4, "white"), bty = "n", y.intersp = 1.2)
  legend(-0.45, 0.35, legend = c("Methanocaldococcus"), pch = 21, col = col2, pt.bg = col2, text.font = 3, bty = "n", cex = 0.9, pt.cex = 1)
  dT <- describe.property("T", T)
  dP <- describe.property("P", 1)
  dbasis <- describe.basis(ibasis = c(1, 3, 5, 6))
  legend("topright", legend = c(dT, dP, dbasis), bty = "n", y.intersp = 1.1)
  par(xpd = FALSE)

  ## Plot B: Contour plot for affinity differences between Class I and Class II proteomes 20220402
  # Calculate affinities as a function of logaH2 and T
  par(mar = c(4, 4, 1, 2.2))
  H2 <- c(-10, -1.5, 200)
  T <- c(20, 100, 200)
  a <- affinity(T = T, H2 = H2, iprotein = ip)
  # Calculate affinity per residue
  Ares <- mapply("/", a$values, pl, SIMPLIFY = FALSE)
  # Calculate sign of difference between all combinations
  Ares.diff <- mapply("-", Ares[i1], Ares[i2], SIMPLIFY = FALSE)
  Ares.diff.sign <- sapply(Ares.diff, sign, simplify = FALSE)
  Ares.diff.positive <- sapply(Ares.diff.sign, "==", y = 1, simplify = FALSE)
  # Sum to get percentage of combinations that favor Class I over Class I
  Ares.diff.positive.sum <- Reduce("+", Ares.diff.positive)
  p1 <- Ares.diff.positive.sum / ncomb * 100
  levels <- c(10, 50, 100)
  contour(a$vals$T, a$vals$H2, p1, xlab = Tlab, ylab = logaH2lab, xaxs = "i", yaxs = "i",
          levels = c(10, 50, 100), drawlabels = FALSE,
          col = c("#366BFB", 1, "#D50000"))
  # Plot intermediate logaH2 background
  intermediate_logaH2(add = TRUE, lines = FALSE, pH = FALSE, NH4 = FALSE, redox = FALSE)
  # Replot contour lines to make them stand out
  contour(a$vals$T, a$vals$H2, p1, add = TRUE,
          levels = c(10, 50, 100), drawlabels = FALSE,
          col = c("#366BFB", 1, "#D50000"))
  arrows(60, -7.8, 60, -4.6, length = 0.1, code = 3)
  text(55, -5.6, "More reducing", adj = 1)
  text(55, -7.9, "Less reducing", adj = 1)
  text(80, -2.6, "Class I proteomes\nare most stable", cex = 0.8)
  text(80, -4.5, "Class I proteomes\nare more stable", cex = 0.8)
  text(80, -6.8, "Class II proteomes\nare more stable", cex = 0.8)
  # Add contour labels in margin 20220407
  CL <- contourLines(a$vals$T, a$vals$H2, p1, levels = c(10, 50, 100))
  par(xpd = NA)
  text(101, tail(CL[[1]]$y, 1), "10%", adj = 0, col = "#366BFB")
  text(101, tail(CL[[2]]$y, 1), "50%", adj = 0, col = 1)
  text(101, tail(CL[[3]]$y, 1), "100%", adj = 0, col = "#D50000")
  par(xpd = FALSE)
  label.figure("(b)", cex = 1.5, font = 2, yfrac = 0.97)

  ## Plot C: logaH2-T plot for habitable niches 20220303
  par(mar = c(4, 2.5, 1, 2.5))
  plot(T[1:2], H2[1:2], xlab = Tlab, ylab = "", type = "n", xaxs = "i", yaxs = "i")

  # Fill area for habitable region
  pu <- par("usr")
  rect(pu[1], pu[3], pu[2], pu[4], col = "#C5E1A580")

  # Add line for mixed fluids at Rainbow 20220303
  srt <- 11
  Rainbow <- read.csv(system.file("extdata/mjenergy/SC10_Rainbow.csv", package = "JMDplots"))
  Tvals <- Rainbow$T
  logaH2.max <- Rainbow$H2
  polygon(c(Tvals, rev(Tvals)), c(logaH2.max, rep(par("usr")[4], length(Tvals))), col = "gray80", lty = 0)
  lines(Tvals, logaH2.max, lty = 2)
  text(Tvals[14], logaH2.max[14], "Rainbow", srt = 0.7 * srt, adj = c(0, 1.3), col = "green4")

  # Add logaCO2 = logaCH4 line 20220223
  Tvals <- seq(par("usr")[1], par("usr")[2], length.out = 100)
  logaH2.min <- subcrt(c("CO2", "H2", "CH4", "H2O"), c(-1, -4, 1, 2), T = Tvals)$out$logK / -4
  polygon(c(Tvals, rev(Tvals)), c(logaH2.min, rep(par("usr")[3], length(Tvals))), col = "gray80", lty = 0)
  lines(Tvals, logaH2.min, lty = 3)
  box()
  text(83, -6.4, quote(italic(a)*CH[4] / italic(a)*CO[2] == 10^0), srt = srt)
  # Add lines for aCO2/aCH4 = 10^2 and 10^4 20220303
  lines(Tvals[33:100], logaH2.min[33:100] + 2, lty = 3)
  text(94, -4.1, quote(10^2), srt = srt)
  lines(Tvals, logaH2.min + 4, lty = 3)
  text(94, -2.0, quote(10^4), srt = srt)

  # Add box for Lost City 20220303
  # 40 to 90 °C (Kelley et al., 2005)
  x1 <- 40
  x2 <- 90
  # 1 to 15 mmol/kg H2 (Kelley et al., 2005)
  y1 <- log10(1e-3)
  y2 <- log10(15e-3)
  rect(x1, y1, x2, y2, col = "white", lty = 0)
  rect(x1, y1, x2, y2, col = "#C5E1A540", border = "green4")
  text(mean(c(x1, x2)), mean(c(y1, y2)), "Lost City", col = "green4")

  # Add lines for sediments (Lovley & Goodwin, 1988) 20220303
  sediment <- read.csv(system.file("extdata/utogig/LG88_Fig1.csv", package = "JMDplots"))
  for(i in 1:nrow(sediment)) {
    if(sediment$type[i] == "Methane") col <- "green4" else col <- "gray40"
    if(sediment$type[i] == "Methane") lty <- 1 else lty <- 2
    lines(c(15, 25), rep(log10(sediment$H2[i]*1e-9), 2), col = col, lty = lty)
  }
  
  # Add affinity contours 20220403
  lines(as.data.frame(do.call(cbind, CL[[1]][c("x", "y")])), col = "#366BFB")
  lines(as.data.frame(do.call(cbind, CL[[2]][c("x", "y")])), col = 1)
  lines(as.data.frame(do.call(cbind, CL[[3]][c("x", "y")])), col = "#D50000")

  # Add labels 20220221
  text(21, -6.4, "Methanogenic\nsediments", adj = 0, cex = 0.9, col = "green4")
  lines(c(26, 22), c(-7, -7.9), col = "green4")
  text(40, -9.5, hyphen.in.pdf("Non-methanogenic sediments"), adj = 0, cex = 0.9, col = "gray40")
  lines(c(26, 39), c(-9, -9.5), col = "gray40")
  par(xpd = NA)
  lines(c(100, 130), c(-2.34, -2.34), lty = 2)
  lines(c(100, 130), rep(tail(logaH2.min, 1), 2), lty = 3)
  text(132, -4, "Habitable\nniches", font = 3, col = "green4", adj = 0)
  arrows(130,-2.34, 130, tail(logaH2.min, 1), code = 3, col = "green4", length = 0.1)
  text(101, tail(CL[[1]]$y, 1), quote("10%"~A[I] > A[II]), adj = 0, col = "#366BFB")
  text(101, tail(CL[[2]]$y, 1), quote("50%"~A[I] > A[II]), adj = 0, col = 1)
  text(101, tail(CL[[3]]$y, 1), quote("100%"~A[I] > A[II]), adj = 0, col = "#D50000")
  par(xpd = FALSE)
  label.figure("(c)", cex = 1.5, font = 2, yfrac = 0.97, xfrac = -0.02)

  if(pdf) dev.off()

}

# Chemical and thermodynamic analysis of evolutionary divergence along redox gradients 20220601
utogig4 <- function(pdf = FALSE) {

  if(pdf) pdf("Figure_4.pdf", width = 9, height = 5)
  layout(matrix(1:8, nrow = 2), widths = c(2.5, 4, 4, 2.5))
  par(mar = c(4, 4, 3, 1))
  
  # Faded colors
  col4 <- adjustcolor(4, alpha.f = 0.69)
  col2 <- adjustcolor(2, alpha.f = 0.69)
  # y-axis limits for boxplots
  ylim <- c(-0.25, -0.10)
  # Axis limits for affinity plots
  xlims <- list(c(-4, -12), c(0, -15), c(0, -15))
  ylims <- list(c(0, 80), c(0, 50), c(0, 50))
  # Where to draw transition line
  trans <- list(c("Class I", "Class II"), c("Nif-B", "Nif-A"), c("Basal", "Terrestrial"))

  # Define basis species and temperature
  basis(c("CO2", "H2", "NH4+", "H2O", "H2S", "H+"), c(Seawater.AS98$CO2, Seawater.AS98$H2, Seawater.AS98$"NH4+", 0, Seawater.AS98$H2S, -Seawater.AS98$pH))
  T <- 25
  # Calculate logK for H2 = 2H+ + 2e- for conversion to Eh
  logK <- subcrt(c("H2", "H+", "e-"), c(-1, 2, 2), T = T)$out$logK
  pH <- Seawater.AS98$pH
  # Adjustments for label position
  dx <- list(c(0, -0.3), c(3.83, 0, -5, -1.5), c(0.2, -0.1, 2, -3))
  dy <- list(c(2, 1), c(-1.3, 0.7, 0, 1), c(1.5, 3, 0, -6))

  # Place to keep logaH2 for printing 20220920
  logaH2s <- numeric()

  for(i in 1:3) {

    if(i == 1) {
      # Methanogen proteomes 20220424
      # Read amino acid composition and compute Zc
      aa <- read.csv(system.file("extdata/utogig/methanogen_AA.csv", package = "JMDplots"))
      # Indices of Class I and Class II methanogens
      iI <- 20:36
      iII <- 1:19
      col <- c(col2, col4)
      lcol <- c(2, 4)
      # Make Zc plot
      Zc <- ZC(protein.formula(aa))
      Zclist <- list("Class I" = Zc[iI], "Class II" = Zc[iII])
      names(Zclist) <- paste0(names(Zclist), "\n(", sapply(Zclist, length), ")")
      bp <- boxplot(Zclist, ylab = Zclab, col = col, ylim = ylim, names = character(2))
      axis(1, at = 1:2, labels = names(Zclist), line = 1, lwd = 0)
      add_cld(Zclist, bp)
      text(1, -0.12, "Anoxic\nhabitats", font = 2, cex = 0.8)
      text(2, -0.12, "Anoxic\nand oxic\nhabitats", font = 2, cex = 0.8)
      abline(v = 1.5, lty = 2, lwd = 1.5, col = 8)
      title("Methanogens\n(Lyu & Lu, 2018)", font.main = 1, cex.main = 1)
      label.figure("(a)", cex = 1.5, font = 2, xfrac = 0.06)
      # Get the species in each group
      groups <- list("Class I" = iI, "Class II" = iII)
      # Calculate P-values using parametric and non-parametric tests 20220913
      Ptab1 <- KWvsANOVA(Zclist)
      Ptab1 <- cbind(Variable = "Methanogens Zc", Ptab1)
    }

    if(i == 2) {
      # Nif-encoding genomes 20220531
      np <- NifProteomes()
      Zclist <- np$ZClist
      col <- c(col2, col2, col2, col4)
      lcol <- c(2, 2, 2, 4)
      bp <- boxplot(Zclist, ylab = Zclab, col = col, ylim = ylim, names = character(4))
      names(Zclist) <- paste0(names(Zclist), "\n(", sapply(Zclist, length), ")")
      axis(1, at = 1:4, labels = hyphen.in.pdf(names(Zclist)), line = 1, lwd = 0)
      # Names with "-" confuse multcompLetters4()
      names(Zclist) <- gsub("-", "", names(Zclist))
      add_cld(Zclist, bp)
      text(2, -0.12, "Anaerobic", font = 2, cex = 0.8)
      text(4.1, -0.242, "Anaerobic\nand aerobic", font = 2, cex = 0.8)
      abline(v = 3.5, lty = 2, lwd = 1.5, col = 8)
      title(hyphen.in.pdf("Nif-encoding genomes\n(Poudel et al., 2018)"), font.main = 1, cex.main = 1)
      # Get the amino acid compositions and species in each group
      aa <- np$AA
      groupnames <- np$types
      groups <- sapply(groupnames, function(group) aa$protein == group, simplify = FALSE)
      Ptab2 <- KWvsANOVA(Zclist)
      Ptab2 <- cbind(Variable = "Nif-encoding Zc", Ptab2)
    }

    if(i == 3) {
      # Thaumarchaeota 20220414
      # Amino acid compositions of predicted (Glimmer) and database (NCBI or IMG) proteomes
      predicted <- read.csv(system.file("extdata/utogig/Thaumarchaeota_predicted_AA.csv", package = "JMDplots"))
      database <- read.csv(system.file("extdata/utogig/Thaumarchaeota_database_AA.csv", package = "JMDplots"))
      # If both are available, use predicted instead of database
      aa <- rbind(predicted, database)
      aa <- aa[!duplicated(aa$organism), ]
      groupnames <- c("Basal", "Terrestrial", "Shallow", "Deep")
      ## Colors from Ren et al. (2019)
      #col <- c("#b2427e", "#c78d55", "#00a06f", "#4085c3")
      col <- c(col2, col4, col4, col4)
      lcol <- c(2, 4, 4, 4)
      Zc <- Zc(aa)
      Zclist <- lapply(groupnames, function(g) Zc[aa$protein == g])
      names(Zclist) <- groupnames
      names(Zclist) <- paste0(names(Zclist), "\n(", sapply(Zclist, length), ")")
      bp <- boxplot(Zclist, ylab = Zclab, col = col, ylim = ylim, names = character(4))
      add_cld(Zclist, bp)
      axis(1, at = 1:4, labels = names(Zclist), line = 1, lwd = 0, gap.axis = 0)
      text(0.9, -0.12, hyphen.in.pdf("Pre-GOE\norigin"), font = 2, cex = 0.8)
      text(2.1, -0.12, hyphen.in.pdf("Post-GOE\norigin"), font = 2, cex = 0.8)
      abline(v = 1.5, lty = 2, lwd = 1.5, col = 8)
      title("Thaumarchaeota\n(Ren et al., 2019)", font.main = 1, cex.main = 1)
      # Get the species in each group
      groups <- sapply(groupnames, function(group) aa$protein == group, simplify = FALSE)
      Ptab3 <- KWvsANOVA(Zclist)
      Ptab3 <- cbind(Variable = "Thaumarchaeota Zc", Ptab3)
    }

    # Make affinity rank plot 20220602

    # Save graphical settings that are modified by thermo.plot.new()
    opar <- par(c("mar", "mgp", "tcl", "las", "xaxs", "yaxs"))
    # Start plot
    thermo.plot.new(xlim = xlims[[i]], ylim = ylims[[i]], xlab = logaH2lab, ylab = "Mean rank of per-residue affinity (%)", yline = par("mgp")[1] + 0.3)
    # Color reducing and oxidizing areas from organic compounds 20220621
    file <- "H2_intermediate_-3.csv"
    dat <- read.csv(file.path(system.file("extdata/utogig", package = "JMDplots"), file))
    # Just use 25 degC values
    dat <- dat[dat$T==25, ]
    x <- dat[, "T", drop = FALSE]
    # Make rectangles
    rect(xlims[[i]][1], ylims[[i]][1], dat$hiN_hipH, ylims[[i]][2], col = "#E5737360", border = NA)
    rect(xlims[[i]][2], ylims[[i]][1], dat$loN_lopH, ylims[[i]][2], col = "#90CAF960", border = NA)
    rect(dat$hiN_hipH, ylims[[i]][1], dat$loN_lopH, ylims[[i]][2], col = "#9E9E9E60", border = NA)
    rect(dat$hiN_lopH, ylims[[i]][1], dat$loN_hipH, ylims[[i]][2], col = "#9E9E9E90", border = NA)

    # Load proteins and calculate affinity
    ip <- add.protein(aa, as.residue = TRUE)
    a <- affinity(H2 = xlims[[i]], iprotein = ip, T = T)
    # Calculate normalized sum of ranks for each group and make diagram
    arank <- rank.affinity(a, groups)
    names <- hyphen.in.pdf(names(groups))
    diagram(arank, col = lcol, lty = 1, lwd = 1.5, dx = dx[[i]], dy = dy[[i]], names = names, add = TRUE)
    par(opar)
    if(i == 1) label.figure("(b)", cex = 1.5, font = 2, xfrac = 0.06)

    # Draw line at transition
    itrans <- which.min(abs(arank$values[[trans[[i]][1]]] - arank$values[[trans[[i]][2]]]))
    logaH2 <- arank$vals$H2[itrans]
    ytop1 <- ylims[[i]][2] + diff(ylims[[i]]) * 0.05
    #A <- arank$values[[trans[[i]][1]]][itrans]
    #lines(c(logaH2, logaH2), c(A, ytop1), lty = 2, lwd = 1.5, col = 8, xpd = NA)
    ybot <- ylims[[i]][1]
    lines(c(logaH2, logaH2), c(ybot, ytop1), lty = 2, lwd = 1.5, col = 8, xpd = NA)

    # Calculate pe and Eh (mV)
    pe <- -pH - 0.5*logaH2 - 0.5*logK
    Eh <- 1000 * convert(pe, "Eh", pH = pH)
    Ehtext <- paste(round(Eh), "mV")
    ytop2 <- ylims[[i]][2] + diff(ylims[[i]]) * 0.1
    text(logaH2, ytop2, Ehtext, xpd = NA)

    logaH2s <- c(logaH2s, logaH2)

  }

  # Thaumarchaeota nH2O plot 20220622
  nH2O <- nH2O(aa)
  nH2Olist <- lapply(groupnames, function(g) nH2O[aa$protein == g])
  names(nH2Olist) <- groupnames
  names(nH2Olist) <- paste0(names(nH2Olist), "\n(", sapply(nH2Olist, length), ")")
  ylim <- range(nH2O)
  bp <- boxplot(nH2Olist, ylab = nH2Olab, col = col, ylim = ylim, names = character(4))
  add_cld(nH2Olist, bp)
  # Make rotated labels (modified from https://www.r-bloggers.com/rotated-axis-labels-in-r-plots/)
  text(x = 1:4, y = par()$usr[3] - 2 * strheight("A"), labels = groupnames, srt = 45, adj = 1, xpd = TRUE)
  axis(1, at = 1:4, labels = NA)
  title("Thaumarchaeota\n(Ren et al., 2019)", font.main = 1, cex.main = 1)
  label.figure("(c)", cex = 1.5, font = 2, xfrac = 0.06)
  Ptab4 <- KWvsANOVA(nH2Olist)
  Ptab4 <- cbind(Variable = "Thaumarchaeota nH2O", Ptab4)

  # Add legend
  plot.new()
  par(mar = c(0, 0, 0, 0))
  par(xpd = NA)
  dT <- describe.property("T", T)
  dP <- describe.property("P", 1)
  dbasis <- describe.basis(ibasis = c(1, 3, 5, 6))
  legend("left", legend = c(dT, dP, dbasis), bty = "n", y.intersp = 1.5, inset = c(-0.5, 0))
  par(xpd = FALSE)

  if(pdf) dev.off()

  # Print logaH2s
  print("logaH2 at dashed lines in (b):")
  print(round(logaH2s, 1))

  # Return P-value table 20220913
  out <- rbind(Ptab1, Ptab2, Ptab3, Ptab4)
  out$p_Tukey <- signif(out$p_Tukey, 2)
  out$p_Dunn <- signif(out$p_Dunn, 2)
  invisible(out)

}

# Plot intermediate logaH2 20220220
# Add class argument 20220418
intermediate_logaH2 <- function(class = NULL, add = FALSE, parargs = list(mar = c(4, 4, 1, 1), mgp = c(2.7, 1, 0)),
  xlim = c(0, 300), ylim = NULL, lines = TRUE, neutral = TRUE, redox = TRUE, pH = TRUE, NH4 = TRUE, pH.y = -9.4, NH4.y = -6.6, logact = -3) {
  if(!add) do.call(par, parargs)
  file <- paste0("H2_intermediate_", logact, ".csv")
  if(!is.null(class)) file <- paste0("H2_intermediate_", gsub(" ", "", class), "_", logact, ".csv")
  dat <- read.csv(file.path(system.file("extdata/utogig", package = "JMDplots"), file))
  x <- dat[, "T", drop = FALSE]
  y <- dat[, 2:5]
  if(!add) matplot(x, y, type = "n", xlab = Tlab, ylab = logaH2lab, xlim = xlim, ylim = ylim, xaxs = "i")
  # Fill area between lines
  polygon(c(dat$T, rev(dat$T)), c(dat$loN_lopH, rev(dat$hiN_hipH)), border = NA, col = "#9E9E9E60")
  polygon(c(dat$T, rev(dat$T)), c(dat$hiN_lopH, rev(dat$loN_hipH)), border = NA, col = "#9E9E9E90")
  # Fill reducing and oxidizing regions 20220401
  top <- rep(par("usr")[4], length(dat$T))
  polygon(c(dat$T, rev(dat$T)), c(top, rev(dat$hiN_hipH)), border = NA, col = "#E5737360")
  bot <- rep(par("usr")[3], length(dat$T))
  polygon(c(dat$T, rev(dat$T)), c(bot, rev(dat$loN_lopH)), border = NA, col = "#90CAF960")

  # Don't plot ends of lines 20220401
  Tlim <- c(10, 290)
  if(!is.null(class)) Tlim <- c(17, 270)
  if(pH) dat <- dat[dat$T > Tlim[1], ]
  if(NH4) dat <- dat[dat$T < Tlim[2], ]

  x <- dat[, "T", drop = FALSE]
  if(neutral) {
    y <- dat[, 2:7]
    # dashed/solid lines for lo/hi N
    lty <- c(2, 2, 1, 1, 2, 1)
    col <- c(1, 1, 1, 1, 6, 6)
  } else {
    y <- dat[, 2:5]
    lty <- c(2, 2, 1, 1)
    col <- c(1, 1, 1, 1)
  }

  if(lines) {
    matlines(x, y, lty = lty, col = col)
  }
  if(pH) {
    # Add pH labels at low T
    pHdat <- dat[1, ]
    pHlab <- character(ncol(pHdat))
    pHlab[grep("lopH", colnames(pHdat))] <- 3
    pHlab[grep("hipH", colnames(pHdat))] <- 9
    pHdat.x <- 10
    pH.x <- 8
    dy <- -0.1
    if(!is.null(class)) {
      pHdat.x <- c(14, 7, 14, 7)
      pH.x <- 12
      dy <- c(-0.1, -0.6, -0.1, -0.6)
    }
    text(pHdat.x, pHdat + dy, pHlab)
    text(pH.x, pH.y, "pH", font = 2)
  }
  if(NH4) {
    # Add logaNH4+ labels at high T
    NH4dat <- dat[nrow(dat), ]
    NH4lab <- character(ncol(NH4dat))
    NH4lab[grep("loN_", colnames(NH4dat))] <- -8
    NH4lab[grep("hiN_", colnames(NH4dat))] <- -3
    NH4dat.x <- 293
    NH4.x <- 278
    dy <- 0
    if(!is.null(class)) {
      NH4dat.x <- c(278, 290, 290, 278)
      NH4.x <- 270
      dy <- c(0, 0.5, 0.5, 0)
    }
    text(NH4dat.x, NH4dat + dy, NH4lab)
    text(NH4.x, NH4.y, logaNH4lab, font = 2)
  }
  if(redox) {
    # Add more/less oxidizing/reducing labels 20220401
    arrows(100, -12.2, 100, -9.8, length = 0.1, code = 3)
    text(110, -12, "More oxidizing", adj = 0)
    text(110, -10, "Less oxidizing", adj = 0)
    arrows(100, -6.6, 100, -3.6, length = 0.1, code = 3)
    text(90, -3.8, "More reducing", adj = 1)
    text(90, -6.4, "Less reducing", adj = 1)
  }
}

# Find intermediate logaH2: where affinity vs Zc has a slope of 0  20220219
# Add class argument (calculate only for single compound class) 20220418
calc_logaH2_intermediate <- function(class = NULL, logact = -3) {

  # Put species names into vector
  allspecies <- unlist(specieslist)
  # Use single compound class if given 20220418
  if(!is.null(class)) allspecies <- unlist(specieslist[class])

  # Calculate carbon number and oxidation state
  ispecies <- suppressMessages(info(allspecies))
  nC <- sapply(makeup(ispecies), "[", "C")
  Zc <- ZC(ispecies)

  # Load basis species with default activities
  reset()
  basis(c("CO2", "H2", "NH4+", "H2O", "H+"), c(-3, -6, -4, 0, -7))
  # Load formed species
  species(allspecies, logact)
  # Species indices of basis and formed species
  basis_and_species <- c(basis()$ispecies, species()$ispecies)

  # Function to calculate slope of affinity - Zc linear fit
  slopefun <- function(H2, T, sout, plot.it = FALSE) {
    # Set H2 activity
    basis("H2", H2)
    # Calculate affinities
    a <- suppressMessages(affinity(T = T, sout = sout))
    A <- unlist(a$values)
    # Convert to Gibbs energy (J/mol)
    TK <- convert(T, "K")
    A <- -convert(A, "G", TK)
    # Convert to kJ/mol
    A <- convert(A, "J") / 1000
    # Divide A by number of carbon
    AC <- A / nC

    # Calculate linear fit
    thislm <- lm(AC ~ Zc)
    # Get slope
    slope <- thislm$coefficients[[2]]
    slope
  }

  # Function to find root at given temperature(s)
  unifun <- function(T = 25) {
    sapply(T, function(thisT) {
      ## Calculate affinity to get subcrt output at this temperature
      #a <- affinity()
      #sout <- a$sout
      # Get subcrt output at this temperature
      sout <- suppressMessages(subcrt(basis_and_species, T = thisT, property = "logK"))
      H2 <- uniroot(slopefun, c(-50, 50), T = thisT, sout = sout)
      H2$root
    })
  }

  # Find root for different combinations of activities of non-H2 basis species
  T = seq(0, 300, 5)
  basis(c("NH4+", "pH"), c(-8, 3))
  loN_lopH <- round(unifun(T = T), 6)
  basis(c("NH4+", "pH"), c(-8, 9))
  loN_hipH <- round(unifun(T = T), 6)
  basis(c("NH4+", "pH"), c(-3, 3))
  hiN_lopH <- round(unifun(T = T), 6)
  basis(c("NH4+", "pH"), c(-3, 9))
  hiN_hipH <- round(unifun(T = T), 6)

  # Calculate neutral pH
  logK <- subcrt(c("H2O", "OH-", "H+"), c(-1, 1, 1), T = T)$out$logK
  pH <- logK / -2
  # Given lopH == 3, hipH == 9, place neutral pH in the range [lopH, hipH] rescaled to [0, 1]
  pHfrac <- (pH - 3) / (9 - 3)
  # Calculate the value of logaH2 at neutral pH and low and high N
  neutralpH_loN <- round(loN_lopH + pHfrac * (loN_hipH - loN_lopH), 6)
  neutralpH_hiN <- round(hiN_lopH + pHfrac * (hiN_hipH - hiN_lopH), 6)

  # Save results
  out <- data.frame(T, loN_lopH, loN_hipH, hiN_lopH, hiN_hipH, neutralpH_loN, neutralpH_hiN)
  file <- paste0("H2_intermediate_", logact, ".csv")
  if(!is.null(class)) file <- paste0("H2_intermediate_", gsub(" ", "", class), "_", logact, ".csv")
  write.csv(out, file, row.names = FALSE, quote = FALSE)

}

# Comparison of Zc of proteomes predicted by Glimmer and downloaded from NCBI 20220604
utogigS1 <- function(pdf = FALSE) {
  if(pdf) pdf("Figure_S1.pdf", width = 4, height = 4)
  par(mar= c(4.1, 4.1, 1, 1))
  predicted <- read.csv(system.file("extdata/utogig/Thaumarchaeota_predicted_AA.csv", package = "JMDplots"))
  database <- read.csv(system.file("extdata/utogig/Thaumarchaeota_database_AA.csv", package = "JMDplots"))
  organisms <- intersect(predicted$organism, database$organism)
  predicted <- predicted[match(organisms, predicted$organism), ]
  database <- database[match(organisms, database$organism), ]
  y <- Zc_predicted <- Zc(predicted)
  x <- Zc_database <- Zc(database)
  # Colors from Ren et al. (2019)
  groupcol <- c(Basal = "#b2427e", Terrestrial = "#c78d55", Shallow = "#00a06f", Deep = "#4085c3")
  col <- groupcol[match(database$protein, names(groupcol))]
  pch <- match(database$protein, names(groupcol)) + 13
  plot(Zc_database, Zc_predicted, pch = pch, col = col,
       xlab = quote(bolditalic(Z)[bold(C)]~"(NCBI proteins)"), ylab = quote(bolditalic(Z)[bold(C)]~"(Glimmer proteins)"))
  # Add legend
  icol <- which(!duplicated(col))
  legend("topleft", names(col)[icol], col = col[icol], pch = pch[icol])
  # Calculate linear regression
  thislm <- lm(y ~ x)
  xylim <- c(-0.22, -0.10)
  lines(xylim, predict(thislm, data.frame(x = xylim)), col = "#00000080")
  # Show R2 and slope
  R2 <- summary(thislm)$r.squared
  R2txt <- bquote(italic(R)^2 == .(formatC(R2, digits = 3, format = "f")))
  legend("bottomright", legend = R2txt, bty = "n")
  Slope <- coef(thislm)["x"]
  Slopetxt <- paste("slope =", formatC(Slope, digits = 3, format = "f"))
  legend("bottomright", legend = c(Slopetxt, ""), bty = "n")
  if(pdf) dev.off()
}

# Association between redox gradients and Zc of proteins and lipids in alkaline Yellowstone hot springs 20210516
utogigS2 <- function(pdf = FALSE) {

  if(pdf) pdf("Figure_S2.pdf", width = 8.5, height = 4)
  layout(matrix(1:3, nrow = 1), widths = c(1.07, 1, 0.4))
  par(cex = 1)

  ## Plot A: Proteins
  par(mar = c(4, 4.2, 0.5, 1))
  plot(c(50, 100), c(-0.22, -0.12), type = "n", xlab = Tlab, ylab = Zclab, xaxs = "i", axes = FALSE)
  # Remove leading zeros for better visual parallel with lipids
  at <- seq(-0.22, -0.12, 0.02)
  labels <- gsub("-0", "-", formatC(at, digits = 2, format = "f"))
  axis(2, at, labels)
  axis(1)
  box()
  # Get Zc of Bison Pool proteins aggregated by phylum
  aa.phyla <- read.csv(system.file("extdata/bison/DS13.csv", package = "JMDplots"))
  Zc <- ZC(protein.formula(aa.phyla))
  # Get T values
  site <- gsub("bison", "", aa.phyla$protein)
  T <- sapply(site, switch, N = 93.3, S = 79.4, R = 67.5, Q = 65.3, P = 57.1)
  # Plot lines for each phlum
  for(phylum in unique(aa.phyla$organism)) {
    iphylum <- aa.phyla$organism == phylum
    lines(T[iphylum], Zc[iphylum], col = "gray")
  }

  # Make different color groups based on temperature
  ilo <- T < 60
  imid <- T >= 60 & T <= 70
  ihi <- T > 70
  points(T[ilo], Zc[ilo], pch = 22, bg = col4)
  points(T[imid], Zc[imid], pch = 23, bg = col8)
  points(T[ihi], Zc[ihi], pch = 21, bg = col2)
  # Add labels
  text(83, -0.14, "Proteins grouped\nby major phyla")
  label.figure("(a)", cex = 1.5, font = 2, xfrac = 0.06)

  ## Plot B: Lipids
  par(mar = c(4, 3, 0.5, 1))
  plot(c(25, 100), c(-2.2, -1.2), type = "n", xlab = Tlab, ylab = "", xaxs = "i")
  # Add "100" because it doesn't show up 20220408
  axis(1, 100, tick = FALSE)
  # IPL data from Boyer et al., 2020 Fig. 7
  T <- c(29.0, 35.1, 38.1, 40.5, 51.6, 53, 59.8, 60.7, 63.1, 64.8, 70.5, 73.3, 77.3, 80.9, 82.2, 85.4, 89.0, 91.0)
  Zc.alkyl <- c(-1.73, -1.71, -1.77, -1.80, -1.83, -1.82, -1.89, -1.90, -1.88, -1.92, -1.92, -1.84, -1.93, -1.89, -1.96, -1.95, -1.92, -1.94)
  Zc.IPL <- c(-1.36, -1.33, -1.37, -1.38, -1.44, -1.43, -1.48, -1.54, -1.46, -1.52, -1.54, -1.45, -1.61, -1.53, -1.60, -1.56, -1.56, -1.68)
  # Make different color groups based on temperature
  ilo <- T < 60
  imid <- T >= 60 & T <= 70
  ihi <- T > 70
  # Plot points
  points(T[ilo], Zc.alkyl[ilo], pch = 22, bg = col4)
  points(T[imid], Zc.alkyl[imid], pch = 23, bg = col8)
  points(T[ihi], Zc.alkyl[ihi], pch = 21, bg = col2)
  points(T[ilo], Zc.IPL[ilo], pch = 22, bg = col4)
  points(T[imid], Zc.IPL[imid], pch = 23, bg = col8)
  points(T[ihi], Zc.IPL[ihi], pch = 21, bg = col2)
  # Label molecular groups
  text(70, -1.32, "Intact polar lipids")
  text(50, -2, "Alkyl chains")
  label.figure("(b)", cex = 1.5, font = 2, xfrac = 0.01)

  # Add legend 20220221
  # Move to right panel 20220408
  plot.new()
  par(mar = c(0, 0, 0, 0))
  par(xpd = NA)
  legend <- as.expression(c(quote(" "*italic(T) < 60~degree*C*";"), " less reducing", "",
                            quote(" Transition zone"), "",
                            quote(" "*italic(T) > 70~degree*C*";"), " more reducing"))
  legend("left", legend = legend, pch = c(22, NA, NA, 23, NA, 21, NA), pt.bg = c(col4, NA, NA, col8, NA, col2, NA),
    bty = "n", inset = -1, y.intersp = 1)
  par(xpd = FALSE)

  if(pdf) dev.off()

}


# logaH2-T plots for different organic compound classes 20220418
utogigS4 <- function(pdf = FALSE) {
  # To generate files:
  # lapply(names(specieslist), calc_logaH2_intermediate)
  if(pdf) pdf("Figure_S4.pdf", width = 6, height = 8)
  par(mfrow = c(3, 2))
  parargs <- list(mar = c(4, 4, 3, 1), mgp = c(2.7, 1, 0))
  ylim <- c(-25, 0)
  titlefun <- function(class) title(paste0(class, " (", length(specieslist[[class]]), ")"))
  intermediate_logaH2("C1 and C2",  parargs = parargs, ylim = ylim, redox = FALSE, neutral = FALSE, pH = FALSE, NH4 = FALSE)
  titlefun("C1 and C2")
  intermediate_logaH2("Acid",       parargs = parargs, ylim = ylim, redox = FALSE, neutral = FALSE, pH = FALSE, NH4 = FALSE)
  titlefun("Acid")
  intermediate_logaH2("Amino acid", parargs = parargs, ylim = ylim, redox = FALSE, neutral = FALSE, pH.y = -8.5, NH4.y = -10)
  titlefun("Amino acid")
  intermediate_logaH2("Sugar",      parargs = parargs, ylim = ylim, redox = FALSE, neutral = FALSE, pH = FALSE, NH4 = FALSE)
  titlefun("Sugar")
  intermediate_logaH2("Nucleobase", parargs = parargs, ylim = ylim, redox = FALSE, neutral = FALSE, pH.y = -12, NH4.y = -17)
  titlefun("Nucleobase")
  intermediate_logaH2("TCA cycle",  parargs = parargs, ylim = ylim, redox = FALSE, neutral = FALSE, NH4 = FALSE, pH.y = -10)
  titlefun("TCA cycle")
  if(pdf) dev.off()
}

# Tabulate p-values for
# ANOVA + TukeyHSD (parametric) and
# Kruskal-Wallis + Dunn (non-parametric)
# 20220913
KWvsANOVA <- function(Zclist) {
  # Ugly one-liner to turn a list into a data frame with "group" column taken from names of the list elements
  Zcdat <- do.call(rbind, sapply(1:length(Zclist), function(i) data.frame(group = names(Zclist)[i], Zc = Zclist[[i]]), simplify = FALSE))
  # Remove numeric component of labels (after \n)
  Zcdat$group <- sapply(strsplit(Zcdat$group, "\n"), "[", 1)
  # Convert 'group' column to factor to avoid warning from dunnTest()
  Zcdat$group <- as.factor(Zcdat$group)
  # One-way ANOVA and Tukey's Honest Significant Differences
  # Adapted from https://statdoe.com/one-way-anova-and-box-plot-in-r/
  anova <- aov(Zc ~ group, data = Zcdat)
  tukey <- TukeyHSD(anova)
  # Kruskal-Wallis followed by Dunn test
  # Adapted from https://www.statology.org/dunns-test-in-r/
  dunn <- dunnTest(Zc ~ group, data = Zcdat, method = "bonferroni")
  # Put together table
  comparison.tukey <- rownames(tukey$group)
  comparison.dunn <- sapply(lapply(strsplit(comparison.tukey, "-"), rev), paste, collapse = " - ")
  idunn <- match(comparison.dunn, dunn$res$Comparison)
  data.frame(Comparison = comparison.dunn, p_Tukey = tukey$group[, "p adj"], p_Dunn = dunn$res$P.adj[idunn])
}