# JMDplots/utogig.R
# Plots for perspective paper on geochemical biology
# 20200418 jmd first version (review of Bison Pool and amino acid synthesis [AS98])
# 20210516 Add methanogen tree
# 20210727 Add nH2O-Zc overview plot (deleted)
# 20220220 Make biological hierarchy diagram (deleted)
# 20220420 Move to JMDplots
# Create bold axis labels
Zclab <- quote(bolditalic(Z)[bold(C)])
Tlab <- quote(bolditalic(T)~bold("("*degree*C*")"))
Alab <- quote(bold("Affinity"~(kJ~"(mol C)"^{-1})))
Aresiduelab <- quote(bold("Affinity"~(kJ~"(mol residue)"^{-1})))
Toptlab <- quote(bolditalic(T)[bold(opt)]~bold("("*degree*C*")"))
logaH2maxlab <- quote(bold(log)~bolditalic(a)*bold(H[2])[bold("for maximum activity")])
logalab <- quote(bold(log)~bolditalic(a))
logaH2lab <- quote(bold(log)~bolditalic(a)*bold(H[2]))
logaNH4lab <- quote(bold(log)~bolditalic(a)*bold(NH[4]^"+"))
nH2Olab <- quote(bolditalic(n)*bold(H[2]*O))
# Identify species used in Shock & Canovas (2010)
# Used in Energy() and calc_logaH2_intermediate()
specieslist <- list(
"C1 and C2" = c("CH4", "methanol", "formaldehyde", "CO", "formic acid",
"ethane", "ethylene", "ethyne", "ethanol", "acetaldehyde", "acetic acid", "glycolic acid", "oxalic acid"),
Acid = c("formic acid", "acetic acid", "propanoic acid", "n-hexanoic acid", "n-nonanoic acid", "n-dodecanoic acid"),
"Amino acid" = c("glycine", "alanine", "valine", "leucine", "aspartic acid", "asparagine", "glutamic acid", "glutamine", "serine", "proline", "phenylalanine", "tryptophan"),
Sugar = c("ribose", "ribulose", "deoxyribose", "glucose"),
Nucleobase = c("adenine", "guanine", "thymine", "cytosine"),
# TCA cycle metabolites added 20211110
"TCA cycle" = c("pyruvate", "oxaloacetate-2", "citrate-3", "cis-aconitate-3", "isocitrate-3", "a-ketoglutarate-2", "succinate-2", "fumarate-2", "malate-2")
)
# Get seawater from Amend & Shock (1998)
Seawater.AS98 <- data.frame(T = 18, CO2 = log10(1e-4), H2 = log10(2e-9), pH = -log10(5e-9), "NH4+" = log10(5e-8), H2S = log10(1e-15), check.names = FALSE)
# Make semi-transparent colors 20220223
col4 <- adjustcolor(4, alpha.f = 0.69)
col8 <- adjustcolor(8, alpha.f = 0.69)
col2 <- adjustcolor(2, alpha.f = 0.69)
# Read methanogen tree
methanogen_tree <- read.tree(system.file("extdata/utogig/methanogen_tree.txt", package = "JMDplots"))
# Match species names without underscore used for labeling the tree
methanogens <- gsub("_", " ", methanogen_tree$tip.label)
# Indices of Class I and Class II methanogens
iI <- 20:36
iII <- 1:19
# Assign point symbols and colors for methanogens
mcol <- mbg <- mpch <- rep(NA, length(methanogens))
# Open red circle for Class I
mpch[iI] <- 21
mbg[iI] <- "transparent"
mcol[iI] <- col2
# Filled red circle for Methanocaldococcus 20220220
mbg[grepl("Methanocaldococcus", methanogens)] <- col2
# Grey-filled red circle for outliers
iout <- methanogens %in% c("Methanothermobacter marburgensis", "Methanothermobacter thermautotrophicus", "Methanopyrus kandleri")
mbg[iout] <- col8
mcol[iout] <- col2
# Filled blue square for Class II
mpch[iII] <- 22
mbg[iII] <- col4
mcol[iII] <- col4
## To make methanogen_AA.csv
## Read RefSeq amino acid compositions
## NOTE: taxids are in 'organism' column, species names are in 'ref' column
#refseq <- read.csv(system.file("RefDB/RefSeq_206/genome_AA.csv.xz", package = "JMDplots"), as.is = TRUE)
#irefseq <- match(methanogens, refseq$ref)
## Amino acid compositions of reference proteomes of methanogens
#methanogen_AA <- refseq[irefseq, ]
#write.csv(methanogen_AA, "methanogen_AA.csv", row.names = FALSE, quote = FALSE)
# Chemical analysis of reference proteomes of methanogens reveals adaptation to redox conditions 20210516
utogig1 <- function(pdf = FALSE) {
if(pdf) pdf("Figure_1.pdf", width = 12, height = 8)
layout(matrix(1:2, nrow = 1), widths = c(1.5, 0.7))
par(mar = c(2, 0, 1, 32))
# Read and rotate the tree (Class I at bottom) 20220629
methanogen_tree <- read.tree(system.file("extdata/utogig/methanogen_tree.txt", package = "JMDplots"))
labels <- rev(methanogen_tree$tip.label)
methanogen_tree <- rotateConstr(methanogen_tree, labels)
# Make the tree with colors to distinguish Class I and Class II
edge.color <- c(rep(2, 37), rep(4, 37))
pp <- plot.phylo(methanogen_tree, root.edge = TRUE, edge.color = edge.color, show.tip.label = FALSE, rotate.tree = 90)
# Add tip labels 20220531
labels <- gsub("_", " ", labels)
FStxt <- "sp. FS406-22"
iFS <- grep(FStxt, labels, fixed = TRUE)
labels <- gsub(FStxt, "", labels, fixed = TRUE)
dy <- -0.25
text(3100, 1:36 + dy, labels, font = 3, adj = c(0, 0), xpd = NA)
text(4170, iFS + dy, hyphen.in.pdf(FStxt), adj = c(0, 0), xpd = NA)
# Calculate Zc from protein formulas
methanogen_AA <- read.csv(system.file("extdata/utogig/methanogen_AA.csv", package = "JMDplots"))
Zc <- ZC(protein.formula(methanogen_AA))
# Add labels
text(0, 32, "Class II", font = 2, adj = 0, cex = 1.2)
text(0, 7, "Class I", font = 2, adj = 0, cex = 1.2)
text(3200, -2.4, Zclab, adj = c(0, 0), xpd = NA)
label.figure("(a)", cex = 1.6, font = 2, yfrac = 0.97)
# Setup a new plot in the empty space between the tree and names
par(new = TRUE)
par(mar = c(2, 9, 1, 18))
plot(range(Zc), c(1, length(Zc)), type = "n", ylab = "", axes = FALSE)
axis(1, at = seq(-0.25, -0.15, 0.01), labels = FALSE, tcl = -0.3)
axis(1, at = c(-0.25, -0.2, -0.15))
axis(3, at = seq(-0.25, -0.15, 0.01), labels = FALSE, tcl = -0.3)
# Plot guidelines 20220405
abline(v = c(-0.25, -0.20, -0.15), lty = 2, col = 8)
# Plot lines from Zc to organism name
for(i in 1:36) lines(c(rev(Zc)[i], -0.145), c(i, i), lty = 3, col = "gray40")
# Plot the Zc
points(Zc, rev(c(iII, iI)), pch = mpch, col = mcol, bg = mbg, cex = 1.2)
# Zc-Topt plot 20220224
par(mar = c(9, 2, 7, 1))
# Read Topt
dat <- read.csv(system.file("extdata/utogig/Topt.csv", package = "JMDplots"))
# Append Zc column
methanogen_AA <- read.csv(system.file("extdata/utogig/methanogen_AA.csv", package = "JMDplots"))
Zc <- ZC(protein.formula(methanogen_AA))
dat$Zc <- Zc[match(dat$species, methanogens)]
# Use par(xpd = NA) to show the y-axis label 20220401
par(xpd = NA)
plot(dat$Topt, dat$Zc, pch = mpch, col = mcol, bg = mbg, xlab = Toptlab, ylab = Zclab, ylim = c(-0.26, -0.14))
par(xpd = FALSE)
text(50, -0.24, "Class I", font = 2)
text(40, -0.145, "Class II", font = 2)
# Uncomment this to check the alignment of separate text items below
#text(85, -0.168, "High-Zc\nthermophiles\n(Class I)", col = 2)
text(85, -0.163, bquote(.(hyphen.in.pdf("High-"))*italic(Z)[C]))
text(85, -0.1705, "thermophiles\n(Class I)")
text(80, -0.257, "Methanocaldococcus", font = 3)
# Add convex hulls 20220401
dat2 <- dat[iII, ]
i2 <- chull(dat2$Topt, dat2$Zc)
polygon(dat2$Topt[i2], dat2$Zc[i2], col = "#90CAF960", border = NA)
dat1 <- dat[setdiff(iI, which(iout)), ]
i1 <- chull(dat1$Topt, dat1$Zc)
polygon(dat1$Topt[i1], dat1$Zc[i1], col = "#E5737360", border = NA)
label.figure("(b)", cex = 1.6, font = 2, yfrac = 0.85, xfrac = -0.05)
if(pdf) dev.off()
}
# Relative stabilities of organic compounds depend on redox conditions 20211109
utogig2 <- function(pdf = FALSE, logact = -3) {
if(missing(logact)) {
if(pdf) pdf("Figure_2.pdf", width = 7, height = 6)
mat <- matrix(c(1,1,2,2,3,3,4, 0,5,5,5,5,0,0), byrow = TRUE, nrow = 2)
layout(mat, heights = c(1, 0.7), widths = c(1.1,1,1,1,1,1,1))
# Activity of H2 to be used with logact = -3
# (Second plot has zero slope)
logaH2s <- c(-5, -10.262, -15)
} else {
if(pdf) pdf("Figure_S3.pdf", width = 7, height = 3.5)
mat <- matrix(c(1,1,2,2,3,3,4), byrow = TRUE, nrow = 1)
layout(mat, widths = c(1.1,1,1,1,1,1,1))
# Activity of H2 to be used with logact = -6
# (Second plot has zero slope)
logaH2s <- c(-5, -10.351, -15)
}
par(mar = c(4, 4, 0.5, 1), mgp = c(2.7, 1, 0))
# Set temperature
T <- 25
# Use seawater composition from Amend & Shock (1998)
# NOTE: H2S is present but is not in any formation reactions 20220403
basis(c("CO2", "H2", "NH4+", "H2O", "H2S", "H+"), c(Seawater.AS98$CO2, Seawater.AS98$H2, Seawater.AS98$"NH4+", 0, Seawater.AS98$H2S, -Seawater.AS98$pH))
# Position of slope text
slopey <- c(-112, -160, -50)
# Place to store p-values
pvals <- numeric()
# Loop over logaH2 activities
for(ilogaH2 in c(1, 2, 3)) {
logaH2 <- logaH2s[ilogaH2]
basis("H2", logaH2)
# Start plot
xlim <- c(-4, 3)
ylim <- c(-310, 100)
if(ilogaH2 == 1) ylab <- Alab
if(ilogaH2 > 1) {
ylab <- ""
par(mar = c(4, 3, 0.5, 1))
}
plot(xlim, ylim, type = "n", xlab = Zclab, ylab = ylab, xaxt = "n")
axis(1, gap.axis = 0)
abline(h = 0, lty = 4)
pchs <- c(21:25, 20)
cols <- adjustcolor(c(7, 5, 8, 5, 3, 1), alpha.f = 0.69)
# Loop over groups of species
# Start with n-carboxylic acids (i = 2)
AC_all <- Zc_all <- numeric()
for(i in c(2, 1, 3, 4, 5, 6)) {
# Load species
myspecies <- specieslist[[i]]
species(myspecies, logact)
# Calculate affinities
a <- affinity(T = T)
A <- unlist(a$values)
# Convert to Gibbs energy (J/mol)
TK <- convert(T, "K")
A <- -convert(A, "G", TK)
# Convert to kJ/mol
A <- A / 1000
# Divide A by number of carbon
nC <- sapply(makeup(info(myspecies)), "[", "C")
AC <- A / nC
# Calculate carbon oxidation state
Zc <- ZC(info(myspecies))
# Adjust point size for n-carboxylic acids
cex <- 1
if(i == 2) cex <- 1.5
if(i < 6) points(Zc, AC, pch = pchs[i], bg = cols[i], cex = cex)
else points(Zc, AC, pch = pchs[i], col = cols[i], cex = cex)
AC_all <- c(AC_all, AC)
Zc_all <- c(Zc_all, Zc)
}
# Calculate linear fit
thislm <- lm(AC_all ~ Zc_all)
x <- range(Zc_all)
y <- predict.lm(thislm, data.frame(Zc_all = x))
# Plot linear fit
lines(x, y, lty = 2, lwd = 2, col = 8)
# Add legend with slope
slope <- thislm$coefficients[[2]]
word <- "Slope"
if(ilogaH2 == 1) {
word <- "linear fit"
text(-1.53, -95, "Slope of")
}
slopetxt <- bquote(.(word) == .(formatC(slope, digits = 3, format = "f")))
if(round(slope, 3) == 0) slopetxt <- "Slope = 0"
text(-0.5, slopey[ilogaH2], slopetxt)
# Show R2 20220915
R2 <- summary(thislm)$r.squared
R2_txt <- bquote(italic(R)^2 == .(formatC(R2, digits = 2, format = "f")))
text(-0.5, slopey[ilogaH2] - 20, R2_txt)
# Calculate p-value 20220915
pvals <- c(pvals, cor.test(AC_all, Zc_all)$p.value)
# Add title: logaH2
Htxt <- bquote(bold(log~bolditalic(a)*H[2] == .(logaH2)))
title(Htxt, line = -1, cex.main = 1)
if(ilogaH2 == 3) {
# Add oxidation-intermediate-reduction arrow 20220221
par(xpd = NA)
x1 <- -21.5
x2 <- 0
dx <- 3.2
rect(x1 - dx, -317, x2 + dx, -227, col = "white", lty = 3)
x1.1 <- x1 + 6
x2.1 <- x2 - 6
arrows(x1.1, -300, x1, -300, col = "#D50000", length = 0.1, lwd = 2)
arrows(x2.1, -300, x2, -300, col = "#366BFB", length = 0.1, lwd = 2)
lines(c(x1.1, x2.1), c(-300, -300), col = 8, lwd = 2)
# Uncomment this to check the alignment of separate text items below
#text(x1, -288, "Reducing conditions:\nReduced compounds\nare relatively stable", adj = c(0.5, 0), col = 2)
text(x1 + 1, -248, "Reducing conditions:", adj = c(0.5, 0), font = 3)
text(x1 + 1, -288, "Reduced compounds are\nmore stable than oxidized ones", adj = c(0.5, 0))
#text(x2, -288, "Oxidizing conditions:\nOxidized compounds\nare relatively stable", adj = c(0.5, 0), col = 2)
text(x2 - 1, -248, "Oxidizing conditions:", adj = c(0.5, 0), font = 3)
text(x2 - 1, -288, "Oxidized compounds are\nmore stable than reduced ones", adj = c(0.5, 0))
#text(mean(c(x1, x2)), -288, "Intermediate conditions:\nReduced and oxidized compounds\nare approximately equally stable", adj = c(0.5, 0), col = 2)
text(mean(c(x1, x2)), -248, "Intermediate conditions:", adj = c(0.5, 0), font = 3)
text(mean(c(x1, x2)), -288, "Reduced and oxidized compounds\nare approximately equally stable", adj = c(0.5, 0))
par(xpd = FALSE)
}
if(ilogaH2 == 1) {
if(missing(logact)) {
text(-3.1, 72, quote(CH[4]))
label.figure("(a)", cex = 1.5, font = 2, yfrac = 0.97)
} else {
text(-3.1, 90, quote(CH[4]))
}
}
}
# Add legend 20220407
plot.new()
par(mar = c(0, 0, 0, 0))
par(xpd = NA)
legend <- paste0(" ", names(specieslist))
legend(-0.8, 0.7, legend, pch = pchs, pt.cex = c(1, 1.5, 1, 1, 1, 1),
pt.bg = cols, col = c(1, 1, 1, 1, 1, cols[6]), bty = "n", y.intersp = 1.2)
dT <- describe.property("T", T)
dP <- describe.property("P", 1)
dbasis <- describe.basis(ibasis = c(1, 3, 6))
legend("topright", legend = c(dT, dP, dbasis), bty = "n", y.intersp = 1.2)
par(xpd = FALSE)
if(missing(logact)) {
# Plot intermediate logaH2 20220401
intermediate_logaH2(logact = logact)
# Add Psat legend 20220629
legend("bottomright", legend = quote(bolditalic(P)[bold(sat)]), bty = "n")
label.figure("(b)", cex = 1.5, font = 2)
}
if(pdf) dev.off()
# Print p-values
if(missing(logact)) {
print("p-values for (a):")
} else {
print("p-values:")
}
print(signif(pvals, 2))
}
# Thermodynamic model for methanogen niche partitioning 20220402
utogig3 <- function(pdf = FALSE) {
if(pdf) pdf("Figure_3.pdf", width = 7, height = 5)
mat <- matrix(c(1,1,2,2,3,3,4, 5,5,5,6,6,6,0), nrow = 2, byrow = TRUE)
layout(mat, widths = c(1.2,1,1,1,1,1,1.2))
par(mar = c(4, 4, 0.5, 1), mgp = c(2.7, 1, 0))
# Define basis species and temperature
basis(c("CO2", "H2", "NH4+", "H2O", "H2S", "H+"), c(Seawater.AS98$CO2, Seawater.AS98$H2, Seawater.AS98$"NH4+", 0, Seawater.AS98$H2S, -Seawater.AS98$pH))
T <- 25
# Calculate Zc of methanogen proteomes excluding high-Zc outliers
methanogen_AA <- read.csv(system.file("extdata/utogig/methanogen_AA.csv", package = "JMDplots"))
Zc <- ZC(protein.formula(methanogen_AA[!iout, ]))
# Add proteins to CHNOSZ
ip <- add.protein(methanogen_AA[!iout, ])
pl <- protein.length(ip)
# Create combinations between Class I and II methanogens
# Class I methanogens excluding high-Zc outliers
iIin <- setdiff(1:length(ip), iII)
ncomb <- length(iIin) * length(iII)
i1 <- rep(iIin, length.out = ncomb)
i2 <- rep(iII, each = length(iIin))
## Plot A: affinities at specified logaH2
logaH2s <- c(-5, -7.467, -10)
for(i in 1:3) {
if(i == 1) ylab <- Aresiduelab
if(i > 1) {
ylab <- ""
par(mar = c(4, 2.5, 0.5, 1))
}
basis("H2", logaH2s[i])
# Calculate affinities
a <- affinity(iprotein = ip, T = T)
A <- unlist(a$values)
# Convert to Gibbs energy (J/mol)
TK <- convert(T, "K")
A <- -convert(A, "G", TK)
# Convert to kJ/mol
A <- A / 1000
# Calculate affinity per residue
Ares <- A / pl
plot(Zc, Ares, pch = mpch[!iout], col = mcol[!iout], bg = mbg[!iout], xlab = Zclab, ylab = ylab)
# Percentage of combinations that favor Class I over Class II
p1 <- round(sum(sign(Ares[i1] - Ares[i2]) == 1) / ncomb * 100)
legend <- bquote(.(p1)*"%"~italic(A)[I] > italic(A)[II])
if(i < 3) inset <- c(-0.1, 0) else inset <- c(0.05, 0)
legend("bottomleft", legend = legend, bty = "n", inset = inset)
if(i == 1) legend("bottomleft", c("Pairwise affinity", "differences:", ""), inset = c(-0.1, 0), bty = "n")
# Add legend: logaH2
Htxt <- bquote(bold(log~bolditalic(a)*H[2] == .(logaH2s[i])))
if(i > 1) legend.x <- "topleft" else legend.x <- "top"
legend(legend.x, legend = Htxt, bty = "n")
if(i==1) label.figure("(a)", cex = 1.5, font = 2, yfrac = 0.96, xfrac = 0.06)
}
# Add legend 20220403
plot.new()
par(mar = c(0, 0, 0, 0))
par(xpd = NA)
legend <- c("Class II", "Class I")
legend(-0.6, 0.50, legend = legend, pch = c(22, 21), col = c(col4, col2), pt.bg = c(col4, "white"), bty = "n", y.intersp = 1.2)
legend(-0.45, 0.35, legend = c("Methanocaldococcus"), pch = 21, col = col2, pt.bg = col2, text.font = 3, bty = "n", cex = 0.9, pt.cex = 1)
dT <- describe.property("T", T)
dP <- describe.property("P", 1)
dbasis <- describe.basis(ibasis = c(1, 3, 5, 6))
legend("topright", legend = c(dT, dP, dbasis), bty = "n", y.intersp = 1.1)
par(xpd = FALSE)
## Plot B: Contour plot for affinity differences between Class I and Class II proteomes 20220402
# Calculate affinities as a function of logaH2 and T
par(mar = c(4, 4, 1, 2.2))
H2 <- c(-10, -1.5, 200)
T <- c(20, 100, 200)
a <- affinity(T = T, H2 = H2, iprotein = ip)
# Calculate affinity per residue
Ares <- mapply("/", a$values, pl, SIMPLIFY = FALSE)
# Calculate sign of difference between all combinations
Ares.diff <- mapply("-", Ares[i1], Ares[i2], SIMPLIFY = FALSE)
Ares.diff.sign <- sapply(Ares.diff, sign, simplify = FALSE)
Ares.diff.positive <- sapply(Ares.diff.sign, "==", y = 1, simplify = FALSE)
# Sum to get percentage of combinations that favor Class I over Class I
Ares.diff.positive.sum <- Reduce("+", Ares.diff.positive)
p1 <- Ares.diff.positive.sum / ncomb * 100
levels <- c(10, 50, 100)
contour(a$vals$T, a$vals$H2, p1, xlab = Tlab, ylab = logaH2lab, xaxs = "i", yaxs = "i",
levels = c(10, 50, 100), drawlabels = FALSE,
col = c("#366BFB", 1, "#D50000"))
# Plot intermediate logaH2 background
intermediate_logaH2(add = TRUE, lines = FALSE, pH = FALSE, NH4 = FALSE, redox = FALSE)
# Replot contour lines to make them stand out
contour(a$vals$T, a$vals$H2, p1, add = TRUE,
levels = c(10, 50, 100), drawlabels = FALSE,
col = c("#366BFB", 1, "#D50000"))
arrows(60, -7.8, 60, -4.6, length = 0.1, code = 3)
text(55, -5.6, "More reducing", adj = 1)
text(55, -7.9, "Less reducing", adj = 1)
text(80, -2.6, "Class I proteomes\nare most stable", cex = 0.8)
text(80, -4.5, "Class I proteomes\nare more stable", cex = 0.8)
text(80, -6.8, "Class II proteomes\nare more stable", cex = 0.8)
# Add contour labels in margin 20220407
CL <- contourLines(a$vals$T, a$vals$H2, p1, levels = c(10, 50, 100))
par(xpd = NA)
text(101, tail(CL[[1]]$y, 1), "10%", adj = 0, col = "#366BFB")
text(101, tail(CL[[2]]$y, 1), "50%", adj = 0, col = 1)
text(101, tail(CL[[3]]$y, 1), "100%", adj = 0, col = "#D50000")
par(xpd = FALSE)
label.figure("(b)", cex = 1.5, font = 2, yfrac = 0.97)
## Plot C: logaH2-T plot for habitable niches 20220303
par(mar = c(4, 2.5, 1, 2.5))
plot(T[1:2], H2[1:2], xlab = Tlab, ylab = "", type = "n", xaxs = "i", yaxs = "i")
# Fill area for habitable region
pu <- par("usr")
rect(pu[1], pu[3], pu[2], pu[4], col = "#C5E1A580")
# Add line for mixed fluids at Rainbow 20220303
srt <- 11
Rainbow <- read.csv(system.file("extdata/mjenergy/SC10_Rainbow.csv", package = "JMDplots"))
Tvals <- Rainbow$T
logaH2.max <- Rainbow$H2
polygon(c(Tvals, rev(Tvals)), c(logaH2.max, rep(par("usr")[4], length(Tvals))), col = "gray80", lty = 0)
lines(Tvals, logaH2.max, lty = 2)
text(Tvals[14], logaH2.max[14], "Rainbow", srt = 0.7 * srt, adj = c(0, 1.3), col = "green4")
# Add logaCO2 = logaCH4 line 20220223
Tvals <- seq(par("usr")[1], par("usr")[2], length.out = 100)
logaH2.min <- subcrt(c("CO2", "H2", "CH4", "H2O"), c(-1, -4, 1, 2), T = Tvals)$out$logK / -4
polygon(c(Tvals, rev(Tvals)), c(logaH2.min, rep(par("usr")[3], length(Tvals))), col = "gray80", lty = 0)
lines(Tvals, logaH2.min, lty = 3)
box()
text(83, -6.4, quote(italic(a)*CH[4] / italic(a)*CO[2] == 10^0), srt = srt)
# Add lines for aCO2/aCH4 = 10^2 and 10^4 20220303
lines(Tvals[33:100], logaH2.min[33:100] + 2, lty = 3)
text(94, -4.1, quote(10^2), srt = srt)
lines(Tvals, logaH2.min + 4, lty = 3)
text(94, -2.0, quote(10^4), srt = srt)
# Add box for Lost City 20220303
# 40 to 90 °C (Kelley et al., 2005)
x1 <- 40
x2 <- 90
# 1 to 15 mmol/kg H2 (Kelley et al., 2005)
y1 <- log10(1e-3)
y2 <- log10(15e-3)
rect(x1, y1, x2, y2, col = "white", lty = 0)
rect(x1, y1, x2, y2, col = "#C5E1A540", border = "green4")
text(mean(c(x1, x2)), mean(c(y1, y2)), "Lost City", col = "green4")
# Add lines for sediments (Lovley & Goodwin, 1988) 20220303
sediment <- read.csv(system.file("extdata/utogig/LG88_Fig1.csv", package = "JMDplots"))
for(i in 1:nrow(sediment)) {
if(sediment$type[i] == "Methane") col <- "green4" else col <- "gray40"
if(sediment$type[i] == "Methane") lty <- 1 else lty <- 2
lines(c(15, 25), rep(log10(sediment$H2[i]*1e-9), 2), col = col, lty = lty)
}
# Add affinity contours 20220403
lines(as.data.frame(do.call(cbind, CL[[1]][c("x", "y")])), col = "#366BFB")
lines(as.data.frame(do.call(cbind, CL[[2]][c("x", "y")])), col = 1)
lines(as.data.frame(do.call(cbind, CL[[3]][c("x", "y")])), col = "#D50000")
# Add labels 20220221
text(21, -6.4, "Methanogenic\nsediments", adj = 0, cex = 0.9, col = "green4")
lines(c(26, 22), c(-7, -7.9), col = "green4")
text(40, -9.5, hyphen.in.pdf("Non-methanogenic sediments"), adj = 0, cex = 0.9, col = "gray40")
lines(c(26, 39), c(-9, -9.5), col = "gray40")
par(xpd = NA)
lines(c(100, 130), c(-2.34, -2.34), lty = 2)
lines(c(100, 130), rep(tail(logaH2.min, 1), 2), lty = 3)
text(132, -4, "Habitable\nniches", font = 3, col = "green4", adj = 0)
arrows(130,-2.34, 130, tail(logaH2.min, 1), code = 3, col = "green4", length = 0.1)
text(101, tail(CL[[1]]$y, 1), quote("10%"~A[I] > A[II]), adj = 0, col = "#366BFB")
text(101, tail(CL[[2]]$y, 1), quote("50%"~A[I] > A[II]), adj = 0, col = 1)
text(101, tail(CL[[3]]$y, 1), quote("100%"~A[I] > A[II]), adj = 0, col = "#D50000")
par(xpd = FALSE)
label.figure("(c)", cex = 1.5, font = 2, yfrac = 0.97, xfrac = -0.02)
if(pdf) dev.off()
}
# Chemical and thermodynamic analysis of evolutionary divergence along redox gradients 20220601
utogig4 <- function(pdf = FALSE) {
if(pdf) pdf("Figure_4.pdf", width = 9, height = 5)
layout(matrix(1:8, nrow = 2), widths = c(2.5, 4, 4, 2.5))
par(mar = c(4, 4, 3, 1))
# Faded colors
col4 <- adjustcolor(4, alpha.f = 0.69)
col2 <- adjustcolor(2, alpha.f = 0.69)
# y-axis limits for boxplots
ylim <- c(-0.25, -0.10)
# Axis limits for affinity plots
xlims <- list(c(-4, -12), c(0, -15), c(0, -15))
ylims <- list(c(0, 80), c(0, 50), c(0, 50))
# Where to draw transition line
trans <- list(c("Class I", "Class II"), c("Nif-B", "Nif-A"), c("Basal", "Terrestrial"))
# Define basis species and temperature
basis(c("CO2", "H2", "NH4+", "H2O", "H2S", "H+"), c(Seawater.AS98$CO2, Seawater.AS98$H2, Seawater.AS98$"NH4+", 0, Seawater.AS98$H2S, -Seawater.AS98$pH))
T <- 25
# Calculate logK for H2 = 2H+ + 2e- for conversion to Eh
logK <- subcrt(c("H2", "H+", "e-"), c(-1, 2, 2), T = T)$out$logK
pH <- Seawater.AS98$pH
# Adjustments for label position
dx <- list(c(0, -0.3), c(3.83, 0, -5, -1.5), c(0.2, -0.1, 2, -3))
dy <- list(c(2, 1), c(-1.3, 0.7, 0, 1), c(1.5, 3, 0, -6))
# Place to keep logaH2 for printing 20220920
logaH2s <- numeric()
for(i in 1:3) {
if(i == 1) {
# Methanogen proteomes 20220424
# Read amino acid composition and compute Zc
aa <- read.csv(system.file("extdata/utogig/methanogen_AA.csv", package = "JMDplots"))
# Indices of Class I and Class II methanogens
iI <- 20:36
iII <- 1:19
col <- c(col2, col4)
lcol <- c(2, 4)
# Make Zc plot
Zc <- ZC(protein.formula(aa))
Zclist <- list("Class I" = Zc[iI], "Class II" = Zc[iII])
names(Zclist) <- paste0(names(Zclist), "\n(", sapply(Zclist, length), ")")
bp <- boxplot(Zclist, ylab = Zclab, col = col, ylim = ylim, names = character(2))
axis(1, at = 1:2, labels = names(Zclist), line = 1, lwd = 0)
add_cld(Zclist, bp)
text(1, -0.12, "Anoxic\nhabitats", font = 2, cex = 0.8)
text(2, -0.12, "Anoxic\nand oxic\nhabitats", font = 2, cex = 0.8)
abline(v = 1.5, lty = 2, lwd = 1.5, col = 8)
title("Methanogens\n(Lyu & Lu, 2018)", font.main = 1, cex.main = 1)
label.figure("(a)", cex = 1.5, font = 2, xfrac = 0.06)
# Get the species in each group
groups <- list("Class I" = iI, "Class II" = iII)
# Calculate P-values using parametric and non-parametric tests 20220913
Ptab1 <- KWvsANOVA(Zclist)
Ptab1 <- cbind(Variable = "Methanogens Zc", Ptab1)
}
if(i == 2) {
# Nif-encoding genomes 20220531
np <- NifProteomes()
Zclist <- np$ZClist
col <- c(col2, col2, col2, col4)
lcol <- c(2, 2, 2, 4)
bp <- boxplot(Zclist, ylab = Zclab, col = col, ylim = ylim, names = character(4))
names(Zclist) <- paste0(names(Zclist), "\n(", sapply(Zclist, length), ")")
axis(1, at = 1:4, labels = hyphen.in.pdf(names(Zclist)), line = 1, lwd = 0)
# Names with "-" confuse multcompLetters4()
names(Zclist) <- gsub("-", "", names(Zclist))
add_cld(Zclist, bp)
text(2, -0.12, "Anaerobic", font = 2, cex = 0.8)
text(4.1, -0.242, "Anaerobic\nand aerobic", font = 2, cex = 0.8)
abline(v = 3.5, lty = 2, lwd = 1.5, col = 8)
title(hyphen.in.pdf("Nif-encoding genomes\n(Poudel et al., 2018)"), font.main = 1, cex.main = 1)
# Get the amino acid compositions and species in each group
aa <- np$AA
groupnames <- np$types
groups <- sapply(groupnames, function(group) aa$protein == group, simplify = FALSE)
Ptab2 <- KWvsANOVA(Zclist)
Ptab2 <- cbind(Variable = "Nif-encoding Zc", Ptab2)
}
if(i == 3) {
# Thaumarchaeota 20220414
# Amino acid compositions of predicted (Glimmer) and database (NCBI or IMG) proteomes
predicted <- read.csv(system.file("extdata/utogig/Thaumarchaeota_predicted_AA.csv", package = "JMDplots"))
database <- read.csv(system.file("extdata/utogig/Thaumarchaeota_database_AA.csv", package = "JMDplots"))
# If both are available, use predicted instead of database
aa <- rbind(predicted, database)
aa <- aa[!duplicated(aa$organism), ]
groupnames <- c("Basal", "Terrestrial", "Shallow", "Deep")
## Colors from Ren et al. (2019)
#col <- c("#b2427e", "#c78d55", "#00a06f", "#4085c3")
col <- c(col2, col4, col4, col4)
lcol <- c(2, 4, 4, 4)
Zc <- Zc(aa)
Zclist <- lapply(groupnames, function(g) Zc[aa$protein == g])
names(Zclist) <- groupnames
names(Zclist) <- paste0(names(Zclist), "\n(", sapply(Zclist, length), ")")
bp <- boxplot(Zclist, ylab = Zclab, col = col, ylim = ylim, names = character(4))
add_cld(Zclist, bp)
axis(1, at = 1:4, labels = names(Zclist), line = 1, lwd = 0, gap.axis = 0)
text(0.9, -0.12, hyphen.in.pdf("Pre-GOE\norigin"), font = 2, cex = 0.8)
text(2.1, -0.12, hyphen.in.pdf("Post-GOE\norigin"), font = 2, cex = 0.8)
abline(v = 1.5, lty = 2, lwd = 1.5, col = 8)
title("Thaumarchaeota\n(Ren et al., 2019)", font.main = 1, cex.main = 1)
# Get the species in each group
groups <- sapply(groupnames, function(group) aa$protein == group, simplify = FALSE)
Ptab3 <- KWvsANOVA(Zclist)
Ptab3 <- cbind(Variable = "Thaumarchaeota Zc", Ptab3)
}
# Make affinity rank plot 20220602
# Save graphical settings that are modified by thermo.plot.new()
opar <- par(c("mar", "mgp", "tcl", "las", "xaxs", "yaxs"))
# Start plot
thermo.plot.new(xlim = xlims[[i]], ylim = ylims[[i]], xlab = logaH2lab, ylab = "Mean rank of per-residue affinity (%)", yline = par("mgp")[1] + 0.3)
# Color reducing and oxidizing areas from organic compounds 20220621
file <- "H2_intermediate_-3.csv"
dat <- read.csv(file.path(system.file("extdata/utogig", package = "JMDplots"), file))
# Just use 25 degC values
dat <- dat[dat$T==25, ]
x <- dat[, "T", drop = FALSE]
# Make rectangles
rect(xlims[[i]][1], ylims[[i]][1], dat$hiN_hipH, ylims[[i]][2], col = "#E5737360", border = NA)
rect(xlims[[i]][2], ylims[[i]][1], dat$loN_lopH, ylims[[i]][2], col = "#90CAF960", border = NA)
rect(dat$hiN_hipH, ylims[[i]][1], dat$loN_lopH, ylims[[i]][2], col = "#9E9E9E60", border = NA)
rect(dat$hiN_lopH, ylims[[i]][1], dat$loN_hipH, ylims[[i]][2], col = "#9E9E9E90", border = NA)
# Load proteins and calculate affinity
ip <- add.protein(aa, as.residue = TRUE)
a <- affinity(H2 = xlims[[i]], iprotein = ip, T = T)
# Calculate normalized sum of ranks for each group and make diagram
arank <- rank.affinity(a, groups)
names <- hyphen.in.pdf(names(groups))
diagram(arank, col = lcol, lty = 1, lwd = 1.5, dx = dx[[i]], dy = dy[[i]], names = names, add = TRUE)
par(opar)
if(i == 1) label.figure("(b)", cex = 1.5, font = 2, xfrac = 0.06)
# Draw line at transition
itrans <- which.min(abs(arank$values[[trans[[i]][1]]] - arank$values[[trans[[i]][2]]]))
logaH2 <- arank$vals$H2[itrans]
ytop1 <- ylims[[i]][2] + diff(ylims[[i]]) * 0.05
#A <- arank$values[[trans[[i]][1]]][itrans]
#lines(c(logaH2, logaH2), c(A, ytop1), lty = 2, lwd = 1.5, col = 8, xpd = NA)
ybot <- ylims[[i]][1]
lines(c(logaH2, logaH2), c(ybot, ytop1), lty = 2, lwd = 1.5, col = 8, xpd = NA)
# Calculate pe and Eh (mV)
pe <- -pH - 0.5*logaH2 - 0.5*logK
Eh <- 1000 * convert(pe, "Eh", pH = pH)
Ehtext <- paste(round(Eh), "mV")
ytop2 <- ylims[[i]][2] + diff(ylims[[i]]) * 0.1
text(logaH2, ytop2, Ehtext, xpd = NA)
logaH2s <- c(logaH2s, logaH2)
}
# Thaumarchaeota nH2O plot 20220622
nH2O <- nH2O(aa)
nH2Olist <- lapply(groupnames, function(g) nH2O[aa$protein == g])
names(nH2Olist) <- groupnames
names(nH2Olist) <- paste0(names(nH2Olist), "\n(", sapply(nH2Olist, length), ")")
ylim <- range(nH2O)
bp <- boxplot(nH2Olist, ylab = nH2Olab, col = col, ylim = ylim, names = character(4))
add_cld(nH2Olist, bp)
# Make rotated labels (modified from https://www.r-bloggers.com/rotated-axis-labels-in-r-plots/)
text(x = 1:4, y = par()$usr[3] - 2 * strheight("A"), labels = groupnames, srt = 45, adj = 1, xpd = TRUE)
axis(1, at = 1:4, labels = NA)
title("Thaumarchaeota\n(Ren et al., 2019)", font.main = 1, cex.main = 1)
label.figure("(c)", cex = 1.5, font = 2, xfrac = 0.06)
Ptab4 <- KWvsANOVA(nH2Olist)
Ptab4 <- cbind(Variable = "Thaumarchaeota nH2O", Ptab4)
# Add legend
plot.new()
par(mar = c(0, 0, 0, 0))
par(xpd = NA)
dT <- describe.property("T", T)
dP <- describe.property("P", 1)
dbasis <- describe.basis(ibasis = c(1, 3, 5, 6))
legend("left", legend = c(dT, dP, dbasis), bty = "n", y.intersp = 1.5, inset = c(-0.5, 0))
par(xpd = FALSE)
if(pdf) dev.off()
# Print logaH2s
print("logaH2 at dashed lines in (b):")
print(round(logaH2s, 1))
# Return P-value table 20220913
out <- rbind(Ptab1, Ptab2, Ptab3, Ptab4)
out$p_Tukey <- signif(out$p_Tukey, 2)
out$p_Dunn <- signif(out$p_Dunn, 2)
invisible(out)
}
# Plot intermediate logaH2 20220220
# Add class argument 20220418
intermediate_logaH2 <- function(class = NULL, add = FALSE, parargs = list(mar = c(4, 4, 1, 1), mgp = c(2.7, 1, 0)),
xlim = c(0, 300), ylim = NULL, lines = TRUE, neutral = TRUE, redox = TRUE, pH = TRUE, NH4 = TRUE, pH.y = -9.4, NH4.y = -6.6, logact = -3) {
if(!add) do.call(par, parargs)
file <- paste0("H2_intermediate_", logact, ".csv")
if(!is.null(class)) file <- paste0("H2_intermediate_", gsub(" ", "", class), "_", logact, ".csv")
dat <- read.csv(file.path(system.file("extdata/utogig", package = "JMDplots"), file))
x <- dat[, "T", drop = FALSE]
y <- dat[, 2:5]
if(!add) matplot(x, y, type = "n", xlab = Tlab, ylab = logaH2lab, xlim = xlim, ylim = ylim, xaxs = "i")
# Fill area between lines
polygon(c(dat$T, rev(dat$T)), c(dat$loN_lopH, rev(dat$hiN_hipH)), border = NA, col = "#9E9E9E60")
polygon(c(dat$T, rev(dat$T)), c(dat$hiN_lopH, rev(dat$loN_hipH)), border = NA, col = "#9E9E9E90")
# Fill reducing and oxidizing regions 20220401
top <- rep(par("usr")[4], length(dat$T))
polygon(c(dat$T, rev(dat$T)), c(top, rev(dat$hiN_hipH)), border = NA, col = "#E5737360")
bot <- rep(par("usr")[3], length(dat$T))
polygon(c(dat$T, rev(dat$T)), c(bot, rev(dat$loN_lopH)), border = NA, col = "#90CAF960")
# Don't plot ends of lines 20220401
Tlim <- c(10, 290)
if(!is.null(class)) Tlim <- c(17, 270)
if(pH) dat <- dat[dat$T > Tlim[1], ]
if(NH4) dat <- dat[dat$T < Tlim[2], ]
x <- dat[, "T", drop = FALSE]
if(neutral) {
y <- dat[, 2:7]
# dashed/solid lines for lo/hi N
lty <- c(2, 2, 1, 1, 2, 1)
col <- c(1, 1, 1, 1, 6, 6)
} else {
y <- dat[, 2:5]
lty <- c(2, 2, 1, 1)
col <- c(1, 1, 1, 1)
}
if(lines) {
matlines(x, y, lty = lty, col = col)
}
if(pH) {
# Add pH labels at low T
pHdat <- dat[1, ]
pHlab <- character(ncol(pHdat))
pHlab[grep("lopH", colnames(pHdat))] <- 3
pHlab[grep("hipH", colnames(pHdat))] <- 9
pHdat.x <- 10
pH.x <- 8
dy <- -0.1
if(!is.null(class)) {
pHdat.x <- c(14, 7, 14, 7)
pH.x <- 12
dy <- c(-0.1, -0.6, -0.1, -0.6)
}
text(pHdat.x, pHdat + dy, pHlab)
text(pH.x, pH.y, "pH", font = 2)
}
if(NH4) {
# Add logaNH4+ labels at high T
NH4dat <- dat[nrow(dat), ]
NH4lab <- character(ncol(NH4dat))
NH4lab[grep("loN_", colnames(NH4dat))] <- -8
NH4lab[grep("hiN_", colnames(NH4dat))] <- -3
NH4dat.x <- 293
NH4.x <- 278
dy <- 0
if(!is.null(class)) {
NH4dat.x <- c(278, 290, 290, 278)
NH4.x <- 270
dy <- c(0, 0.5, 0.5, 0)
}
text(NH4dat.x, NH4dat + dy, NH4lab)
text(NH4.x, NH4.y, logaNH4lab, font = 2)
}
if(redox) {
# Add more/less oxidizing/reducing labels 20220401
arrows(100, -12.2, 100, -9.8, length = 0.1, code = 3)
text(110, -12, "More oxidizing", adj = 0)
text(110, -10, "Less oxidizing", adj = 0)
arrows(100, -6.6, 100, -3.6, length = 0.1, code = 3)
text(90, -3.8, "More reducing", adj = 1)
text(90, -6.4, "Less reducing", adj = 1)
}
}
# Find intermediate logaH2: where affinity vs Zc has a slope of 0 20220219
# Add class argument (calculate only for single compound class) 20220418
calc_logaH2_intermediate <- function(class = NULL, logact = -3) {
# Put species names into vector
allspecies <- unlist(specieslist)
# Use single compound class if given 20220418
if(!is.null(class)) allspecies <- unlist(specieslist[class])
# Calculate carbon number and oxidation state
ispecies <- suppressMessages(info(allspecies))
nC <- sapply(makeup(ispecies), "[", "C")
Zc <- ZC(ispecies)
# Load basis species with default activities
reset()
basis(c("CO2", "H2", "NH4+", "H2O", "H+"), c(-3, -6, -4, 0, -7))
# Load formed species
species(allspecies, logact)
# Species indices of basis and formed species
basis_and_species <- c(basis()$ispecies, species()$ispecies)
# Function to calculate slope of affinity - Zc linear fit
slopefun <- function(H2, T, sout, plot.it = FALSE) {
# Set H2 activity
basis("H2", H2)
# Calculate affinities
a <- suppressMessages(affinity(T = T, sout = sout))
A <- unlist(a$values)
# Convert to Gibbs energy (J/mol)
TK <- convert(T, "K")
A <- -convert(A, "G", TK)
# Convert to kJ/mol
A <- convert(A, "J") / 1000
# Divide A by number of carbon
AC <- A / nC
# Calculate linear fit
thislm <- lm(AC ~ Zc)
# Get slope
slope <- thislm$coefficients[[2]]
slope
}
# Function to find root at given temperature(s)
unifun <- function(T = 25) {
sapply(T, function(thisT) {
## Calculate affinity to get subcrt output at this temperature
#a <- affinity()
#sout <- a$sout
# Get subcrt output at this temperature
sout <- suppressMessages(subcrt(basis_and_species, T = thisT, property = "logK"))
H2 <- uniroot(slopefun, c(-50, 50), T = thisT, sout = sout)
H2$root
})
}
# Find root for different combinations of activities of non-H2 basis species
T = seq(0, 300, 5)
basis(c("NH4+", "pH"), c(-8, 3))
loN_lopH <- round(unifun(T = T), 6)
basis(c("NH4+", "pH"), c(-8, 9))
loN_hipH <- round(unifun(T = T), 6)
basis(c("NH4+", "pH"), c(-3, 3))
hiN_lopH <- round(unifun(T = T), 6)
basis(c("NH4+", "pH"), c(-3, 9))
hiN_hipH <- round(unifun(T = T), 6)
# Calculate neutral pH
logK <- subcrt(c("H2O", "OH-", "H+"), c(-1, 1, 1), T = T)$out$logK
pH <- logK / -2
# Given lopH == 3, hipH == 9, place neutral pH in the range [lopH, hipH] rescaled to [0, 1]
pHfrac <- (pH - 3) / (9 - 3)
# Calculate the value of logaH2 at neutral pH and low and high N
neutralpH_loN <- round(loN_lopH + pHfrac * (loN_hipH - loN_lopH), 6)
neutralpH_hiN <- round(hiN_lopH + pHfrac * (hiN_hipH - hiN_lopH), 6)
# Save results
out <- data.frame(T, loN_lopH, loN_hipH, hiN_lopH, hiN_hipH, neutralpH_loN, neutralpH_hiN)
file <- paste0("H2_intermediate_", logact, ".csv")
if(!is.null(class)) file <- paste0("H2_intermediate_", gsub(" ", "", class), "_", logact, ".csv")
write.csv(out, file, row.names = FALSE, quote = FALSE)
}
# Comparison of Zc of proteomes predicted by Glimmer and downloaded from NCBI 20220604
utogigS1 <- function(pdf = FALSE) {
if(pdf) pdf("Figure_S1.pdf", width = 4, height = 4)
par(mar= c(4.1, 4.1, 1, 1))
predicted <- read.csv(system.file("extdata/utogig/Thaumarchaeota_predicted_AA.csv", package = "JMDplots"))
database <- read.csv(system.file("extdata/utogig/Thaumarchaeota_database_AA.csv", package = "JMDplots"))
organisms <- intersect(predicted$organism, database$organism)
predicted <- predicted[match(organisms, predicted$organism), ]
database <- database[match(organisms, database$organism), ]
y <- Zc_predicted <- Zc(predicted)
x <- Zc_database <- Zc(database)
# Colors from Ren et al. (2019)
groupcol <- c(Basal = "#b2427e", Terrestrial = "#c78d55", Shallow = "#00a06f", Deep = "#4085c3")
col <- groupcol[match(database$protein, names(groupcol))]
pch <- match(database$protein, names(groupcol)) + 13
plot(Zc_database, Zc_predicted, pch = pch, col = col,
xlab = quote(bolditalic(Z)[bold(C)]~"(NCBI proteins)"), ylab = quote(bolditalic(Z)[bold(C)]~"(Glimmer proteins)"))
# Add legend
icol <- which(!duplicated(col))
legend("topleft", names(col)[icol], col = col[icol], pch = pch[icol])
# Calculate linear regression
thislm <- lm(y ~ x)
xylim <- c(-0.22, -0.10)
lines(xylim, predict(thislm, data.frame(x = xylim)), col = "#00000080")
# Show R2 and slope
R2 <- summary(thislm)$r.squared
R2txt <- bquote(italic(R)^2 == .(formatC(R2, digits = 3, format = "f")))
legend("bottomright", legend = R2txt, bty = "n")
Slope <- coef(thislm)["x"]
Slopetxt <- paste("slope =", formatC(Slope, digits = 3, format = "f"))
legend("bottomright", legend = c(Slopetxt, ""), bty = "n")
if(pdf) dev.off()
}
# Association between redox gradients and Zc of proteins and lipids in alkaline Yellowstone hot springs 20210516
utogigS2 <- function(pdf = FALSE) {
if(pdf) pdf("Figure_S2.pdf", width = 8.5, height = 4)
layout(matrix(1:3, nrow = 1), widths = c(1.07, 1, 0.4))
par(cex = 1)
## Plot A: Proteins
par(mar = c(4, 4.2, 0.5, 1))
plot(c(50, 100), c(-0.22, -0.12), type = "n", xlab = Tlab, ylab = Zclab, xaxs = "i", axes = FALSE)
# Remove leading zeros for better visual parallel with lipids
at <- seq(-0.22, -0.12, 0.02)
labels <- gsub("-0", "-", formatC(at, digits = 2, format = "f"))
axis(2, at, labels)
axis(1)
box()
# Get Zc of Bison Pool proteins aggregated by phylum
aa.phyla <- read.csv(system.file("extdata/bison/DS13.csv", package = "JMDplots"))
Zc <- ZC(protein.formula(aa.phyla))
# Get T values
site <- gsub("bison", "", aa.phyla$protein)
T <- sapply(site, switch, N = 93.3, S = 79.4, R = 67.5, Q = 65.3, P = 57.1)
# Plot lines for each phlum
for(phylum in unique(aa.phyla$organism)) {
iphylum <- aa.phyla$organism == phylum
lines(T[iphylum], Zc[iphylum], col = "gray")
}
# Make different color groups based on temperature
ilo <- T < 60
imid <- T >= 60 & T <= 70
ihi <- T > 70
points(T[ilo], Zc[ilo], pch = 22, bg = col4)
points(T[imid], Zc[imid], pch = 23, bg = col8)
points(T[ihi], Zc[ihi], pch = 21, bg = col2)
# Add labels
text(83, -0.14, "Proteins grouped\nby major phyla")
label.figure("(a)", cex = 1.5, font = 2, xfrac = 0.06)
## Plot B: Lipids
par(mar = c(4, 3, 0.5, 1))
plot(c(25, 100), c(-2.2, -1.2), type = "n", xlab = Tlab, ylab = "", xaxs = "i")
# Add "100" because it doesn't show up 20220408
axis(1, 100, tick = FALSE)
# IPL data from Boyer et al., 2020 Fig. 7
T <- c(29.0, 35.1, 38.1, 40.5, 51.6, 53, 59.8, 60.7, 63.1, 64.8, 70.5, 73.3, 77.3, 80.9, 82.2, 85.4, 89.0, 91.0)
Zc.alkyl <- c(-1.73, -1.71, -1.77, -1.80, -1.83, -1.82, -1.89, -1.90, -1.88, -1.92, -1.92, -1.84, -1.93, -1.89, -1.96, -1.95, -1.92, -1.94)
Zc.IPL <- c(-1.36, -1.33, -1.37, -1.38, -1.44, -1.43, -1.48, -1.54, -1.46, -1.52, -1.54, -1.45, -1.61, -1.53, -1.60, -1.56, -1.56, -1.68)
# Make different color groups based on temperature
ilo <- T < 60
imid <- T >= 60 & T <= 70
ihi <- T > 70
# Plot points
points(T[ilo], Zc.alkyl[ilo], pch = 22, bg = col4)
points(T[imid], Zc.alkyl[imid], pch = 23, bg = col8)
points(T[ihi], Zc.alkyl[ihi], pch = 21, bg = col2)
points(T[ilo], Zc.IPL[ilo], pch = 22, bg = col4)
points(T[imid], Zc.IPL[imid], pch = 23, bg = col8)
points(T[ihi], Zc.IPL[ihi], pch = 21, bg = col2)
# Label molecular groups
text(70, -1.32, "Intact polar lipids")
text(50, -2, "Alkyl chains")
label.figure("(b)", cex = 1.5, font = 2, xfrac = 0.01)
# Add legend 20220221
# Move to right panel 20220408
plot.new()
par(mar = c(0, 0, 0, 0))
par(xpd = NA)
legend <- as.expression(c(quote(" "*italic(T) < 60~degree*C*";"), " less reducing", "",
quote(" Transition zone"), "",
quote(" "*italic(T) > 70~degree*C*";"), " more reducing"))
legend("left", legend = legend, pch = c(22, NA, NA, 23, NA, 21, NA), pt.bg = c(col4, NA, NA, col8, NA, col2, NA),
bty = "n", inset = -1, y.intersp = 1)
par(xpd = FALSE)
if(pdf) dev.off()
}
# logaH2-T plots for different organic compound classes 20220418
utogigS4 <- function(pdf = FALSE) {
# To generate files:
# lapply(names(specieslist), calc_logaH2_intermediate)
if(pdf) pdf("Figure_S4.pdf", width = 6, height = 8)
par(mfrow = c(3, 2))
parargs <- list(mar = c(4, 4, 3, 1), mgp = c(2.7, 1, 0))
ylim <- c(-25, 0)
titlefun <- function(class) title(paste0(class, " (", length(specieslist[[class]]), ")"))
intermediate_logaH2("C1 and C2", parargs = parargs, ylim = ylim, redox = FALSE, neutral = FALSE, pH = FALSE, NH4 = FALSE)
titlefun("C1 and C2")
intermediate_logaH2("Acid", parargs = parargs, ylim = ylim, redox = FALSE, neutral = FALSE, pH = FALSE, NH4 = FALSE)
titlefun("Acid")
intermediate_logaH2("Amino acid", parargs = parargs, ylim = ylim, redox = FALSE, neutral = FALSE, pH.y = -8.5, NH4.y = -10)
titlefun("Amino acid")
intermediate_logaH2("Sugar", parargs = parargs, ylim = ylim, redox = FALSE, neutral = FALSE, pH = FALSE, NH4 = FALSE)
titlefun("Sugar")
intermediate_logaH2("Nucleobase", parargs = parargs, ylim = ylim, redox = FALSE, neutral = FALSE, pH.y = -12, NH4.y = -17)
titlefun("Nucleobase")
intermediate_logaH2("TCA cycle", parargs = parargs, ylim = ylim, redox = FALSE, neutral = FALSE, NH4 = FALSE, pH.y = -10)
titlefun("TCA cycle")
if(pdf) dev.off()
}
# Tabulate p-values for
# ANOVA + TukeyHSD (parametric) and
# Kruskal-Wallis + Dunn (non-parametric)
# 20220913
KWvsANOVA <- function(Zclist) {
# Ugly one-liner to turn a list into a data frame with "group" column taken from names of the list elements
Zcdat <- do.call(rbind, sapply(1:length(Zclist), function(i) data.frame(group = names(Zclist)[i], Zc = Zclist[[i]]), simplify = FALSE))
# Remove numeric component of labels (after \n)
Zcdat$group <- sapply(strsplit(Zcdat$group, "\n"), "[", 1)
# Convert 'group' column to factor to avoid warning from dunnTest()
Zcdat$group <- as.factor(Zcdat$group)
# One-way ANOVA and Tukey's Honest Significant Differences
# Adapted from https://statdoe.com/one-way-anova-and-box-plot-in-r/
anova <- aov(Zc ~ group, data = Zcdat)
tukey <- TukeyHSD(anova)
# Kruskal-Wallis followed by Dunn test
# Adapted from https://www.statology.org/dunns-test-in-r/
dunn <- dunnTest(Zc ~ group, data = Zcdat, method = "bonferroni")
# Put together table
comparison.tukey <- rownames(tukey$group)
comparison.dunn <- sapply(lapply(strsplit(comparison.tukey, "-"), rev), paste, collapse = " - ")
idunn <- match(comparison.dunn, dunn$res$Comparison)
data.frame(Comparison = comparison.dunn, p_Tukey = tukey$group[, "p adj"], p_Dunn = dunn$res$P.adj[idunn])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.