# JMDplots/diffexpr.R
# This file has functions, which were previously in the canprot package,
# for chemical analysis of differential expression data
# 20240426 Functions moved from canprot package
# Compile and view vignettes from command line
# 20200414 jmd
mkvig <- function(vig = NULL) {
vig.allowed <- gsub("pdat_", "", grep("pdat_", ls("package:JMDplots"), value = TRUE))
vig.allowed <- vig.allowed[!vig.allowed %in% c("aneuploidy", "fly")]
isnull <- is.null(vig)
toomany <- length(vig) > 1
notallowed <- !any(vig %in% vig.allowed)
if(isnull | toomany | notallowed) stop("'vig' should be one of: ", paste(vig.allowed, collapse = ", "))
# Location of the vignette directory
vigdir <- system.file("vignettes", package = "JMDplots")
# Names of the vignette source and html output files
vigfile <- file.path(vigdir, paste0(vig, ".Rmd"))
htmlfile <- tempfile(pattern = paste0(vig, "_"), fileext = ".html")
# Compile the vignette and open it in the browser
rmarkdown::render(vigfile, output_file = htmlfile, knit_root_dir = vigdir)
# Set 'browser' option so vignettes open under Rstudio 20201017
# https://stackoverflow.com/questions/62536479/the-command-exams2html-does-not-generate-html-page-when-it-is-run-from-rstudio
if(.Platform$OS.type == "windows") oldopt <- options(browser = NULL)
browseURL(htmlfile)
if(.Platform$OS.type == "windows") options(oldopt)
# Return the path of html file
htmlfile
}
# Function to identify known UniProt IDs
# 20160703 jmd
check_IDs <- function(dat, IDcol, aa_file = NULL, updates_file = NULL) {
# The input IDs that are NA
input.NA <- is.na(dat[, IDcol]) | dat[, IDcol] == ""
# The candidate IDs separated into a list
ID_list <- strsplit(dat[, IDcol], ";")
# The list of IDs as a vector
ID <- unlist(ID_list)
# Get the UniProt ID in case we have e.g. sp|P62308|RUXG_HUMAN
# for NJVS19 dataset 20191226
if(any(grepl("\\|", ID))) ID <- sapply(strsplit(ID, "\\|"), "[", 2)
# Human proteins
aa <- get("human.aa", canprot)
# Add amino acid compositions from external file if specified
if(!is.null(aa_file)) {
aa_dat <- read.csv(aa_file, as.is=TRUE)
aa <- rbind(aa_dat, aa)
}
# Assemble known IDs
knownIDs <- sapply(strsplit(aa$protein, "|", fixed = TRUE), "[", 2)
# If that is NA (i.e. no | separator is present) use the entire string
ina <- is.na(knownIDs)
knownIDs[ina] <- aa$protein[ina]
# Also include obsolete UniProt IDs
uniprot_updates <- system.file("extdata/diffexpr/uniprot_updates.csv", package = "JMDplots")
updates <- read.csv(uniprot_updates)
if(!is.null(updates_file)) {
updates_dat <- read.csv(updates_file, as.is = TRUE)
updates <- rbind(updates_dat, updates)
}
knownIDs <- c(knownIDs, updates$old)
# Check if the candidate IDs are known
known <- match(ID, knownIDs)
known_IDs <- ID[known > 0]
# Get the IDs back into list form
known_IDs <- relist(known_IDs, ID_list)
# Take the first (non-NA) match 20191119
ID <- sapply(sapply(known_IDs, na.omit), "[", 1)
# The output IDs that are NA
output.NA <- is.na(ID) | ID == ""
# Print which IDs became NA 20191127
if(sum(output.NA) > sum(input.NA)) {
new.NA <- output.NA & !input.NA
NA.IDs <- dat[new.NA, IDcol]
if(sum(new.NA)==1) IDtxt <- "ID:" else IDtxt <- "IDs:"
print(paste("check_IDs:", sum(new.NA), "unavailable UniProt", IDtxt, paste(NA.IDs, collapse = " ")))
}
# Now apply the updates 20191119
iold <- match(ID, updates$old)
if(any(!is.na(iold))) {
oldIDs <- updates$old[na.omit(iold)]
newIDs <- updates$new[na.omit(iold)]
if(sum(!is.na(iold))==1) IDtxt <- "ID:" else IDtxt <- "IDs:"
print(paste("check_IDs: updating", sum(!is.na(iold)), "old UniProt", IDtxt, paste(oldIDs, collapse = " ")))
ID[!is.na(iold)] <- newIDs
}
dat[, IDcol] <- ID
dat
}
# Function to clean up data (remove proteins with unavailable IDs,
# ambiguous expression ratios, and duplicated IDs)
# 20190407 jmd
cleanup <- function(dat, IDcol, up2 = NULL) {
# Utility function to remove selected entries and print a message 20160713
remove_entries <- function(dat, irm, description) {
if(sum(irm) > 0) {
dat <- dat[!irm, , drop = FALSE]
if(sum(irm)==1) ptxt <- "protein" else ptxt <- "proteins"
print(paste("cleanup: removing", sum(irm), description, ptxt))
}
return(dat)
}
if(is.logical(IDcol)) {
dat <- remove_entries(dat, IDcol, "selected")
} else {
# Add up2 column to dat
if(!is.null(up2)) dat <- cbind(dat, up2 = up2)
# Which column has the IDs
if(is.character(IDcol)) IDcol <- match(IDcol, colnames(dat))
# Drop NA or "" IDs
unav <- is.na(dat[, IDcol]) | dat[, IDcol] == ""
dat <- remove_entries(dat, unav, "unavailable")
if(!is.null(up2)) {
# Drop unquantified proteins 20191120
dat <- remove_entries(dat, is.na(dat$up2), "unquantified")
# Drop proteins with ambiguous expression ratios
up <- dat[dat$up2, IDcol]
down <- dat[!dat$up2, IDcol]
iambi <- dat[, IDcol] %in% intersect(up, down)
dat <- remove_entries(dat, iambi, "ambiguous")
}
# Drop duplicated proteins
dat <- remove_entries(dat, duplicated(dat[, IDcol]), "duplicated")
}
dat
}
# Merge old Zc_nH2O() and CNS() functions, and add volume 20170718
# CNS: elemental abundance (C, N, S) per residue 20170124
# Zc_nH2O: plot and summarize Zc and nH2O/residue of proteins 20160706
get_comptab <- function(pdat, var1="Zc", var2="nH2O", plot.it=FALSE, mfun="median", oldstyle = FALSE) {
# Define functions for the possible variables of interest
# Calculate metrics with canprot functions, not CHNOSZ 20201015
Zc <- function() canprot::Zc(pdat$pcomp$aa)
nH2O <- function() canprot::nH2O(pdat$pcomp$aa)
nO2 <- function() canprot::nO2(pdat$pcomp$aa)
# Calculate protein length 20201015
#AA3 <- CHNOSZ::aminoacids(3)
AA3 <- c("Ala", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile", "Lys", "Leu",
"Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr", "Val", "Trp", "Tyr")
# The columns for the amino acids
icol <- match(AA3, colnames(pdat$pcomp$aa))
nAA <- function() rowSums(pdat$pcomp$aa[, icol])
# Memoize the volumes so we don't depend on the CHNOSZ database 20200509
# Changed to canprot::V0 20240301
V0 <- function() canprot::V0(pdat$pcomp$aa)
# GRAVY and pI added 20191028
# canprot:: is used to access the functions in the package namespace, not the ones defined here
GRAVY <- function() canprot::GRAVY(pdat$pcomp$aa)
pI <- function() canprot::pI(pdat$pcomp$aa)
# MW (molecular weight) added 20200501
MW <- function() canprot::MW(pdat$pcomp$aa)
# Get the values of the variables using the functions
val1 <- get(var1)()
val2 <- get(var2)()
if(plot.it) {
# Set symbol shape and color
# Hollow red square for up, filled blue circle for down in cancer
col <- ifelse(pdat$up2, 2, 4)
pch <- ifelse(pdat$up2, 0, 19)
cex <- ifelse(pdat$up2, 1, 0.8)
# Shuffle the order of points to mitigate overplotting effects
i <- sample(1:length(val1))
plot(val1[i], val2[i], xlab=cplab[[var1]], ylab=cplab[[var2]], col=col[i], pch=pch[i], cex=cex[i])
title(pdat$description)
mtext(pdat$dataset, side=4, cex=0.85, las=0, adj=0, line=-0.1)
}
# Calculate and print sample size
val1_dn <- val1[!pdat$up2]
val1_up <- val1[pdat$up2]
val2_dn <- val2[!pdat$up2]
val2_up <- val2[pdat$up2]
message(paste0(pdat$dataset, " (", pdat$description ,"): n1 ", length(val1_dn), ", n2 ", length(val1_up)))
# Calculate difference of means/medians, CLES, p-value
mfun1_dn <- get(mfun)(val1_dn)
mfun1_up <- get(mfun)(val1_up)
mfun2_dn <- get(mfun)(val2_dn)
mfun2_up <- get(mfun)(val2_up)
val1.diff <- mfun1_up - mfun1_dn
val2.diff <- mfun2_up - mfun2_dn
out <- data.frame(dataset=pdat$dataset, description=pdat$description,
n1=length(val1_dn), n2=length(val1_up),
val1.median1=mfun1_dn, val1.median2=mfun1_up, val1.diff,
val2.median1=mfun2_dn, val2.median2=mfun2_up, val2.diff, stringsAsFactors=FALSE)
if(oldstyle) {
val1.CLES <- 100*CLES(val1_dn, val1_up, distribution = NA)
val2.CLES <- 100*CLES(val2_dn, val2_up, distribution = NA)
val1.p.value <- val2.p.value <- NA
if(!any(is.na(val1_dn)) & !any(is.na(val1_up))) val1.p.value <- stats::wilcox.test(val1_dn, val1_up)$p.value
if(!any(is.na(val2_dn)) & !any(is.na(val2_up))) val2.p.value <- stats::wilcox.test(val2_dn, val2_up)$p.value
# print summary messages
nchar1 <- nchar(var1)
nchar2 <- nchar(var2)
start1 <- paste0(var1, substr(" MD ", nchar1, 10))
start2 <- paste0(var2, substr(" MD ", nchar2, 10))
message(paste0(start1, format(round(val1.diff, 3), nsmall=3, width=6),
", CLES ", round(val1.CLES), "%",
", p-value ", format(round(val1.p.value, 3), nsmall=3)))
message(paste0(start2, format(round(val2.diff, 3), nsmall=3, width=6),
", CLES ", round(val2.CLES), "%",
", p-value ", format(round(val2.p.value, 3), nsmall=3), "\n"))
out <- data.frame(dataset=pdat$dataset, description=pdat$description,
n1=length(val1_dn), n2=length(val1_up),
val1.median1=mfun1_dn, val1.median2=mfun1_up, val1.diff, val1.CLES, val1.p.value,
val2.median1=mfun2_dn, val2.median2=mfun2_up, val2.diff, val2.CLES, val2.p.value, stringsAsFactors=FALSE)
}
# Convert colnames to use names of variables
colnames(out) <- gsub("val1", var1, colnames(out))
colnames(out) <- gsub("val2", var2, colnames(out))
# Convert colnames
if(mfun == "mean") colnames(out) <- gsub("median", "mean", colnames(out))
return(invisible(out))
}
# Plot mean or median differences of Zc and nH2O, or other variables
# 20160715 jmd
# 20190329 add oldstyle = FALSE (no drop lines; show kernel density)
diffplot <- function(comptab, vars=c("Zc", "nH2O"), col="black", plot.rect=FALSE, pt.text=c(letters, LETTERS),
cex.text = 0.85, oldstyle = FALSE, pch = 1, cex = 2.1, contour = TRUE, col.contour = par("fg"),
probs = 0.5, add = FALSE, labtext = NULL, ...) {
# Convert to data frame if needed
if(!is.data.frame(comptab)) comptab <- do.call(rbind, comptab)
# Which columns we're using
stats <- c("diff", "CLES", "p.value")
iX <- unlist(sapply(paste(vars[1], stats, sep=".*"), grep, colnames(comptab)))
iY <- unlist(sapply(paste(vars[2], stats, sep=".*"), grep, colnames(comptab)))
# Get mean/median difference, common language effect size and p-value
X_d <- comptab[, iX[1]]
Y_d <- comptab[, iY[1]]
# Figure out axis labels
if(oldstyle) {
cplabbar <- list(
Zc = quote(italic(Z)[C]),
DZc = quote(Delta*italic(Z)[C]),
# "oldstyle" labels including overbar
nH2O = quote(bar(italic(n))*H[2]*O),
DnH2O = quote(Delta*bar(italic(n))*H[2]*O)
)
# Only use part before underscore 20191207
Dx <- paste0("D", strsplit(vars[1], "_")[[1]][1])
Dy <- paste0("D", strsplit(vars[2], "_")[[1]][1])
xvar <- cplabbar[[Dx]]
yvar <- cplabbar[[Dy]]
# For oldstyle plots, also get common language effect size and p-value
X_e <- signif(comptab[, iX[2]], 2)
X_p <- comptab[, iX[3]]
Y_e <- signif(comptab[, iY[2]], 2)
Y_p <- comptab[, iY[3]]
} else {
xvar <- bquote(Delta * .(cplab[[strsplit(vars[1], "_")[[1]][1]]]))
yvar <- bquote(Delta * .(cplab[[strsplit(vars[2], "_")[[1]][1]]]))
}
# Use colnames to figure out whether the difference is of the mean or median
if(is.null(labtext)) {
# Treat the x- and y-variables separately in case one is median and one is mean 20191127
xfun <- gsub("1", "", strsplit(grep(vars[1], colnames(comptab), value = TRUE)[1], "\\.")[[1]][2])
yfun <- gsub("1", "", strsplit(grep(vars[2], colnames(comptab), value = TRUE)[1], "\\.")[[1]][2])
xparen <- paste0("(", xfun, " difference)")
yparen <- paste0("(", yfun, " difference)")
} else {
labtext <- rep(labtext, length.out = 2)
if(is.list(labtext)) lt1 <- labtext[[1]] else lt1 <- labtext[1]
if(is.list(labtext)) lt2 <- labtext[[2]] else lt2 <- labtext[2]
xparen <- bquote("("*.(lt1)*")")
yparen <- bquote("("*.(lt2)*")")
}
if(identical(labtext[1], NA)) xlab <- xvar else xlab <- bquote(.(xvar) ~ .(xparen))
if(identical(labtext[2], NA)) ylab <- yvar else ylab <- bquote(.(yvar) ~ .(yparen))
# Initialize plot: add a 0 to make sure we can see the axis
# Prevent NA values from influencing the scale of the plot 20200103
ina <- is.na(X_d) | is.na(Y_d)
if(!add) plot(type="n", c(X_d[!ina], 0), c(Y_d[!ina], 0), xlab=xlab, ylab=ylab, ...)
# Add a reference rectangle
if(plot.rect) rect(-0.01, -0.01, 0.01, 0.01, col="grey80", lwd=0)
# show axis lines
abline(h=0, lty=3, col = "gray30")
abline(v=0, lty=3, col = "gray30")
if(oldstyle) {
# Show drop lines: dotted/solid if p-value/effect size meet criteria
lty.X <- ifelse(abs(X_e - 50) >= 10, 1, ifelse(X_p < 0.05, 2, 0))
lty.Y <- ifelse(abs(Y_e - 50) >= 10, 1, ifelse(Y_p < 0.05, 2, 0))
for(i in seq_along(X_d)) {
lines(rep(X_d[i], 2), c(0, Y_d[i]), lty=lty.X[i])
lines(c(0, X_d[i]), rep(Y_d[i], 2), lty=lty.Y[i])
}
# Point symbols: open circle, filled circle, filled square (0, 1 or 2 vars with p-value < 0.05)
p_signif <- rowSums(data.frame(X_p < 0.05, Y_p < 0.05))
pch <- ifelse(p_signif==2, 15, ifelse(p_signif==1, 19, 21))
}
# Plot points with specified color (and point style, only for oldstyle = FALSE)
col <- rep(col, length.out=nrow(comptab))
pch <- rep(pch, length.out=nrow(comptab))
points(X_d, Y_d, pch=pch, col=col, bg="white", cex=cex)
if(!identical(pt.text, NA) | !identical(pt.text, FALSE)) {
# Add letters; for dark background, use white letters
col[pch %in% c(15, 19)] <- "white"
text(X_d, Y_d, pt.text[seq_along(X_d)], col=col, cex=cex.text)
}
# Contour 2-D kernel density estimate 20190329
# https://stats.stackexchange.com/questions/31726/scatterplot-with-contour-heat-overlay
if(!oldstyle & any(contour)) {
# Include points specified by 'contour' 20191102
Xcont <- X_d[contour]
Ycont <- Y_d[contour]
# Remove NA points 20191127
iNA <- is.na(Xcont) | is.na(Ycont)
Xcont <- Xcont[!iNA]
Ycont <- Ycont[!iNA]
if(length(Xcont) > 0) {
dens <- kde2d(Xcont, Ycont, n = 200)
# Add contour around 50% of points (or other fractions specified by 'probs') 20191126
# https://stackoverflow.com/questions/16225530/contours-of-percentiles-on-level-plot
# (snippet from emdbook::HPDregionplot from @benbolker)
dx <- diff(dens$x[1:2])
dy <- diff(dens$y[1:2])
sz <- sort(dens$z)
c1 <- cumsum(sz) * dx * dy
levels <- sapply(probs, function(x) {
approx(c1, sz, xout = 1 - x)$y
})
# Use lty = 2 and lwd = 1 if points are being plotted, or lty = 1 and lwd = 2 otherwise 20191126
if(identical(pch[1], NA)) lty <- 1 else lty <- 2
if(identical(pch[1], NA)) lwd <- 2 else lwd <- 1
# Don't try to plot contours for NA levels 20191207
if(!any(is.na(levels))) contour(dens, drawlabels = FALSE, levels = levels, lty = lty, lwd = lwd, add = TRUE, col = col.contour)
}
}
}
# Quantile distribution for up- and down-regulated proteins
# First version 20200428
# Use ecdf() to calculate knots 20200506
qdist <- function(pdat, vars = c("Zc", "nH2O"), show.steps = FALSE) {
# Initialize plot
if(length(vars)==2) par(mfrow = c(2, 1))
par(yaxs = "i")
for(var in vars) {
if(var=="Zc") {
X <- Zc(pdat$pcomp$aa)
xlab <- cplab$Zc
}
if(var=="nH2O") {
X <- nH2O(pdat$pcomp$aa)
xlab <- cplab$nH2O
}
up <- X[pdat$up2]
dn <- X[!pdat$up2]
# Start the plot
plot(range(up, dn), c(0, 1), type = "n", xlab = xlab, ylab = "Quantile point")
# A function to plot the points and lines
pfun <- function(x, ...) {
Fn <- ecdf(x)
# Plot the values (knots) and verticals
if(show.steps) plot(Fn, add = TRUE, cex = 0.25, col.01line = NA, verticals = TRUE, col.hor = "gray70", ...)
# Add lines that bisect the verticals (to intersect the quantile points)
x <- knots(Fn)
y <- sort(Fn(x)) - 0.5 / length(x)
lines(x, y, ...)
}
pfun(dn)
pfun(up, col = 2, lty = 2)
# Draw a line at the 0.5 quantile
lines(c(median(dn), median(up)), c(0.5, 0.5), lwd = 2, col = "slategray3")
}
}
# Format the summary table using xtable
# 20160709 jmd
xsummary <- function(comptab, vars=c("Zc", "nH2O")) {
# Convert to data frame if needed
if(!is.data.frame(comptab)) comptab <- do.call(rbind, comptab)
# Create letter labels
rownames(comptab) <- c(letters, LETTERS)[1:nrow(comptab)]
# Return this data frame when the function exits
comptab.out <- comptab
# Get the publication key from the dataset name
publication <- sapply(strsplit(comptab$dataset, "_"), "[", 1)
# Format the publication key (monospaced font)
publication <- paste0("<code>", publication, "</code>")
# Combine the publication and description
comptab$description <- paste0(publication, " (", comptab$description, ")")
# Datasets have letter labels
comptab$dataset <- c(letters, LETTERS)[1:nrow(comptab)]
# To save column width, change "dataset" to "set"
colnames(comptab)[1] <- "set"
# Select the columns to print
comptab <- comptab[, c(1:4, 7:9, 12:14)]
# Round values in some columns
comptab[, c(5, 8)] <- round(comptab[, c(5, 8)], 3) # mean difference
comptab[, c(6, 9)] <- signif(comptab[, c(6, 9)], 2) # CLES
# Place a marker around high effect size and low p-value
for(k in vars) {
# Effect size
jes <- paste0(k, ".CLES")
ihigh <- abs(comptab[, jes] - 50) >= 10
comptab[ihigh, jes] <- paste("**", comptab[ihigh, jes], "**")
# p-value
jpv <- paste0(k, ".p.value")
ilow <- comptab[, jpv] < 0.05
comptab[, jpv] <- format(comptab[, jpv], digits=1)
comptab[ilow, jpv] <- paste("**", comptab[ilow, jpv], "**")
# Bold mean difference if both effect size and p-value are highlighted
jmd <- paste0(k, ".diff")
comptab[, jmd] <- format(comptab[, jmd])
comptab[ihigh & ilow, jmd] <- paste("**", comptab[ihigh & ilow, jmd], "**")
# Underline mean difference if only p-value or only effect size is highlighted
comptab[xor(ilow, ihigh), jmd] <- paste("++", comptab[xor(ilow, ihigh), jmd], "++")
}
# Create xtable
x <- xtable::xtable(comptab, align=c("c", "l", "l", rep("r", 8)))
x <- capture.output(xtable::print.xtable(x, type="html",
include.rownames=FALSE,
math.style.exponents=TRUE,
sanitize.text.function=function(x){x}))
# Bold the indicated effect size, p-values and mean differences
x <- gsub("> ** ", "> <B>", x, fixed=TRUE)
x <- gsub(" ** <", "</B> <", x, fixed=TRUE)
# Underline the indicated mean differences
x <- gsub("> ++ ", "> <U>", x, fixed=TRUE)
x <- gsub(" ++ <", "</U> <", x, fixed=TRUE)
# Add headers that span multiple columns
span_empty <- "<th colspan=\"4\"></th>"
span_Zc <- "<th colspan=\"3\"><i>Z</i><sub>C</sub></th>"
span_nH2O <- "<th colspan=\"3\"><i>n</i><sub>H<sub>2</sub>O</sub></sub></th>"
span_nO2 <- "<th colspan=\"3\"><i>n</i><sub>O<sub>2</sub></sub></sub></th>"
span_nAA <- "<th colspan=\"3\"><i>n</i><sub>AA</sub></th>"
span_var1 <- get(paste0("span_", vars[1]))
span_var2 <- get(paste0("span_", vars[2]))
x <- gsub("<table border=1>",
paste("<table border=1> <tr>", span_empty, span_var1, span_var2, "</tr>"),
x, fixed=TRUE)
# More formatting of the headers
x <- gsub("description", "reference (description)", x, fixed=TRUE)
x <- gsub("n1", "<i>n</i><sub>1</sub>", x, fixed=TRUE)
x <- gsub("n2", "<i>n</i><sub>2</sub>", x, fixed=TRUE)
x <- gsub(paste0(vars[1], ".diff"), "MD", x, fixed=TRUE)
x <- gsub(paste0(vars[2], ".diff"), "MD", x, fixed=TRUE)
x <- gsub(paste0(vars[1], ".CLES"), "ES", x, fixed=TRUE)
x <- gsub(paste0(vars[2], ".CLES"), "ES", x, fixed=TRUE)
x <- gsub(paste0(vars[1], ".p.value"), "<i>p</i>-value", x, fixed=TRUE)
x <- gsub(paste0(vars[2], ".p.value"), "<i>p</i>-value", x, fixed=TRUE)
# Take out extraneous spaces (triggers pre-formatted text - in markdown?)
x <- gsub(" ", " ", x, fixed=TRUE)
# Done!
write.table(x, "", row.names=FALSE, col.names=FALSE, quote=FALSE)
return(invisible(comptab.out))
}
# Make table for new vignettes
# 20191206
xsummary2 <- function(comptab1, comptab2) {
ct1 <- do.call(rbind, comptab1)
ct2 <- do.call(rbind, comptab2)
# Get all data
# Include medians or means for up and down groups for making summary .csv files 20200125
out <- data.frame(
dataset = ct1$dataset,
description = ct2$description,
n1 = ct1$n1,
n2 = ct1$n2,
Zc.down = ct1$Zc.median1,
Zc.up = ct1$Zc.median2,
Zc.diff = ct1$Zc.diff,
nH2O.down = ct1$nH2O.median1,
nH2O.up = ct1$nH2O.median2,
nH2O.diff = ct1$nH2O.diff,
nAA.down = ct2$nAA.median1,
nAA.up = ct2$nAA.median2,
nAA.diff = ct2$nAA.diff,
MW.down = ct2$MW.median1,
MW.up = ct2$MW.median2,
MW.diff = ct2$MW.diff,
stringsAsFactors = FALSE
)
# Prepare table
x <- out[, c("dataset", "description", "n1", "n2", "Zc.diff", "nH2O.diff",
"nAA.diff", "MW.diff")]
# Get the publication key from the dataset name
publication <- sapply(strsplit(x$dataset, "_"), "[", 1)
# Format the publication key (monospaced font)
publication <- paste0("<code>", publication, "</code>")
# Italicize species names (first words in description, surrounded by underscore)
x$description <- gsub("^_", "<i>", x$description)
x$description <- gsub("_", "</i>", x$description)
# Combine the publication and description
x$description <- paste0(publication, " (", x$description, ")")
# Datasets have letter labels
x$dataset <- c(letters, LETTERS)[1:nrow(x)]
# To save column width, change "dataset" to "set"
colnames(x)[1] <- "set"
# Multiply values of Zc and nH2O by 1000
x[, 5:6] <- x[, 5:6] * 1000
# Multiply values of MW by 100
x[, 8] <- x[, 8] * 100
# Round values
x[, 5:8] <- round(x[, 5:8])
# Put markers around negative values
for(icol in 5:8) {
ineg <- x[, icol] < 0
ineg[is.na(ineg)] <- FALSE
x[ineg, icol] <- paste("**", x[ineg, icol], "**")
}
# Create xtable
x <- xtable::xtable(x, align=c("c", "l", "l", rep("r", ncol(x) - 2)))
x <- capture.output(xtable::print.xtable(x, type="html",
include.rownames=FALSE,
math.style.exponents=TRUE,
sanitize.text.function=function(x){x}))
# Make the marked negative values bold
x <- gsub("> ** ", "> <B>", x, fixed=TRUE)
x <- gsub(" ** <", "</B> <", x, fixed=TRUE)
# Change "NaN" to "NA"
x <- gsub("NaN", "NA", x, fixed=TRUE)
# Add headers that span multiple columns
span_empty6 <- "<td align=\"center\" colspan=\"6\"></td>"
span_empty2 <- "<td align=\"center\" colspan=\"2\"></td>"
border <- paste("<table border=1> <tr>", span_empty6, span_empty2, "</tr>")
x <- gsub("<table border=1>", border, x, fixed=TRUE)
# More formatting of the headers
x <- gsub("description", "reference (description)", x, fixed=TRUE)
x <- gsub("n1", "<i>n</i><sub>down</sub>", x, fixed=TRUE)
x <- gsub("n2", "<i>n</i><sub>up</sub>", x, fixed=TRUE)
x <- gsub("Zc.diff", "Δ<i>Z</i><sub>C</sub>", x, fixed=TRUE)
x <- gsub("nH2O.diff", "Δ<i>n</i><sub>H<sub>2</sub>O</sub>", x, fixed=TRUE)
x <- gsub("nAA.diff", "Δ<i>n</i><sub>AA</sub>", x, fixed=TRUE)
x <- gsub("MW.diff", "ΔMW", x, fixed=TRUE)
# Done!
write.table(x, "", row.names=FALSE, col.names=FALSE, quote=FALSE)
return(invisible(out))
}
# Make table for new vignettes (with GRAVY and pI)
# 20200418
xsummary3 <- function(comptab1, comptab2, comptab3) {
ct1 <- do.call(rbind, comptab1)
ct2 <- do.call(rbind, comptab2)
ct3 <- do.call(rbind, comptab3)
# Get all data
# Include medians or means for up and down groups for making summary .csv files 20200125
out <- data.frame(
dataset = ct1$dataset,
description = ct2$description,
n1 = ct1$n1,
n2 = ct1$n2,
Zc.down = ct1$Zc.median1,
Zc.up = ct1$Zc.median2,
Zc.diff = ct1$Zc.diff,
nH2O.down = ct1$nH2O.median1,
nH2O.up = ct1$nH2O.median2,
nH2O.diff = ct1$nH2O.diff,
pI.down = ct2$pI.median1,
pI.up = ct2$pI.median2,
pI.diff = ct2$pI.diff,
GRAVY.down = ct2$GRAVY.median1,
GRAVY.up = ct2$GRAVY.median2,
GRAVY.diff = ct2$GRAVY.diff,
nAA.down = ct3$nAA.median1,
nAA.up = ct3$nAA.median2,
nAA.diff = ct3$nAA.diff,
MW.down = ct3$MW.median1,
MW.up = ct3$MW.median2,
MW.diff = ct3$MW.diff,
stringsAsFactors = FALSE
)
# Prepare table
x <- out[, c("dataset", "description", "n1", "n2", "Zc.diff", "nH2O.diff",
"pI.diff", "GRAVY.diff", "nAA.diff", "MW.diff")]
# Get the publication key from the dataset name
publication <- sapply(strsplit(x$dataset, "_"), "[", 1)
# Format the publication key (monospaced font)
publication <- paste0("<code>", publication, "</code>")
# Italicize species names (first words in description, surrounded by underscore)
x$description <- gsub("^_", "<i>", x$description)
x$description <- gsub("_", "</i>", x$description)
# Combine the publication and description
x$description <- paste0(publication, " (", x$description, ")")
# Datasets have letter labels
x$dataset <- c(letters, LETTERS)[1:nrow(x)]
# To save column width, change "dataset" to "set"
colnames(x)[1] <- "set"
# Multiply values of Zc, nH2O and GRAVY by 1000
x[, c(5:6, 8)] <- x[, c(5:6, 8)] * 1000
# Multiply values of pI and MW by 100
x[, c(7, 10)] <- x[, c(7, 10)] * 100
# Round values
x[, 5:10] <- round(x[, 5:10])
# Put markers around negative values
for(icol in 5:10) {
ineg <- x[, icol] < 0
ineg[is.na(ineg)] <- FALSE
x[ineg, icol] <- paste("**", x[ineg, icol], "**")
}
# Create xtable
x <- xtable::xtable(x, align=c("c", "l", "l", rep("r", ncol(x) - 2)))
x <- capture.output(xtable::print.xtable(x, type="html",
include.rownames=FALSE,
math.style.exponents=TRUE,
sanitize.text.function=function(x){x}))
# Make the marked negative values bold
x <- gsub("> ** ", "> <B>", x, fixed=TRUE)
x <- gsub(" ** <", "</B> <", x, fixed=TRUE)
# Change "NaN" to "NA"
x <- gsub("NaN", "NA", x, fixed=TRUE)
# More formatting of the headers
x <- gsub("description", "reference (description)", x, fixed=TRUE)
x <- gsub("n1", "<i>n</i><sub>down</sub>", x, fixed=TRUE)
x <- gsub("n2", "<i>n</i><sub>up</sub>", x, fixed=TRUE)
x <- gsub("Zc.diff", "Δ<i>Z</i><sub>C</sub>", x, fixed=TRUE)
x <- gsub("nH2O.diff", "Δ<i>n</i><sub>H<sub>2</sub>O</sub>", x, fixed=TRUE)
x <- gsub("pI.diff", "ΔpI", x, fixed=TRUE)
x <- gsub("GRAVY.diff", "ΔGRAVY", x, fixed=TRUE)
x <- gsub("nAA.diff", "Δ<i>n</i><sub>AA</sub>", x, fixed=TRUE)
x <- gsub("MW.diff", "ΔMW", x, fixed=TRUE)
# Done!
write.table(x, "", row.names=FALSE, col.names=FALSE, quote=FALSE)
return(invisible(out))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.