R/canH2O.R

Defines functions contplot canH2OS2 canH2OS1 canH2OT1 canH2OT2 canH2O4 canH2O3 canH2O2 canH2O1

Documented in canH2O1 canH2O2 canH2O3 canH2O4 canH2OS1 canH2OS2 canH2OT1 canH2OT2

# JMDplots/canH2O.R
# Make plots for the paper:
# Water as a reactant in the differential expression of proteins in cancer
# 20191126 jmd first version
# 20200203 started moving functions to JMDplots
# 20200404 include liver cancer

# Study overview 20191203
canH2O1 <- function(pdf = FALSE) {
  if(pdf) pdf("canH2O1.pdf", width = 7, height = 7)
  opar <- par(mar = c(0, 0, 0, 0))
  mat <- matrix(c(1, 1, 1, 1, 2, 1), nrow = 2, byrow = TRUE)
  layout(mat, widths = c(0.2, 1, 1), heights = c(1.5, 1))
  par(cex = 1)
  openplotmat(frame.plot = TRUE)
  # Three rows with eleven positions
  npos <- c(11, 11, 11)
  pos <- coordinates(npos)
  # Positions and x-sizes of the boxes
  p1 <- 2; r1 <- 0.08
  p2 <- 5; r2 <- 0.1
  p3 <- 9; r3 <- 0.15
  p4 <- 19; r4 <- 0.115
  p5 <- 16; r5 <- 0.16
  p6 <- 13; r6 <- 0.07
  p7 <- 28; r7 <- 0.09
  # Position for the overview plot
  pO <- p1 + 2*npos[1] 

  straightarrow(from = pos[p1, ], to = pos[p2, ], arr.pos = 0.56, endhead = TRUE)
  straightarrow(from = pos[p2, ], to = pos[p3, ], arr.pos = 0.53, endhead = TRUE)
  curvedarrow(from = pos[p3, ], to = pos[p4, ] + c(0, 0.08), curve = 0.5, arr.pos = 0.67, endhead = TRUE)
  curvedarrow(from = pos[p3, ] + c(0, -0.05), to = pos[p5, ] + c(0.08, 0.08), curve = 0.3, arr.pos = 0.725, endhead = TRUE)
  curvedarrow(from = pos[p4, ] + c(-0.06, -0.01), to = pos[pO, ] + c(0.27, 0.33), curve = -0.3, arr.pos = 0.40, endhead = TRUE)
  curvedarrow(from = pos[p6, ] + c(-0.07, 0), to = pos[pO, ] + c(0.01, 0.27), curve = 0.2, arr.pos = 0.75, endhead = TRUE)

  # Hard-coded colors (palette.colors is not available in R < 4.0.0) 20200415
  #cols <- palette.colors(8, "Set 2")
  cols <- c("#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3")
  cex <- 1.3
  ry <- 0.08
  textrect(pos[p1, ], r1, ry, lab = c("Papers", "SI Tables"), cex = cex, box.col = cols[1])
  textplain(pos[p1, ] + c(0, 0.12), lab = "Literature Search", font = 2)
  textrect(pos[p2, ], r2, ry, lab = c("UniProt IDs", "Up   Down", "...      ...   "), cex = cex, box.col = cols[2])
  textplain(pos[p2, ] + c(0, 0.12), lab = c("Differential", "Expression"), font = 2, height = 0.04)
  Zctext <- quote(italic(Z)[C]~"oxidation state")
  nH2Otext <- quote(italic(n)[H[2]*O]~"hydration state")
  comptext <- as.expression(c(Zctext, nH2Otext))
  textrect(pos[p3, ], r3, ry, lab = comptext, cex = cex, box.col = cols[2])
  textplain(pos[p3, ] + c(0, 0.12), lab = c("Compositional", "Analysis"), font = 2, height = 0.04)

  # Show numbers of datasets 
  cond1 <- c("hypoxia", "secreted", "osmotic_euk", "glucose", "3D")
  cond2 <- c("breast", "colorectal", "liver", "lung", "pancreatic", "prostate")
  vigout <- system.file("diffexpr", package = "JMDplots")
  conddat <- function(cond) read.csv(paste0(vigout, "/", cond, ".csv"), as.is = TRUE)
  culture <- lapply(cond1, conddat); names(culture) <- cond1
  cancer <- lapply(cond2, conddat); names(cancer) <- cond2
  total <- sum(sapply(culture, nrow), sapply(cancer, nrow))
  # calculate number of studies
  cultsets <- unlist(sapply(culture, "[[", "dataset"))
  cansets <- unlist(sapply(cancer, "[[", "dataset"))
  nstudies <- length(unique(sapply(strsplit(c(cultsets, cansets), "_"), "[", 1)))

  # Add "Cancer vs normal" box
  textrect(pos[p4, ] + c(0.02, 0.065), r4, ry + 0.015, lab = "", cex = cex, box.col = cols[4])
  calab <- paste(sapply(cancer, nrow), cond2)
  textplain(pos[p4, ] + c(-0.087, 0.065), lab = calab, height = 0.105, cex = cex, adj = c(0, 0.5))
  textplain(pos[p4, ] + c(0.02, 0.2), lab = c("Cancer", "vs normal"), font = 2, height = 0.04)
  # Add color legend 20200506
  #col2 <- palette.colors(8, "Classic Tableau")[c(7, 6, 2, 8, 5, 4)]
  col2 <- c("#E377C2", "#8C564B", "#FF7F0E", "#7F7F7F", "#9467BD", "#D62728")
  # function to get y-coordinates of text lines, extracted from diagram::textplain()
  ycoords <- function (mid, height = 0.1, lab = "", adj = c(0.5, 0.5), ...) {
    y <- numeric()
    if (length(lab) == 1) y <- mid[2]
    else {
      y1 <- mid[2] + height
      ddy <- 2 * height/(length(lab) + 1)
      for (i in 1:length(lab)) y <- c(y, y1 - ddy * i)
    }
    y
  }
  ycancer <- ycoords(pos[p4, ] + c(-0.08, 0.065), lab = calab, height = 0.105)
  xcancer <- rep(pos[p4, 1] + 0.117, length(ycancer))
  points(xcancer, ycancer, bg = col2, pch = 21, cex = 1.7)

  # Add "Cell culture" box
  textrect(pos[p5, ] + c(-0.011, 0.08), r5, ry, lab = "", cex = cex, box.col = cols[3])
  # sum osmotic (salt) and glucose datasets for hyperosmotic stress 20200411
  culab <- sapply(culture, nrow)
  culab["osmotic_euk"] <- culab["osmotic_euk"] + culab["glucose"]
  culab <- culab[-4]
  # Change "osmotic_euk" to "hyperosmotic" 20200414
  names(culab)[3] <- "hyperosmotic"
  # Write "secreted in hypoxia" and "3D vs 2D culture" 20200118
  culab <- paste(culab, names(culab))
  shlab <- c(paste(culab[2], "in"), "     hypoxia")
  textplain(pos[p5, ] + c(-0.163, 0.095), lab = shlab, height = 0.038, cex = cex, adj = c(0, 0.5))
  culab <- c(culab[1], "", "", culab[3:4])
  culab[5] <- paste(culab[5], "vs 2D culture")
  textplain(pos[p5, ] + c(-0.163, 0.08), lab = culab, height = 0.09, cex = cex, adj = c(0, 0.5))
  textplain(pos[p5, ] + c(-0.02, 0.2), lab = c("Cell", "culture"), font = 2, height = 0.04)
  # Add color legend 20200506
  #col1 <- palette.colors(9, "Okabe-Ito")[c(6, 4, 9, 3, 2)]
  col1 <- c(blue = "#0072B2", bluishgreen = "#009E73", gray = "#999999", skyblue = "#56B4E9", orange = "#E69F00")
  yculture <- ycoords(pos[p5, ] + c(-0.16, 0.08), lab = culab, height = 0.09)
  xculture <- rep(pos[p5, 1] + 0.13, length(yculture))
  # Point for salt-inducded hyperosmotic stress on same line as glucose
  points(xculture[-3], yculture[-3], bg = col1[-3], pch = 21, cex = 1.7)
  points(xculture[3] - 0.025, yculture[4], bg = col1[3], pch = 21, cex = 1.7)

  # Add dataset summary, pan-cancer, and findings text
  textplain(pos[21, ] + c(-0.02, 0.1), lab = c(paste("Total:", total), "datasets", paste("from", nstudies), "studies"),
            adj = c(0, 0.5), font = 3, height = 0.07, cex = 1.2)

  textrect(pos[p6, ] + c(0, 0.08), r6, ry, lab = c("GEPIA2", "(TCGA /", "GTEx)", ""), cex = cex, box.col = cols[7])
  textplain(pos[p6, ] + c(-0.025, 0.025), r6, ry, lab = "HPA", cex = cex)
  textplain(pos[p6, ] + c(0, 0.2), lab = c("Pan-cancer", "datasets"), font = 2, height = 0.04)

  textplain(pos[25, ] + c(0.105, 0.295), lab = "Main finding", font = 2)
  cantext1 <- "Most cancer types have higher"
  cantext2 <- "hydration state of proteins"
  cantext3 <- "compared to normal tissue."
  cantext <- c(cantext1, cantext2, cantext3)
  textplain(pos[25, ] + c(0.105, 0.24), lab = cantext, height = 0.05, font = 4)

  textplain(pos[27, ] + c(0.16, 0.07), lab = "Other findings", font = 2)
  hyptext1 <- quote(italic("Hypoxia experiments show no consistent"))
  hyptext2 <- quote(italic("difference in oxidation state of proteins."))
  hyptext <- as.expression(c(hyptext1, hyptext2))
  textplain(pos[26, ] + c(0.265, 0.005), lab = hyptext, height = 0.04)

  hydtext1 <- quote(italic("Most hyperosmotic stress and 3D vs 2D culture"))
  hydtext2 <- quote(italic("experiments yield lower hydration state of proteins."))
  hydtext <- as.expression(c(hydtext1, hydtext2))
  textplain(pos[26, ] + c(0.21, -0.08), lab = hydtext, height = 0.04)

  # Make overview plot
  plot.new()
  par(mar = c(2, 2, 0, 0), xaxs = "i", yaxs = "i")
  plot.window(c(-1, 1), c(-1, 1))
  axis(1, tck = 0, labels = FALSE)
  axis(2, tck = 0, labels = FALSE)
  mtext("oxidation state", 1, 0.5)
  mtext("hydration state", 2, 0.5)
  # draw arrows to mean values, muliplied by a constant to scale to (-1, 1) axis range
  for(i in 5:1) arrows(0, 0, 40*mean(culture[[i]]$Zc.diff), 40*mean(culture[[i]]$nH2O.diff), col = col1[i], lwd = 3, length = 0.15)
  for(i in 1:6) arrows(0, 0, 40*mean(cancer[[i]]$Zc.diff), 40*mean(cancer[[i]]$nH2O.diff), col = col2[i], lwd = 3, length = 0.15)

  # Restore these defaults to be able to re-run this script with expected results
  par(xaxs = "r", yaxs = "r")
  if(pdf) {
    dev.off()
    addexif("canH2O1", "Study overview", "Dick (2021)")
  }
}

# Median differences of nH2O and Zc for cell culture and cancer tissue 20191126
canH2O2 <- function(pdf = FALSE) {
  if(pdf) pdf("canH2O2.pdf", width = 8, height = 6)
  # Layout with spaces between groups of plots
  # Spaces
  p00 <- rep(0, 2); p0 <- rep(0, 15)
  # (A) cell culture
  p1 <- rep(1, 15); p2 <- rep(2, 15); p3 <- rep(3, 15); p4 <- rep(4, 15); p5 <- rep(5, 15)
  # (B) cancer
  p6 <- rep(6, 15); p7 <- rep(7, 15); p8 <- rep(8, 15)
  p9 <- rep(9, 15); p10 <- rep(10, 15); p11 <- rep(11, 15)
  # (C) credible regions
  p12 <- rep(12, 15); p13 <- rep(13, 15)
  # Assemble columns (each one is 48 units high)
  c1 <- c(0, p1, p00, p6, p9)
  c2 <- c(0, p2, p00, p7, p10)
  c3 <- c(0, p3, p00, p8, p11)
  c4 <- c(0, p4, p00, p0, p0)
  c5 <- c(0, p4, p00, p12, p13)
  c6 <- c(0, p5, p00, p12, p13)
  mat <- matrix(c(c1, c2, c3, c4, c5, c6), nrow = 48)
  layout(mat, widths = c(1, 1, 1, 0.5, 0.5, 1))
  par(mar = c(3.2, 3.3, 1.5, 1), mgp = c(2.2, 1, 0))

  # Read data for all conditions
  cond1 <- c("hypoxia", "secreted", "osmotic_euk", "glucose", "3D")
  cond2 <- c("breast", "colorectal", "liver", "lung", "pancreatic", "prostate")
  conds <- c(cond1, cond2)
  vigout <- system.file("diffexpr", package = "JMDplots")
  alldat <- lapply(conds, function(cond) {
    file <- paste0(vigout, "/", cond, ".csv")
    read.csv(file)
  })
  names(alldat) <- conds

  #col1 <- palette.colors(9, "Okabe-Ito")[c(6, 4, 9, 3, 2)]
  col1 <- c(blue = "#0072B2", bluishgreen = "#009E73", gray = "#999999", skyblue = "#56B4E9", orange = "#E69F00")
  #col2 <- palette.colors(8, "Classic Tableau")[c(7, 6, 2, 8, 5, 4)]
  col2 <- c("#E377C2", "#8C564B", "#FF7F0E", "#7F7F7F", "#9467BD", "#D62728")
  col <- c(col1, col2)

  # Make scatter plots for nH2O and Zc in cell culture and cancer types
  lapply(1:length(alldat), function(i) {
    thisone <- names(alldat)[i]
    if(thisone %in% cond1) {
      # Use open/filled symbols for cancer/non-cancer cells 20200103
      pch <- rep(21, nrow(alldat[[i]]))
      pch[grepl("cancer", alldat[[i]]$tags)] <- 19
      # use squares for yeast datasets 20200407
      pch[grepl("yeast", alldat[[i]]$tags)] <- 0
    } else {
      # Use filled/open symbols for human/[mouse or rat] cancer 20200415
      pch <- rep(19, nrow(alldat[[i]]))
      pch[grepl("mouse", alldat[[i]]$tags)] <- 21
      pch[grepl("rat", alldat[[i]]$tags)] <- 21
    }
    dpargs <- list(comptab = alldat[i], pch = pch, pt.text = NA, col = col[i], cex = 1, labtext = NA,
                   xlim = c(-0.06, 0.06), ylim = c(-0.07, 0.07), axes = FALSE, frame.plot = TRUE)
    do.call(diffplot, dpargs)
    labels <- at <- seq(-0.06, 0.06, 0.02)
    labels[c(2, 3, 5, 6)] <- NA
    axis(1, at, labels)
    axis(2, at, labels)
    main <- names(alldat)[i]
    substr(main, 1, 1) <- toupper(substr(main, 1, 1))
    if(thisone == "secreted") main <- "hypoxia"
    if(thisone == "osmotic_euk") main <- "(salt)"
    if(thisone == "glucose") main <- "(glucose)"
    if(thisone == "3D") main <- "3D / 2D"
    title(main, font.main = 1)
    if(thisone == "secreted") title("Secreted in", font.main = 1, line = 1.4, xpd = NA)
    if(thisone %in% c("osmotic_euk", "glucose")) title("Hyperosmotic", font.main = 1, line = 1.4, xpd = NA)
    if(thisone == "hypoxia") label.figure("A", cex = 2, font = 2, yfrac = 1)
    if(thisone == "breast") label.figure("B", cex = 2, font = 2, yfrac = 1)
  })
  
  # Compare density contours for cell culture and cancer types 20191126
  # CSV files generated by running the cancer and cell culture vignettes
  vigout <- system.file("diffexpr", package = "JMDplots")
  conddat <- function(cond) read.csv(paste0(vigout, "/", cond, ".csv"), as.is = TRUE)
  culture <- lapply(cond1, conddat); names(culture) <- cond1
  cancer <- lapply(cond2, conddat); names(cancer) <- cond2
  names <- names(culture)
  names[3] <- "salt"
  contplot(culture, "Cell culture", col1,
           dx = c(0.038, 0.030, -0.03, 0.0042, -0.026), dy = c(0.025, -0.054, 0.025, -0.036, -0.02), names = names)
  lines(c(0.0147, 0.024), c(0.0035, 0.016))
  lines(c(-0.0255, -0.0158), c(0.0143, 0.013))
  label.figure("C", cex = 2, font = 2, xfrac = 0.02, yfrac = 0.9)
  contplot(cancer, "Cancer tissue", col2,
           dx = c(-0.019, NA, 0.022, -0.022, 0.030, -0.025), dy = c(-0.028, NA, 0.027, 0.038, -0.035, 0.023))
  text(0.009, -0.021, "CRC", col = col2[2])

  if(pdf) {
    dev.off()
    addexif("canH2O2", "Median differences of protein length, nH2O and Zc for cell culture and cancer tissue", "Dick (2021)")
  }
}

# nH2O-Zc plots for TCGA and HPA datasets 20191126
canH2O3 <- function(pdf = FALSE) {
  vigout2 <- system.file("diffexpr", package = "JMDplots")
  HPA <- read.csv(file.path(vigout2, paste0("HPA.csv")), as.is = TRUE)
  TCGA <- read.csv(file.path(vigout2, paste0("TCGA.csv")), as.is = TRUE)
  TCGA_labels <- TCGA$description
  HPA_labels <- HPA$description
  HPA_labels <- sapply(strsplit(HPA_labels, " "), "[", 1)
  HPA_labels[grepl("head", HPA_labels)] <- "head and neck"

  # Get colors for six cancers in paper 20191208
  cond2 <- c("breast", "colorectal", "liver", "lung", "pancreatic", "prostate")
  #col2 <- palette.colors(8, "Classic Tableau")[c(7, 6, 2, 8, 5, 4)]
  col2 <- c("#E377C2", "#8C564B", "#FF7F0E", "#7F7F7F", "#9467BD", "#D62728")
  jHPA <- match(cond2, sapply(strsplit(HPA$description, " "), "[", 1))
  colHPA <- rep("darkslategray", nrow(HPA))
  colHPA[jHPA] <- col2
  shapeHPA <- rep(15, nrow(HPA))
  shapeHPA[jHPA] <- 19
  # Now do TCGA
  TCGAnames <- names(HTmap)[match(cond2, sapply(strsplit(HTmap, " "), "[", 1))]
  jTCGA <- match(TCGAnames, TCGA$description)
  colTCGA <- rep("darkslategray", nrow(TCGA))
  colTCGA[jTCGA] <- col2
  shapeTCGA <- rep(15, nrow(TCGA))
  shapeTCGA[jTCGA] <- 19

  # Calculate 2D density and 50% probability level 20200318
  densfun <- function(x, y) {
    n <- 200
    dens <- kde2d(x, y, n = n)
    # Find 50% probability level
    # https://stackoverflow.com/questions/16225530/contours-of-percentiles-on-level-plot
    # (snippet from emdbook::HPDregionplot from @benbolker)
    dx <- diff(dens$x[1:2])
    dy <- diff(dens$y[1:2])
    sz <- sort(dens$z)
    c1 <- cumsum(sz) * dx * dy
    probs <- 0.5
    levels <- sapply(probs, function(x) {
      approx(c1, sz, xout = 1 - x)$y
    })
    # Turn density into data frame for ggplot
    # Get x- and y- values
    xr <- range(x)
    xs <- seq(xr[1], xr[2], length.out = n)
    yr <- range(y)
    ys <- seq(yr[1], yr[2], length.out = n)
    dat <- expand.grid(xs, ys)
    colnames(dat) <- c("x", "y")
    dat <- cbind(dat, z = as.vector(dens$z))
    list(levels = levels, dat = dat)
  }

  TCGAstuff <- densfun(TCGA$Zc.diff, TCGA$nH2O.diff)
  TCGAdens <- TCGAstuff$dat
  TCGAlevels <- TCGAstuff$levels
  HPAstuff <- densfun(HPA$Zc.diff, HPA$nH2O.diff)
  HPAdens <- HPAstuff$dat
  HPAlevels <- HPAstuff$levels

  # Median differences of nH2O-Zc for HPA and TCGA datasets
  # Common elements for both plots
  pl1.common <- list(
    theme_bw(),
    xlab(quote(Delta*italic(Z)[C])),
    ylab(quote(Delta*italic(n)[H[2]*O])),
    geom_hline(yintercept = 0, linetype = 3, colour = "gray30"),
    geom_vline(xintercept = 0, linetype = 3, colour = "gray30"),
    theme(plot.tag = element_text(size = 20), plot.title = element_text(hjust = 0.5))
  )
  # Create plots
  nudge_x <- ifelse(TCGA_labels %in% c("SKCM"), 0.001, 0)
  # Workaround for "no visible binding for global variable ‘Zc.diff’" etc. in R CMD check 20200317
  Zc.diff <- nH2O.diff <- x <- y <- z <- NULL
  pl1 <- list(
    ggplot(TCGA, aes(Zc.diff, nH2O.diff, label = TCGA_labels)) + 
      pl1.common + geom_point(color = colTCGA, size = 1.5, shape = shapeTCGA, stroke = 1.5) + 
      geom_contour(data = TCGAdens, aes(x, y, z = z), breaks = TCGAlevels, inherit.aes = FALSE, color = "slateblue4", lty = 2) +
      ggrepel::geom_text_repel(size = 2.5, nudge_x = nudge_x, seed = 42, box.padding = 0.12, point.padding = 0.1) +
      ggtitle("TCGA / GTEx") + labs(tag = expression(bold(A))),
    ggplot(HPA, aes(Zc.diff, nH2O.diff, label = HPA_labels)) + 
      pl1.common + geom_point(color = colHPA, size = 1.5, shape = shapeHPA, stroke = 1.5) + 
      geom_contour(data = HPAdens, aes(x, y, z = z), breaks = HPAlevels, inherit.aes = FALSE, color = "darkslategray4", lty = 2) +
      ggrepel::geom_text_repel(size = 3, seed = 42) +
      ggtitle("HPA") + labs(tag = expression(bold(B)))
  )

  # Put together the figure
  mat <- matrix(c(1, 2), byrow = TRUE, nrow = 1)
  ml <- gridExtra::marrangeGrob(pl1, layout_matrix = mat, top = NULL)
  if(pdf) {
    ggsave("canH2O3.pdf", ml, width = 10, height = 4)
    addexif("canH2O3", "nH2O-Zc and plots for TCGA and HPA datasets", "Dick (2021)")
  } else ml
}

# Differentially expressed genes in aneuploid and osmotically shocked yeast cells 20200505
canH2O4 <- function(pdf = FALSE) {
  if(pdf) pdf("canH2O4.pdf", width = 5, height = 4)
  # Aneuploid yeast cells (Tsai et al., 2019)
  pdat <- pdat_aneuploidy("TNC+19")
  layout(matrix(c(1, 2, 4, 1, 3, 5), nrow = 3), heights = c(0.2, 1, 1))
  par(mar = c(0, 0, 0, 0), mgp = c(2.2, 0.8, 0))
  plot.new()
  uptxt <- paste("Proteins coded by", sum(pdat$up2), "up-regulated genes")
  dntxt <- paste("Proteins coded by", sum(!pdat$up2), "down-regulated genes")
  legend("center", c(dntxt, uptxt, ""), lty = c(1, 2, NA), col = c(1, 2, NA), bty = "n")
  par(mar = c(3.5, 3.5, 0.5, 0.5))
  qdist(pdat, "Zc")
  label.figure("A", xfrac = 0.04, yfrac = 1, font = 2, cex = 1.5)
  qdist(pdat, "nH2O")
  label.figure("B", xfrac = 0.04, yfrac = 1, font = 2, cex = 1.5)
  # Hyper- and hypo-osmotic experiments (Gasch et al., 2000)
  vigout2 <- system.file("diffexpr", package = "JMDplots")
  yeast_stress <- read.csv(file.path(vigout2, paste0("yeast_stress.csv")), as.is = TRUE)
  ihyper <- grepl("sorbitol", yeast_stress$dataset)
  ihypo <- grepl("Hypo", yeast_stress$dataset)
  # Plot Zc
  plot(c(1, 7), extendrange(yeast_stress$Zc.diff), xlab = "Time (minutes)", ylab = quote(Delta*italic(Z)[C]), xaxt = "n", type = "n")
  abline(h = 0, lty = 3, col = "gray30")
  axis(1, at = 1:7, labels = c(5, 15, 30, 45, 60, 90, 120))
  lines(1:7, yeast_stress$Zc.diff[ihyper], pch = 1, type = "b")
  lines(1:5, yeast_stress$Zc.diff[ihypo], pch = 0, type = "b")
  text(c(2.5, 2.7), c(-0.006, 0.0135), c("hyperosmotic", "hypoosmotic"))
  label.figure("C", xfrac = 0.04, yfrac = 1, font = 2, cex = 1.5)
  # Plot nH2O
  plot(c(1, 7), extendrange(yeast_stress$nH2O.diff), xlab = "Time (minutes)", ylab = quote(Delta*italic(n)[H[2]*O]), xaxt = "n", type = "n")
  abline(h = 0, lty = 3, col = "gray30")
  axis(1, at = 1:7, labels = c(5, 15, 30, 45, 60, 90, 120))
  lines(1:7, yeast_stress$nH2O.diff[ihyper], pch = 1, type = "b")
  lines(1:5, yeast_stress$nH2O.diff[ihypo], pch = 0, type = "b")
  text(c(2.2, 2.8), c(-0.036, 0.024), c("hyperosmotic", "hypoosmotic"))
  label.figure("D", xfrac = 0.04, yfrac = 1, font = 2, cex = 1.5)
  if(pdf) {
    dev.off()
    addexif("canH2O4", "Differentially expressed genes in aneuploid and osmotically shocked yeast cells", "Dick (2021)")
  }
}

###############
### TABLE 2 ###
###############

# Mean differences and p-values for all datasets in each condition 20200125
canH2OT2 <- function() {
  cond1 <- c("hypoxia", "secreted", "osmotic_euk", "glucose", "3D")
  cond2 <- c("breast", "colorectal", "liver", "lung", "pancreatic", "prostate")
  cond3 <- c("TCGA", "HPA")
  vigout <- system.file("diffexpr", package = "JMDplots")
  conddat <- function(cond) read.csv(paste0(vigout, "/", cond, ".csv"), as.is = TRUE)
  culture <- lapply(cond1, conddat); names(culture) <- cond1
  cancer <- lapply(cond2, conddat); names(cancer) <- cond2
  vigout2 <- system.file("diffexpr", package = "JMDplots")
  conddat <- function(cond) read.csv(paste0(vigout2, "/", cond, ".csv"))
  pancan <- lapply(cond3, conddat); names(pancan) <- cond3
  # Make data frames for mean differences and p-values
  # Include comparison of up- and down-regulated proteins for "Secreted" in hypoxia compared to "Hypoxia" (whole-cell)
  pdat <- mdat <- data.frame(condition = c(names(culture), names(cancer), names(pancan), "up", "down"))
  for(property in c("Zc", "nH2O")) {
    pvals <- mvals <- numeric()
    idn <- grep(paste0(property, ".down"), colnames(culture[[1]]))
    iup <- grep(paste0(property, ".up"), colnames(culture[[1]]))
    for(type in c("culture", "cancer", "pancan", "up", "down")) {
      if(type %in% c("up", "down")) {
        if(type=="up") idiff <- iup
        if(type=="down") idiff <- idn
        hypoxia <- na.omit(culture$hypoxia[, idiff])
        secreted <- na.omit(culture$secreted[, idiff])
        mvals <- c(mvals, mean(secreted) - mean(hypoxia))
        pvals <- c(pvals, t.test(hypoxia, secreted)$p.value)
      } else {
        thisdat <- get(type)
        for(i in 1:length(thisdat)) {
          dn <- thisdat[[i]][, idn]
          up <- thisdat[[i]][, iup]
          isNA <- is.na(dn) | is.na(up)
          dn <- dn[!isNA]
          up <- up[!isNA]
          mvals <- c(mvals, mean(up) - mean(dn))
          pvals <- c(pvals, t.test(dn, up, paired = TRUE)$p.value)
        }
      }
    }
    mcol <- data.frame(X = mvals)
    pcol <- data.frame(X = round(log10(pvals), 1))
    colnames(mcol)[1] <- property
    colnames(pcol)[1] <- property
    mdat <- cbind(mdat, mcol)
    pdat <- cbind(pdat, pcol)
  }
  # Format values and put p-values in parentheses (bold for values below 0.05 significance threshold)
  fmts <- c(NA, "%.3f", "%.3f")
  for(icol in 2:3) {
    for(irow in 1:nrow(mdat)) {
      pround <- round(pdat[irow, icol], 1)
      ptxt <- sprintf("%.1f", pdat[irow, icol])
      if(pround < -1.3) ptxt <- paste0("(**", ptxt, "**)")
      else ptxt <- paste0("(", ptxt, ")")
      mdat[irow, icol] <- paste(sprintf(fmts[icol], as.numeric(mdat[irow, icol])), ptxt)
    }
  }
  # Adjustments for pretty kable output
  rownames(mdat) <- mdat[, 1]
  rownames(mdat)[rownames(mdat)=="osmotic_euk"] <- "salt"
  mdat <- mdat[, -1]
  colnames(mdat) <- c("&Delta;*Z*~C~", "&Delta;*n*~H2O~")
  mdat
}

#############################
### SI TABLES AND FIGURES ###
#############################

# Stoichiometric matrix for amino acids with QEC basis species 20200104
canH2OT1 <- function() {
  basis("QEC")
  species(aminoacids(""))
  out <- species()[, c(9, 1:5)]
  # Adjustments for pretty kable output
  rownames(out) <- out[, 1]
  out <- out[, -1]
  colnames(out) <- gsub("([[:digit:]])", "~\\1~", colnames(out))
  out
}

# nO2-Zc and nH2O-Zc correlations using QEC basis species 20201016
canH2OS1 <- function(pdf = FALSE) {
  # set up figure
  if(pdf) pdf("canH2OS1.pdf", width = 7, height = 3.5)
  par(mfrow = c(1, 2))
  par(mar = c(3.5, 3.5, 1, 1))
  par(mgp = c(2.5, 0.7, 0))
  par(las = 1)
  par(cex.lab = 1.2)

  # Define axis labels
  nH2Olab <- expression(italic(n)*H[2]*O)
  nO2lab <- expression(italic(n)*O[2])
  Zclab <- expression(italic(Z)[C])

  # Function to plot values for amino acids
  aaplot <- function(x, y, xlab, ylab, legend.x, lmlim = c(-1, 1)) {
    plot(x, y, type = "p", pch = aminoacids(1), xlab = xlab, ylab = NA)
    mtext(ylab, side = 2, line = 2.4, las = 0, cex = 1.2)
    lmfun(x, y, legend.x, lmlim)
  }

  # Set up amino acid compositions (one amino acid per composition)
  AAcomp <- as.data.frame(diag(20))
  names(AAcomp) <- aminoacids(3)

  # Plot 1: nO2-Zc of amino acids
  aaplot(Zc(AAcomp), nO2(AAcomp, "QEC"), Zclab, nO2lab, "bottomright")
  label.figure("A", cex = 1.7, font = 2)

  # Plot 2: nH2O-Zc of amino acids
  aaplot(Zc(AAcomp), nH2O(AAcomp, "QEC"), Zclab, nH2Olab, "bottomright")
  label.figure("B", cex = 1.7, font = 2)

  if(pdf) {
    dev.off()
    addexif("canH2OS1", "nO2-Zc and nH2O-Zc correlations using QEC basis species", "Dick (2021)")
  }
}

# HPA-TCGA scatterplots for Zc and nH2O 20200127
canH2OS2 <- function(pdf = FALSE) {
  vigout2 <- system.file("diffexpr", package = "JMDplots")
  HPA <- read.csv(file.path(vigout2, paste0("HPA.csv")), as.is = TRUE)
  TCGA <- read.csv(file.path(vigout2, paste0("TCGA.csv")), as.is = TRUE)
  TCGA_labels <- TCGA$description
  HPA_labels <- HPA$description
  HPA_labels <- sapply(strsplit(HPA_labels, " "), "[", 1)
  HPA_labels[grepl("head", HPA_labels)] <- "head and neck"

  # Get colors for 6 cancers in paper 20191208
  cond2 <- c("breast", "colorectal", "liver", "lung", "pancreatic", "prostate")
  #col2 <- palette.colors(8, "Classic Tableau")[c(7, 6, 2, 8, 5, 4)]
  col2 <- c("#E377C2", "#8C564B", "#FF7F0E", "#7F7F7F", "#9467BD", "#D62728")
  jHPA <- match(cond2, sapply(strsplit(HPA$description, " "), "[", 1))
  colHPA <- rep("slateblue4", nrow(HPA))
  colHPA[jHPA] <- col2
  sizeHPA <- rep(1.5, nrow(HPA))
  sizeHPA[jHPA] <- 2
  shapeHPA <- rep(15, nrow(HPA))
  shapeHPA[jHPA] <- 19

  # HPA-TCGA mappings
  iHPA <- match(HTmap, HPA$description)
  iTCGA <- match(names(HTmap), TCGA$description)

  # HPA-TCGA scatterplots for Zc and nH2O
  Zc <- data.frame(TCGA = TCGA$Zc.diff[iTCGA], HPA = HPA$Zc.diff[iHPA])
  nH2O <- data.frame(TCGA = TCGA$nH2O.diff[iTCGA], HPA = HPA$nH2O.diff[iHPA])

  labels <- TCGA_labels[iTCGA]
  col <- "slateblue4"
  size <- 1.5
  shape <- 15

  r.squared.Zc <- format(summary(lm(HPA ~ TCGA, Zc))$r.squared, digits = 2)
  Zc.title <- paste0("italic(R)^2 == '", r.squared.Zc, "'")
  pl1 <- ggplot(Zc, aes(x = TCGA, y = HPA, label = labels)) +
    theme_classic() + geom_smooth(method = "lm") +
    annotate("text", -Inf, Inf, label = Zc.title, parse = TRUE, hjust = -0.2, vjust = 1.5) +
    xlab(quote(Delta*italic(Z)[C]*" (TCGA / GTEx)")) +
    ylab(quote(Delta*italic(Z)[C]*" (HPA)")) +
    geom_hline(yintercept = 0, linetype = 3, colour = "gray30") +
    geom_vline(xintercept = 0, linetype = 3, colour = "gray30") +
    geom_point(shape = shapeHPA, size = sizeHPA, col = colHPA, stroke = 1.5) +
    ggrepel::geom_text_repel(size = 3, seed = 42) +
    labs(tag = expression(bold(A))) +
    theme(plot.tag = element_text(size = 20), plot.title = element_text(hjust = 0.5),
          panel.border = element_rect(colour = "black", fill=NA))
  pl1 <- list(pl1)

  r.squared.nH2O <- format(summary(lm(HPA ~ TCGA, nH2O))$r.squared, digits = 2)
  nH2O.title <- paste0("italic(R)^2 == '", r.squared.nH2O, "'")
  pl2 <- ggplot(nH2O, aes(x = TCGA, y = HPA, label = labels)) +
    theme_classic() + geom_smooth(method = "lm") +
    annotate("text", Inf, -Inf, label = nH2O.title, parse = TRUE, hjust = 1.1, vjust = -0.6) +
    xlab(quote(Delta*italic(n)[H[2]*O]*" (TCGA / GTEx)")) +
    ylab(quote(Delta*italic(n)[H[2]*O]*" (HPA)")) +
    geom_hline(yintercept = 0, linetype = 3, colour = "gray30") +
    geom_vline(xintercept = 0, linetype = 3, colour = "gray30") +
    geom_point(shape = shapeHPA, size = sizeHPA, col = colHPA, stroke = 1.5) +
    ggrepel::geom_text_repel(size = 3, seed = 42) +
    labs(tag = expression(bold(B))) +
    theme(plot.tag = element_text(size = 20), plot.title = element_text(hjust = 0.5),
          panel.border = element_rect(colour = "black", fill=NA))
  pl2 <- list(pl2)

  # Put together the figure
  mat <- matrix(1:2, nrow = 1)
  ml <- gridExtra::marrangeGrob(c(pl1, pl2), layout_matrix = mat, top = NULL)
  if(pdf) {
    ggsave("canH2OS2.pdf", ml, width = 8, height = 4)
    addexif("canH2OS2", "HPA-TCGA scatterplots for Zc and nH2O", "Dick (2021)")
  }
  else ml
}

#########################
### UNEXPORTED OBJECT ###
#########################

# Mapping between HPA and TCGA names
HTmap <- c(
  BRCA = "breast cancer / breast",
  CESC = "cervical cancer / cervix, uterine",
  COAD = "colorectal cancer / colon",
  UCEC = "endometrial cancer / endometrium 1",
  GBM = "glioma / cerebral cortex",
  HNSC = "head and neck cancer / salivary gland",
  LIHC = "liver cancer / liver",
  LUAD = "lung cancer / lung",
  DLBC = "lymphoma / lymph node",
  SKCM = "melanoma / skin 1",
  OV = "ovarian cancer / ovary",
  PAAD = "pancreatic cancer / pancreas",
  PRAD = "prostate cancer / prostate",
  KIRC = "renal cancer / kidney",
  STAD = "stomach cancer / stomach 1",
  TGCT = "testis cancer / testis",
  THCA = "thyroid cancer / thyroid gland",
  BLCA = "urothelial cancer / urinary bladder"
)

############################
### UNEXPORTED FUNCTIONS ###
############################

# Function to plot 50 percentile contours / extracted from canH2O2 20191201
contplot <- function(dat, main, col, xvar = "Zc", yvar = "nH2O", dx = NULL, dy = NULL,
                     labtext = NULL, names = NULL) {
  xlab <- bquote(Delta * .(cplab[[strsplit(xvar, "_")[[1]][1]]]))
  ylab <- bquote(Delta * .(cplab[[strsplit(yvar, "_")[[1]][1]]]))
  if(!is.null(labtext)) {
    xlab <- substitute(xlab ~ "("*labtext*")", list(xlab = xlab, labtext = labtext))
    ylab <- substitute(ylab ~ "("*labtext*")", list(ylab = ylab, labtext = labtext))
  }
  plot(c(-0.06, 0.06), c(-0.06, 0.06), xlab = xlab, ylab = ylab, type = "n")
  abline(h = 0, v = 0, lty = 3, col = "grey30")
  if(is.null(dx)) dx <- rep(0, length(dat))
  if(is.null(dy)) dy <- rep(0, length(dat))
  if(is.null(names)) names <- names(dat)
  lapply(1:length(dat), function(i) {
    diffplot(dat[[i]], c(xvar, yvar), pch = NA, pt.text = NA, col.contour = col[i], add = TRUE)
    x <- mean(dat[[i]][, paste0(xvar, ".diff")], na.rm = TRUE)
    y <- mean(dat[[i]][, paste0(yvar, ".diff")], na.rm = TRUE)
    text(x + dx[i], y + dy[i], names[i], col = col[i])
  })
  title(main, font.main = 1)
}

# Function to plot linear model
# Extracted from canH2OS1 20200224
lmfun <- function(x, y, legend.x = NULL, lmlim = NULL, ...) {
  mylm <- lm(y ~ x)
  if(is.null(lmlim)) lmlim <- range(x)
  lines(lmlim, predict(mylm, data.frame(x = lmlim)), ...)
  # add R-squared text
  if(!is.null(legend.x)) {
    R2 <- format(round(summary(mylm)$r.squared, 2), nsmall = 2)
    R2txt <- substitute(bolditalic(R)^bold("2")*bold(" = "*R2), list(R2 = R2))
    legend(legend.x, legend = R2txt, bty = "n")
  }
  invisible(round(residuals(mylm), 3))
}
jedick/JMDplots documentation built on April 12, 2025, 1:35 p.m.