JMDplots: Plots from Papers by Jeffrey M. Dick

Documented in pdat_osmotic_gene

# JMDplots/pdat_osmotic_gene.R
# retrieve protein IDs for differentially expressed genes in osmotic stress
# 20200418 jmd

pdat_osmotic_gene <- function(dataset = 2020) {
  if(identical(dataset, 2020)) {
    return(c(
             "KSA+02_NaCl", "KSA+02_sorbitol", "WJ02",
             "HZP+05_HSS", "HZP+05_HOS", "LGW+05", "SLA+05",
             "GCP08_30", "GCP08_43",
             "SBB+09_NaCl", "SBB+09_Sucrose",
             "HMO+10_transcriptomics",
             # don't include yeast 20200424
             #"LTH+11_RNA_30", "LTH+11_RNA_60", "LTH+11_RNA_90", "LTH+11_RNA_120", "LTH+11_RNA_240",
             "BBWB12_37_2.5", "BBWB12_37_5", "BBWB12_37_10", "BBWB12_37_20", #"BBWB12_37_exp", 29 up, 6 down
             "LB12_NaCl",
             "QHT+13_Gene.24.h", "QHT+13_Gene.48.h", "QHT+13_Gene.72.h",
             "WGB+13_N", "WGB+13_U",
             "ADW+14_Gene", "KKG+14_Gene_30min", "KKG+14_Gene_80min", "KKG+14_Gene_310min",
             "KSM+14_NaCl", "KSM+14_GB",
             "MGM+14_3.5", "MGM+14_4.5", "MGM+14_5", "MGM+14_5.5",
             "SLM+14_5", "SLM+14_30", "SLM+14_60",
             "FRH+15_NaCl_1h", "FRH+15_NaCl_6h", "FRH+15_NaCl_24h",
             "FRH+15_KCl_1h", "FRH+15_KCl_6h", "FRH+15_KCl_24h",
             "FRH+15_glycerol_1h", "FRH+15_glycerol_6h", "FRH+15_glycerol_24h",
             "KLB+15_trans-NaCl", "KLB+15_trans-suc",
             "HLL17_45min", "HLL17_14h",
             "MWZ+18"
             ))
  }
  # remove tags
  dataset <- strsplit(dataset, "=")[[1]][1]
  # get study and stage/condition
  study <- strsplit(dataset, "_")[[1]][1]
  stage <- paste(strsplit(dataset, "_")[[1]][-1], collapse="_")
  extdatadir <- system.file("extdata", package="JMDplots")
  datadir <- paste0(extdatadir, "/diffexpr/osmotic_gene/")
  if(study %in% c("KLB+15", "KKG+14", "ADW+14", "QHT+13")) {
    return(pdat_osmotic_bact(dataset))
  } else if(study=="HMO+10") {
    # 20191102 Bacillus subtilis, Hahne et al., 2010
    # HMO+10_prot-cytosol, HMO+10_prot-membrane, HMO+10_transcriptomics
    dat <- read.csv(paste0(datadir, "HMO+10.csv.xz"), as.is=TRUE)
    #description <- paste("Bacillus subtilis", stage)
    description <- "_Bacillus subtilis_ in 6% w/v NaCl vs control"
    # use selected dataset
    dat <- dat[dat$Experiment==stage, ]
    # count up- or down-expression at each time point
    diff <- apply(sign(dat[, 4:7]), 1, sum)
    dat <- cbind(dat, diff = diff)
    # keep proteins that have same expression in at least 3 out of 4 time points
    dat <- dat[abs(dat$diff) >= 2, ]
    up2 <- dat$diff > 0
    # update IDs / remove duplicated IDs
    aa_file <- file.path(extdatadir, "aa/bacteria/HMO+10_aa.csv.xz")
    dat <- check_IDs(dat, "UniProtKB", aa_file)
    dat <- cleanup(dat, "UniProtKB", up2)
    pcomp <- protcomp(dat$UniProtKB, aa_file = aa_file)
  } else if(study=="WJ02") {
    # 20200419 E. coli, Weber and Jung, 2002
    dat <- read.csv(paste0(datadir, "WJ02.csv.xz"), as.is=TRUE)
    description <- "_Escherichia coli_ in 0.4 M NaCl vs control"
    up2 <- dat$Regulation == "up"
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/WJ02_aa.csv.xz"))
  } else if(study=="KSA+02") {
    # 20200419 Synechocystis, Kanesaki et al., 2002
    # KSA+02_NaCl, KSA+02_sorbitol
    dat <- read.csv(paste0(datadir, "KSA+02.csv.xz"), as.is=TRUE)
    description <- paste("_Synechocystis_ sp. PCC 6803 in 0.5 M", stage, "vs control")
    # use highly differential proteins for the indicated experiment
    icol <- grep(stage, colnames(dat))
    dat <- dat[abs(dat[, icol]) >= 3, ]
    up2 <- dat[, icol] >= 3
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/KSA+02_aa.csv.xz"))
  } else if(study=="GCP08") {
    # 20200419 E. coli, Gunasekera et al., 2008
    # GCP08_30, GCP08_43
    dat <- read.csv(paste0(datadir, "GCP08.csv.xz"), as.is=TRUE)
    description <- paste("_Escherichia coli_ in 0.5 M vs 0 M NaCl at", stage, "deg C")
    if(stage == "30") icol <- grep("C2.C1", colnames(dat))
    if(stage == "43") icol <- grep("C5.C4", colnames(dat))
    dat <- dat[abs(dat[, icol]) > 1, ]
    up2 <- dat[, icol] > 1
    dat <- cleanup(dat, "Entry", up2)
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/GCP08_aa.csv.xz"))
  } else if(study=="LGW+05") {
    # 20200419 Shewanella oneidensis, Liu et al., 2005
    dat <- read.csv(paste0(datadir, "LGW+05.csv.xz"), as.is=TRUE)
    description <- "_Shewanella oneidensis_ MR-1 in 0.5 vs 0.1 M NaCl"
    up2 <- dat$AVG > 1
    dat <- cleanup(dat, "Entry", up2)
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/LGW+05_aa.csv.xz"))
  } else if(study=="HLL17") {
    # 20200419 Methylocystis, Han et al., 2017
    # HLL17_45min, HLL17_14h
    dat <- read.csv(paste0(datadir, "HLL17.csv.xz"), as.is=TRUE)
    description <- paste("_Methylocystis_ sp. strain SC2 in 0.75% NaCl vs control at", stage)
    icol <- grep(stage, colnames(dat))
    dat <- dat[!is.na(dat[, icol]), ]
    up2 <- dat[, icol] > 0
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/HLL17_aa.csv.xz"))
  } else if(study=="BBWB12") {
    # 20200419 Listeria monocytogenes, Bergholz et al., 2012
    # BBWB12_37_2.5, 37_5, 37_10, 37_20, 37_exp, 7_2.5, 7_5, 7_10, 7_20, 7_exp
    dat <- read.csv(paste0(datadir, "BBWB12.csv.xz"), as.is=TRUE)
    temp <- strsplit(stage, "_")[[1]][1]
    phase <- strsplit(stage, "_")[[1]][2]
    description <- paste0("_Listeria monocytogenes_ strain H7858 in 6% NaCl at ", phase, "% lag-phase duration")
    icol <- grep(paste0("X", stage), colnames(dat))
    dat <- dat[abs(dat[, icol]) > 1, ]
    up2 <- dat[, icol] > 1
    dat <- cleanup(dat, "Entry", up2)
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/BBWB12_aa.csv.xz"))
  } else if(study=="KSM+14") {
    # 20200419 Bacillus subtilis, Kohlstedt et al., 2014
    # KSM+14_NaCl, KSM+14_GB
    dat <- read.csv(paste0(datadir, "KSM+14.csv.xz"), as.is=TRUE)
    if(stage=="NaCl") description <- "_Bacillus subtilis_ in 1.2 M NaCl vs control"
    if(stage=="GB") description <- "_Bacillus subtilis_ with vs without glycine betaine in 1.2 M NaCl"
    icol <- grep(stage, colnames(dat))
    dat <- dat[!is.na(dat[, icol]), ]
    up2 <- dat[, icol] > 0
    dat <- cleanup(dat, "Entry", up2)
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/KSM+14_aa.csv.xz"))
  } else if(study=="SLM+14") {
    # 20200419 Enterococcus faecalis, Solheim et al., 2014
    # SLM+14_5, SLM+14_30, SLM+14_60
    dat <- read.csv(paste0(datadir, "SLM+14.csv.xz"), as.is=TRUE)
    description <- paste("_Enterococcus faecalis_ in 6.5% NaCl vs control at", stage, "min")
    icol <- grep(stage, colnames(dat))
    dat <- dat[!is.na(dat[, icol]), ]
    up2 <- dat[, icol] > 0
    dat <- cleanup(dat, "Entry", up2)
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/SLM+14_aa.csv.xz"))
  } else if(study=="MWZ+18") {
    # 20200419 Lactobacillus paracasei, Ma et al., 2018
    dat <- read.csv(paste0(datadir, "MWZ+18.csv.xz"), as.is=TRUE)
    description <- "_Lactobacillus paracasei_ L9 with vs without 0.13% ox bile"
    up2 <- dat$Log2Fold_change > 0
    dat <- cleanup(dat, "Entry", up2)
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/MWZ+18_aa.csv.xz"))
  } else if(study=="HZP+05") {
    # 20200419 Yersinia pestis, Han et al., 2005
    # HZP+05_HOS, HZP+05_HSS
    dat <- read.csv(paste0(datadir, "HZP+05.csv.xz"), as.is=TRUE)
    if(stage=="HOS") description <- paste("_Yersinia pestis_ in 0.5 M sorbitol vs control")
    if(stage=="HSS") description <- paste("_Yersinia pestis_ in 0.5 M NaCl vs control")
    icol <- grep(stage, colnames(dat))
    dat <- dat[!is.na(dat[, icol]), ]
    dat <- dat[dat[, icol] >= 2 | dat[, icol] <= 0.5, ]
    up2 <- dat[, icol] >= 2
    dat <- cleanup(dat, "Entry", up2)
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/HZP+05_aa.csv.xz"))
  } else if(study=="SLA+05") {
    # 20200420 Synechocystis, Shapiguzov et al., 2005
    dat <- read.csv(paste0(datadir, "SLA+05.csv.xz"), as.is=TRUE)
    description <- "_Synechocystis_ sp. PCC 6803 in 0.5 M sorbitol vs control"
    up2 <- dat$IF > 0
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/SLA+05_aa.csv.xz"))
  } else if(study=="LB12") {
    # 20200420 Synechococcus, Ludwig and Bryant, 2012
    # LB12_NaCl, LB12_glycerol
    dat <- read.csv(paste0(datadir, "LB12.csv.xz"), as.is=TRUE)
    if(stage=="NaCl") description <- "_Synechococcus_ sp. strain PCC 7002 in 1.5 M NaCl vs control"
    if(stage=="glycerol") description <- "_Synechococcus_ sp. strain PCC 7002 in 10 mM glycerol vs control"
    icol <- grep(stage, colnames(dat))
    dat <- dat[!is.na(dat[, icol]), ]
    dat <- dat[dat[, icol] > 2 | dat[, icol] < 0.5, ]
    up2 <- dat[, icol] > 2
    dat <- cleanup(dat, "Entry", up2)
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/LB12_aa.csv.xz"))
  } else if(study=="FRH+15") {
    # 20200420 Salmonella enterica, Finn et al., 2015
    # FRH+15_NaCl_1h, FRH+15_NaCl_6h, FRH+15_NaCl_24h
    # FRH+15_KCl_1h, FRH+15_KCl_6h, FRH+15_KCl_24h
    # FRH+15_glycerol_1h, FRH+15_glycerol_6h, FRH+15_glycerol_24h
    dat <- read.csv(paste0(datadir, "FRH+15.csv.xz"), as.is=TRUE)
    solute <- strsplit(stage, "_")[[1]][1]
    time <- strsplit(stage, "_")[[1]][2]
    description <- paste("_Salmonella enterica_ in", solute, "vs control for", time, "h")
    icol <- grep(stage, colnames(dat))
    dat <- dat[!is.na(dat[, icol]), ]
    up2 <- dat[, icol] > 1
    dat <- cleanup(dat, "Entry", up2)
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/FRH+15_aa.csv.xz"))
  } else if(study=="MGM+14") {
    # 20200423 E. coli, Metris et al., 2014
    # MGM+14_3.5, MGM+14_4.5, MGM+14_5, MGM+14_5.5 
    dat <- read.csv(file.path(datadir, "MGM+14.csv.xz"), as.is=TRUE)
    description <- paste("_Escherichia coli_ in", stage, "vs 2% NaCl (with glycine betaine)")
    icol <- grep(paste0("_", stage, "_"), colnames(dat))
    dat <- dat[abs(dat[, icol]) > 1, ]
    up2 <- dat[, icol] > 1
    dat <- cleanup(dat, "Entry", up2)
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/MGM+14_aa.csv.xz"))
  } else if(study=="SBB+09") {
    # 20200423 E. coli, Shabala et al., 2009
    # SBB+09_NaCl, SBB+09_Sucrose
    dat <- read.csv(file.path(datadir, "SBB+09.csv.xz"), as.is=TRUE)
    description <- paste("_Escherichia coli_ in ~2.7 Os/kg", gsub("Sucrose", "sucrose", stage), "vs control")
    icol <- grep(stage, colnames(dat))
    dat <- dat[!is.na(dat[, icol]), ]
    up2 <- dat[, icol] == "up"
    dat <- cleanup(dat, "Entry", up2)
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/SBB+09_aa.csv.xz"))
  } else if(study=="WGB+13") {
    # 20200423 E. coli in NaCl or urea, Withman et al., 2013
    # WGB+13_N, WGB+13_U
    dat <- read.csv(file.path(datadir, "WGB+13.csv.xz"), as.is=TRUE)
    if(stage=="N") description <- "_Escherichia coli_ in 0.3 M NaCl vs control"
    if(stage=="U") description <- "_Escherichia coli_ in 0.6 M urea vs control"
    icol <- grep(paste0(stage, ".vs.K"), colnames(dat))
    dat <- dat[!is.na(dat[, icol]), ]
    up2 <- dat[, icol] > 0
    dat <- cleanup(dat, "Entry", up2)
    pcomp <- protcomp(dat$Entry, aa_file = file.path(extdatadir, "aa/bacteria/WGB+13_aa.csv.xz"))
  } else stop(paste("osmotic_gene dataset", dataset, "not available"))
  print(paste0("pdat_osmotic_gene: ", description, " [", dataset, "]"))
  # use the up2 from the cleaned-up data, if it exists 20190407
  if("up2" %in% colnames(dat)) up2 <- dat$up2
  return(list(dataset=dataset, pcomp=pcomp, up2=up2, description=description))
}