library(knitr) library(kableExtra) ## Use pngquant to reduce size of PNG images knit_hooks$set(pngquant = hook_pngquant) pngquant <- "--speed=1 --quality=0-25" # In case pngquant isn't available if (!nzchar(Sys.which("pngquant"))) pngquant <- NULL
options(width = 80)
nH2O <- "<i>n</i><sub>H<sub>2</sub>O</sub>" nO2 <- "<i>n</i><sub>O<sub>2</sub></sub>"
This vignette runs the functions to make the plots from the following manuscript:
Dick JM. 2024. Adaptations of microbial genomes to human body chemistry. bioRxiv preprint doi: 10.1101/2023.02.12.528246
Update: In July 2024, the taxonomic classifications and reference database were updated to use GTDB release 220. The plots therefore display minor differences compared to the preprint, which used GTDB release 207. In addition, two 16S rRNA gene sequence datasets have been added for nasopharyngeal and gut communities in COVID-19.
This vignette was compiled on r Sys.Date()
with
JMDplots r packageDescription("JMDplots")$Version
,
chem16S r packageDescription("chem16S")$Version
, and
canprot r packageDescription("canprot")$Version
.
library(JMDplots)
microhum_1()
Data source: @HMP12.
microhum_2()
Data sources: Genome Taxonomy Database (GTDB release 220) [@PCR+22]; Unified Human Gastrointestinal Genome (UHGG v2.0.1) [@ANB+21].
microhum_3()
Data sources: (A) @BPB+21; (B) See Table 1 in manuscript; (C) @LLZ+21, @CZH+22, @ZZL+20; (D) @TWC+22, @MLL+17, @JZW+22, @GNT+21.
microhum_4()
Data sources: (A) See Table 1 in manuscript; (B) MAGs [@KWL22], based on data from @YZL+21 and @ZZL+20; (C) @HZX+21 and @GPM+22; (D) See Table 1 in manuscript; (E) @LAA+19.
Figure_5_genera <- microhum_5()
microhum_6()
Data sources: (a)--(b) List of Prokaryotes according to their Aerotolerant or Obligate Anaerobic Metabolism [@MR18] with modifications made in this study; (c) See Table 1 in manuscript.
microhum_S1()
microhum_S2()
Data source: @BPB+21.
microhum_S3()
Data source: List of Prokaryotes according to their Aerotolerant or Obligate Anaerobic Metabolism [@MR18] with modifications made in this study.
microhum_S4()
datadir <- system.file("extdata/microhum/ARAST", package = "JMDplots") dataset <- c("HMP12", "HMP12_no_screening", "LLZ+21", "CZH+22", "ZZL+20", "LAA+19") summary_list <- lapply(dataset, function(this_dataset) { file <- file.path(datadir, paste0(this_dataset, "_stats.csv")) dat <- read.csv(file) runs <- nrow(dat) average_input_sequences <- sum(dat$input_sequences) / runs protein_prediction_rate <- dat$coding_sequences / dat$input_sequences * 100 runs_with_low_protein_prediction <- sum(protein_prediction_rate < 40) average_protein_prediction_rate <- round(sum(protein_prediction_rate) / runs, 2) average_protein_length <- round(sum(dat$protein_length) / runs, 2) data.frame(runs, average_input_sequences, average_protein_prediction_rate, runs_with_low_protein_prediction, average_protein_length) }) description <- c("HMP with human DNA screening", "HMP without screening", "COVID-19 nasopharyngeal", "COVID-19 oropharyngeal", "COVID-19 gut", "IBD gut") summary_table <- cbind(description, dataset, do.call(rbind, summary_list)) col.names <- c("Description", "Dataset", "Number of sequencing runs", "Average number of input sequences", "Average protein prediction rate (%)", "Runs with protein prediction rate < 40%", "Average protein length") kable(summary_table, col.names = col.names)
GTDB_taxonomy <- read.csv(system.file("RefDB/GTDB_220/taxonomy.csv.xz", package = "JMDplots")) print(paste("GTDB:", nrow(GTDB_taxonomy), "genomes and", length(unique(GTDB_taxonomy$genus)), "genera")) UHGG_taxonomy_full <- read.csv(system.file("RefDB/UHGG_2.0.1/fullset/taxonomy.csv.xz", package = "JMDplots")) print(paste("UHGG (contamination < 5% and completeness > 50%):", nrow(UHGG_taxonomy_full), "species-level clusters and", length(unique(UHGG_taxonomy_full$genus)), "genera")) UHGG_taxonomy <- read.csv(system.file("RefDB/UHGG_2.0.1/taxonomy.csv.xz", package = "JMDplots")) print(paste("UHGG (contamination < 2% and completeness > 95%):", nrow(UHGG_taxonomy), "species-level clusters and", length(unique(UHGG_taxonomy$genus)), "genera"))
not_in_UHGG <- paste(Figure_5_genera[!Figure_5_genera %in% UHGG_taxonomy_full$genus], collapse = " ") paste("Figure 5 shows", length(Figure_5_genera), "genera; of these,", not_in_UHGG, "is not in UHGG")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.