Creating the HMP example dataset

Sample data

library(readr)
library(dplyr)
set.seed(1)
hmp_samples <- read_tsv("~/Downloads/v35_map_uniquebyPSN.txt")
hmp_otus <- read_tsv("~/Downloads/v35_psn_otu.genus.fixed.txt", skip = 1)

remove uneeded columns

hmp_samples <- hmp_samples[, c("#SampleID", "sex", "HMPbodysubsite")]

rename columns

colnames(hmp_samples) <- c("sample_id", "sex", "body_site")

subset treatments

sites <- c("Saliva", "Throat", "Stool", "Right_Antecubital_fossa", "Anterior_nares")
hmp_samples <- hmp_samples[hmp_samples$body_site %in% sites, ]

rename treatments

hmp_samples[hmp_samples$body_site == "Right_Antecubital_fossa", "body_site"] <- "Skin"
hmp_samples[hmp_samples$body_site == "Anterior_nares", "body_site"] <- "Nose"

remove samples not in abundance data

hmp_samples <- hmp_samples[hmp_samples$sample_id %in% colnames(hmp_otus), ]

remove low count samples

hmp_samples <- hmp_samples[colSums(hmp_otus[, as.character(hmp_samples$sample_id)]) >= 1000, ]

subsample sites

hmp_samples <- hmp_samples %>%
  group_by(body_site, sex) %>%
  sample_n(size = 5)

convert sample IDs to character

hmp_samples$sample_id <- as.character(hmp_samples$sample_id)

Abundance matrix

Subset samples

hmp_otus <- hmp_otus[, c("#OTU ID", "Consensus Lineage", hmp_samples$sample_id)]

rename columns

colnames(hmp_otus)[1:2] <- c("otu_id", "lineage")

Remove OTUs with missing info

hmp_otus <- hmp_otus[! endsWith(hmp_otus$lineage, "__"), ]
hmp_otus <- hmp_otus[! grepl(hmp_otus$lineage, pattern = "__;", fixed = TRUE), ]

Remove ambiguous OTUs

hmp_otus <- hmp_otus[! grepl(hmp_otus$lineage, pattern = "IncertaeSedis"), ]

Remove singletons

hmp_otus <- hmp_otus[rowSums(hmp_otus[, hmp_samples$sample_id]) > 1, ]

random subsample of OTUs

hmp_otus <- hmp_otus %>% sample_n(1000, weight = rowSums(hmp_otus[, hmp_samples$sample_id]))

Add root rank

hmp_otus$lineage <- paste0("r__", hmp_otus$lineage)

Add to package

devtools::use_data(hmp_otus, overwrite = TRUE)
devtools::use_data(hmp_samples, overwrite = TRUE)

How to parse

x = parse_tax_data(hmp_otus, class_cols = "lineage", class_sep = ";",
               class_key = c(tax_rank = "info", tax_name = "taxon_name"),
               class_regex = "^(.+)__(.+)$")
library(metacoder)
heat_tree(x, node_size = n_obs, node_color = n_obs, node_label = taxon_names)


grunwaldlab/metacoder documentation built on Feb. 22, 2024, 3:47 a.m.