# data-raw/sewage_resistance.R

library(readxl)
library(tidyverse)
library(zCompositions)

# Fetch Supplementary Data 4 of the global sewage paper
# (https://www.nature.com/articles/s41467-019-08853-3).
# The download is skipped when a local copy already exists, so re-running the
# script does not hit the network again.
sewage_xlsx <- "./data-raw/sewage_file.xlsx"
if (!file.exists(sewage_xlsx)) {
  download.file(
    "https://static-content.springer.com/esm/art%3A10.1038%2Fs41467-019-08853-3/MediaObjects/41467_2019_8853_MOESM7_ESM.xlsx",
    sewage_xlsx,
    # An .xlsx workbook is a binary (zip) file: without binary mode the
    # download is written in text mode on Windows and the file is corrupted.
    mode = "wb"
  )
}

# Read the "ResFind.Gene.count" sheet of the downloaded workbook. The first
# column is renamed to "gene_id" so it can be addressed by name below
# (presumably it holds the resistance gene identifiers -- confirm against the
# spreadsheet).
raw_counts <- read_xlsx(
  "./data-raw/sewage_file.xlsx",
  sheet = "ResFind.Gene.count"
)
names(raw_counts)[1] <- "gene_id"

# Move gene_id into the row names and transpose, so that each gene becomes a
# column and each original column (presumably one sewage sample) becomes a row.
sewage_counts <- raw_counts %>%
  column_to_rownames("gene_id") %>%
  as.matrix() %>%
  t() %>%
  as.data.frame()

# Replace zero counts (zCompositions::cmultRepl, method "SQ") and keep the
# result on the pseudo-count scale; the log taken below needs strictly
# positive values.
sewage_zerocorrected <- cmultRepl(
  sewage_counts,
  method = "SQ",
  output = "p-counts"
)

# Centred log-ratio transform, row by row: log of each part minus the mean of
# the logs of that row. apply() returns one column per input row, hence the
# transpose back before converting to a data frame.
sewage_clr <- sewage_zerocorrected %>%
  apply(1, function(row_counts) {
    log_counts <- log(row_counts)
    log_counts - mean(log_counts)
  }) %>%
  t() %>%
  as.data.frame()

# Store sewage_clr as a package dataset. use_data() is not exported by any of
# the packages attached in this script, so it is called through its namespace
# (usethis) instead of relying on devtools/usethis being attached elsewhere.
usethis::use_data(sewage_clr)
# roeder/codaqol documentation built on Nov. 5, 2019, 3:14 a.m.