# data-raw/biobank_ad.R

source("data-raw/helper-functions.R")
require("data.table")
data("mfg_functions")

# Load, clean and annotate one anaerobic-digester amplicon data set.
#
# Arguments:
#   otutable_path  Path to the ASV/OTU table handed to amp_load().
#   metadata_path  Path to the sample metadata file read with fread().
#   metadata_cols  Named character vector: names are the columns to read from
#                  the metadata file, values are the names they get after
#                  loading.
#   taxonomy_path  Path to the taxonomy file handed to amp_load(). Defaults to
#                  the file that was previously hard-coded here.
#
# Returns (invisibly) the object produced by amp_load() after control samples
# have been removed and the filtering/annotation helpers have been applied.
digester_data <- function(
  otutable_path = "data-raw/biobank_ad/ASVtable.zip",
  metadata_path,
  metadata_cols = c(
    "Seq_ID" = "SampleID",
    "SampleSite" = "Plant",
    "SampleDate" = "Date",
    "Digester_Type" = "Type"
  ),
  taxonomy_path = "data-raw/biobank_ad/ASVs.R1.midas481.zip"
) {
  # Read only the requested metadata columns and rename them on the way in.
  metadata <- fread(
    metadata_path,
    encoding = "Latin-1",
    select = names(metadata_cols),
    col.names = metadata_cols
  )

  # Rows in which every selected column is an empty string are blank filler
  # rows; drop them before handing the metadata to amp_load().
  is_blank_row <- apply(metadata == "", 1, all)
  d <- amp_load(
    otutable = otutable_path,
    metadata = metadata[!is_blank_row],
    taxonomy = taxonomy_path
  )

  # Inspect the control samples (Plant == "aau", case-insensitive) before they
  # are removed. The heatmap has to be print()ed explicitly: plot objects are
  # only auto-printed at top level, never from inside a function body.
  controls <- amp_subset_samples(
    d,
    tolower(Plant) %chin% "aau",
    normalise = FALSE
  )
  print(amp_heatmap(controls, normalise = FALSE))

  # Drop the controls, then normalise and discard shallow samples and taxa
  # that are no longer present.
  d <- amp_subset_samples(
    d,
    !tolower(Plant) %chin% "aau",
    normalise = TRUE,
    minreads = 1000,
    removeAbsents = TRUE
  )

  # NOTE(review): earlier notes here read "add/rename reactor column" and
  # "append reactor to plant", but no statement in this function does that.
  # Possibly handled by fix_metadata() -- confirm or implement.

  # filter_otus(), fix_metadata() and genusfunctions() are not defined in this
  # file; presumably they come from data-raw/helper-functions.R -- confirm.
  d <- filter_otus(d, 0.1)
  d$metadata$Date <- lubridate::ymd(d$metadata$Date)
  d$metadata <- fix_metadata(d$metadata)
  d <- genusfunctions(d, function_data = mfg_functions)

  invisible(d)
}

## archaea
# Build the archaeal data set from its own metadata sheet. The call no longer
# carries a trailing comma, which used to pass an empty positional argument.
biobank_ad_arc <- digester_data(
  metadata_path = "data-raw/biobank_ad/220114_metadataBioBank_Archaea.txt"
)
# Tabulate the Kingdom assignments: about half of the ASVs are actually
# bacteria, so keep only the taxa named "Archaea".
table(biobank_ad_arc$tax$Kingdom)
biobank_ad_arc <- amp_subset_taxa(
  biobank_ad_arc,
  "Archaea"
)
# Store the result as package data, replacing any existing copy.
usethis::use_data(biobank_ad_arc, overwrite = TRUE)

## bacteria
# Build the bacterial data set from its own metadata sheet.
biobank_ad_bac <- digester_data(
  metadata_path = "data-raw/biobank_ad/220114_metadataBioBank_Bacteria.txt"
)
# Tabulate the Kingdom assignments to see what else is present, then keep only
# the taxa named "Bacteria".
table(biobank_ad_bac[["tax"]][["Kingdom"]])
biobank_ad_bac <- amp_subset_taxa(biobank_ad_bac, "Bacteria")
# Store the result as package data, replacing any existing copy.
usethis::use_data(biobank_ad_bac, overwrite = TRUE)
# cmc-aau/midasdashboard documentation built on Jan. 29, 2024, 1:42 p.m.