# data-raw/sweden.R

# Load dependencies. library() stops immediately if data.table is missing,
# whereas require() would only return FALSE and let the script fail later on.
library("data.table")
source("data-raw/helper-functions.R")

# Read the sample metadata (Latin-1 encoded CSV with a header row).
metadata <- fread(
  "data-raw/amplicon_data/midas_sweden/metadata.csv",
  encoding = "Latin-1",
  header = TRUE
)

# The first metadata column is expected to hold unique values (presumably the
# sample IDs -- confirm against amp_load). A bare any(duplicated(...)) is not
# printed when this script is run through source(), so raise a warning instead.
# If it fires, manually check which samples to remove.
if (anyDuplicated(metadata[[1]]) > 0L) {
  duplicated_ids <- unique(metadata[[1]][duplicated(metadata[[1]])])
  warning(
    "Duplicated values in the first metadata column: ",
    paste(duplicated_ids, collapse = ", "),
    ". Manually check which samples to remove.",
    call. = FALSE
  )
}

# Label control samples as Plant "CTRL" (they are filtered out later, after
# being inspected), then drop rows without a LibID (empty rows).
metadata[grepl("extneg|pcrpos|pcrnpos|pcrneg", tolower(LibID)), Plant := "CTRL"]
metadata <- metadata[!is.na(LibID)]

# Append the Line ID to the Plant ID for non-control samples that have a Line.
metadata[
  !Plant %chin% "CTRL",
  Plant := ifelse(!is.na(Line), paste0(Plant, "-", Line), Plant)
]

# Parse Date as day-month-year; check that all dates parsed correctly before
# continuing.
metadata[, Date := lubridate::dmy(Date)]

# Combine the ASV table with the cleaned metadata into one object.
biobanksweden <- amp_load(
  otutable = "data-raw/amplicon_data/midas_sweden/ASVtable_midas481.zip",
  metadata = metadata
)

# Inspect the control samples (raw read counts, grouped by LibID) before
# removing them.
# NOTE(review): the heatmap is only displayed when this is run interactively
# or with source(echo = TRUE); a plain source() does not auto-print it.
amp_subset_samples(
  biobanksweden,
  Plant %chin% "CTRL",
  normalise = FALSE
) %>%
  amp_heatmap(normalise = FALSE, group_by = "LibID")

# Drop the controls and sample "MQ221101-127", normalise, and require at least
# 5000 reads per sample. Both conditions are combined into ONE logical
# expression: the filter is documented as being handed to subset(), where a
# second positional expression would be taken as `select` (columns) rather
# than as an additional row filter.
biobanksweden <- amp_subset_samples(
  biobanksweden,
  !Plant %chin% "CTRL" & !Sample %chin% "MQ221101-127",
  normalise = TRUE,
  minreads = 5000
)

# Filter OTUs with the project helper using a threshold of 0.1.
# NOTE(review): presumably a relative-abundance cutoff -- confirm in
# data-raw/helper-functions.R. This step uses an awful lot of memory.
biobanksweden <- filter_otus(biobanksweden, 0.1)

# Load the mfg_functions dataset used by genusfunctions() below.
data("mfg_functions")

# Bacteria: keep samples sequenced with the "Bacteria" primer, restrict the
# taxa to k__Bacteria, then apply genusfunctions() with mfg_functions.
biobanksweden_bac <- biobanksweden %>%
  amp_subset_samples(Primer %chin% "Bacteria") %>%
  amp_subset_taxa("k__Bacteria") %>%
  genusfunctions(function_data = mfg_functions)
biobanksweden_bac$metadata <- fix_metadata(biobanksweden_bac$metadata)

# Archaea: same pipeline for the "Archaea" primer and the k__Archaea taxa.
biobanksweden_arc <- biobanksweden %>%
  amp_subset_samples(Primer %chin% "Archaea") %>%
  amp_subset_taxa("k__Archaea") %>%
  genusfunctions(function_data = mfg_functions)
biobanksweden_arc$metadata <- fix_metadata(biobanksweden_arc$metadata)

# Save both processed datasets as package data, overwriting existing files.
usethis::use_data(
  biobanksweden_bac,
  biobanksweden_arc,
  overwrite = TRUE
)

# Period averages are currently disabled:
#biobanksweden_arc_PeriodAvg <- periodAvg(biobanksweden_arc$metadata)
#biobanksweden_bac_PeriodAvg <- periodAvg(biobanksweden_bac$metadata)

#usethis::use_data(biobanksweden_bac_PeriodAvg, overwrite = TRUE)
#usethis::use_data(biobanksweden_arc_PeriodAvg, overwrite = TRUE)
# cmc-aau/midasdashboard documentation built on Jan. 29, 2024, 1:42 p.m.