In bihealth/metaquac: Metabolomics Targeted Quality Control

Samples and Compounds {.tabset}

# Reference compounds for the current measurement type, labelled with the
# placeholder batch "Expected" and carrying lower-cased class names
expected_compounds <- mutate(
  filter(KNOWN_COMPOUNDS, Method == meas_type),
  Batch = "Expected",
  Class = tolower(Class)
)

# Distinct compound/class/batch combinations present in the measured data
biocrates_compounds <- unique(select(biocrates, Compound, Class, Batch))

# In case of Biocrates p400 Kit, check for missing compounds and try to complete.
# isTRUE() keeps the flag a strict TRUE/FALSE even when params$kit is NULL or NA
# (a bare `==` would then yield logical(0) or NA and propagate into
# HANDLE_MISSING).
IS_P400 <- isTRUE(params$kit == KIT_BIOCRATES_P400)

# Missing compounds: for every batch, the expected compounds that do not occur
# in that batch, stored as a list column with one row per batch
all_compounds <- unique(expected_compounds$Compound)
missing_compounds <-
  biocrates_compounds %>%
  group_by(Batch) %>%
  summarize(
    Missing.Compound = list(all_compounds[!all_compounds %in% Compound])
  ) %>%
  # Keep only batches that actually lack at least one expected compound
  filter(lengths(Missing.Compound) > 0)
ANY_MISSING <- nrow(missing_compounds) > 0

# Missing compounds are only handled for the p400 kit
HANDLE_MISSING <- IS_P400 && ANY_MISSING

# Notice explaining why compounds can be missing from p400 exports and how they
# are complemented. The first line of the text ends with two spaces, presumably
# to force a Markdown line break in the rendered report; keep them.
warning_text <- "Warning: Missing compounds identified (see Compounds Missing)!  
Background: For the Biocrates AbsoluteIDQ p400 HR Kit, MetIDQ may export data without columns for
specific compounds, if no data was acquired in any of the samples. This might not be the case for all
batches of a multi-batch experiment/study. Integrating such batches might result in wrong
calculations and conclusions for instance with respect to numbers and ratios of missing values. Thus,
MeTaQuaC complements these missing compounds as missing values (currently with no class or status)."
message(warning_text)

Samples

# Sample info: one row per distinct combination of the sample metadata columns
biocrates_samples <- unique(
  select(
    biocrates,
    Sample.Identification, Sample.Type, Sequence.Position, Sample.Name,
    contains("Batch"), contains("Well.Position")
  )
)
cat("\n**Number of samples in total:**", nrow(biocrates_samples), "\n")
cat("\n**List of all samples:**\n")
easy_datatable(biocrates_samples, show_type = "statistics")

# Table with the number of samples in each batch
cat("\n**Number of samples per batch:**\n")
easy_datatable(
  summarize(group_by(biocrates_samples, Batch), `Sample Count` = n()),
  show_type = "statistics"
)

# Horizontal bar chart of sample types, one panel per batch, with the bar
# counts printed as text labels
print(
  ggplot(
    biocrates_samples,
    mapping = aes(x = Sample.Type, fill = Sample.Type)
  ) +
    geom_bar(alpha = 0.75) +
    geom_text(aes(label = ..count..), stat = "count") +
    coord_flip() +
    facet_wrap(~ Batch) +
    theme(legend.position = "bottom")
)

Compounds

# Distinct compound/class pairs available in the data
biocrates_compounds <- unique(select(biocrates, Compound, Class))

## Overall
# Total count (one row per compound/class pair) followed by the full listing
cat("\n**Number of compounds in total:**", nrow(biocrates_compounds), "\n")
cat("\n**List of all compounds:**\n")
easy_datatable(biocrates_compounds, show_type = "statistics")

## Per batch
# Same listing, now additionally resolved by batch
biocrates_compounds <- unique(select(biocrates, Compound, Class, Batch))

# Table with the number of compounds in each batch
cat("\n**Number of compounds per batch:**\n")
easy_datatable(
  summarize(group_by(biocrates_compounds, Batch), `Compound Count` = n()),
  show_type = "statistics"
)

# Horizontal bar chart of compound classes, one panel per batch, with the bar
# counts printed as text labels
print(
  ggplot(biocrates_compounds, mapping = aes(x = Class, fill = Class)) +
    geom_bar(alpha = 0.75) +
    geom_text(aes(label = ..count..), stat = "count") +
    coord_flip() +
    facet_wrap(~ Batch) +
    theme(legend.position = "bottom")
)

cat("\n### Compounds Missing\n")
# Expand the per-batch list column into one row per batch/missing compound
missing_compounds <- tidyr::unnest(missing_compounds, Missing.Compound)

# Count of distinct compounds that are missing in at least one listed batch
cat(
  "\n**Number of missing compounds in total (intersected):**",
  length(unique(missing_compounds$Missing.Compound)),
  "\n"
)
cat("\n**List of missing compounds:**\n")
easy_datatable(missing_compounds, show_type = "statistics")

# Table with the number of missing compounds in each batch
cat("\n**Number of missing compounds per batch:**\n")
easy_datatable(
  summarize(group_by(missing_compounds, Batch), `Compound Count` = n()),
  show_type = "statistics"
)

# Overview plot: a red cross marks each compound missing in a given batch
print(
  ggplot(missing_compounds, mapping = aes(x = Missing.Compound, y = Batch)) +
    geom_point(color = "red", shape = 4, size = 4) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
)

# # Compounds with an average frequency per sample != 1
# as.data.frame(table(Compound = biocrates$Compound, Sample = biocrates$Sample.Name)) %>%
#   group_by(Compound) %>% summarize(Average.Freq = mean(Freq)) %>%
#   filter(Average.Freq != 1) %>%
#   arrange(Average.Freq)
#
# # Samples with an average frequency per compound != 1
# as.data.frame(table(Compound = biocrates$Compound, Sample = biocrates$Sample.Name)) %>%
#   group_by(Sample) %>% summarize(Average.Freq = mean(Freq)) %>%
#   filter(Average.Freq != 1) %>%
#   arrange(Average.Freq)

cat("\n### Compounds Completed\n")
# Complete missing compounds/measurements as NA; the data set is overwritten
# with the completed version
biocrates <- complete_missing_compounds(
  data = biocrates,
  expected_compounds = KNOWN_COMPOUNDS,
  method = meas_type
)

# Distinct compound/class pairs available after completion
biocrates_compounds <- unique(select(biocrates, Compound, Class))

## Overall
# Total count (one row per compound/class pair) followed by the full listing
cat("\n**Number of compounds in total:**", nrow(biocrates_compounds), "\n")
cat("\n**List of all compounds:**\n")
easy_datatable(biocrates_compounds, show_type = "statistics")

## Per batch
# Same listing, now additionally resolved by batch
biocrates_compounds <- unique(select(biocrates, Compound, Class, Batch))

# Table with the number of compounds in each batch
cat("\n**Number of compounds per batch:**\n")
easy_datatable(
  summarize(group_by(biocrates_compounds, Batch), `Compound Count` = n()),
  show_type = "statistics"
)

# Horizontal bar chart of compound classes, one panel per batch, with the bar
# counts printed as text labels
print(
  ggplot(biocrates_compounds, mapping = aes(x = Class, fill = Class)) +
    geom_bar(alpha = 0.75) +
    geom_text(aes(label = ..count..), stat = "count") +
    coord_flip() +
    facet_wrap(~ Batch) +
    theme(legend.position = "bottom")
)

# # Missing compounds
# # Total set might still be missing compounds not available in all batches!
# all_compounds <- unique(expected_compounds$Compound)
# missing_compounds <-
#   biocrates_compounds %>%
#   group_by(Batch) %>%
#   do(Missing.Compound = all_compounds[!all_compounds %in% .$Compound]) %>%
#   filter(length(Missing.Compound) > 0)
#
# if (nrow(missing_compounds) > 0){
#   missing_compounds <- missing_compounds %>% unnest(Missing.Compound)
#   print(
#     ggplot(data = missing_compounds, aes(x = Missing.Compound, y = Batch)) +
#       geom_point(shape = 4, color = "red", size = 4) +
#       theme(axis.text.x = element_text(angle = 90, hjust = 1))
#   )
#   
#   missing_compounds
# }

# # Compounds with an average frequency per sample != 1
# as.data.frame(table(Compound = biocrates$Compound, Sample = biocrates$Sample.Name)) %>%
#   group_by(Compound) %>% summarize(Average.Freq = mean(Freq)) %>%
#   filter(Average.Freq != 1) %>%
#   arrange(Average.Freq)
#
# # Samples with an average frequency per compound != 1
# as.data.frame(table(Compound = biocrates$Compound, Sample = biocrates$Sample.Name)) %>%
#   group_by(Sample) %>% summarize(Average.Freq = mean(Freq)) %>%
#   filter(Average.Freq != 1) %>%
#   arrange(Average.Freq)