R/species_nutrients.R

Defines functions species_nutrients

Documented in species_nutrients

#' Match nutritional content to nearest taxa
#'
#' Get nutritional information for aquatic animals from the Aquatic Foods Composition Database. This function uses the hierarchical approach described in Golden et al 2021, where nutritional content is assigned based on the closest taxa possible. 
#'
#' @param sci_name A character vector of species scientific names
#' @param prep A character vector of fish preparation types 
#' @param part A character vector of fish parts
#' @param nut A character vector of nutrients to search for
#' @return A dataframe containing species, nutrient, nutrient units, nutritional value (value), closest taxa match (taxa_match), number of observations which nutritional value was extracted (count), standard error (se), startard deviation (ssd) and confidence intervals  
#' @examples
#' # Get nutritional info
#' # species_nutrients(sci_name = c("Salmo salar", "Oreochromis niloticus"), prep = "raw", part = "muscle tissue", nut = c("Iron, total", "Zinc"))
#' @export
species_nutrients = function(sci_name, prep, part, nut){
  # to test with a mix of easy and hard to find species and nutrients
  # sci_name=c("Thunnus alalunga","Thunnus obesus","Istiompax indica","Makaira nigricans","Coryphaena hippurus")
  # prep = "raw"
  # part = "muscle tissue"
  # nut=c(
  #   "Iron, total","Zinc","Iodine",
  #   "Vitamin A; sum of retinol/carotenoids, expressed as retinol activity equivalent (RAE)",
  #   "Riboflavin","Selenium","Magnesium","Vitamin B12",
  #   "Fatty acid 20:5, n3","Fatty acid 22:6, n3",
  #   "Calcium",
  #   "Protein, total; calculated from total nitrogen"
  # )
  ##Calculate confidence intervals
  lower_ci <- function(mean, se, n, conf_level = 0.95){
    lower_ci <- mean - qt(1 - ((1 - conf_level) / 2), n - 1) * se
  }
  upper_ci <- function(mean, se, n, conf_level = 0.95){
    upper_ci <- mean + qt(1 - ((1 - conf_level) / 2), n - 1) * se
  }
  
  ##Taxa_table
  # This loads as you call it due to lazyLoad, no need to readRDS
  # taxa_table = readRDS(file=file.path("data-raw/processed/taxa_table.Rds"))
  
  ##Load AFCD
  # again, this loads as you call it due to lazyLoad, no need to readRDS
  # AFCD = readRDS("data-raw/processed/AFCD_data_taxa.Rds") %>% 
  AFCD = afcd %>%
    filter(nutrient %in% nut,
           food_prep %in% prep,
           food_part %in% part)
  
  nutrient_key = AFCD %>% 
    distinct(nutrient, .keep_all = T) %>% 
    select(nutrient, nutrient_units)
  
  afcd_species = AFCD %>% 
    group_by(sciname, nutrient) %>% 
    summarize(smean = mean(value),
              ssd = sd(value),
              count = n(),
              smedian = median(value)) %>% 
    mutate(se = ssd / sqrt(count),
           lower_ci = lower_ci(smean, se, count),
           upper_ci = upper_ci(smean, se, count),
           lower_ci = if_else(lower_ci<0, 0, lower_ci),
           upper_ci = if_else(upper_ci<0, 0, upper_ci)) %>% 
    ungroup() %>% 
    drop_na(sciname) %>% 
    mutate(sciname = tolower(sciname)) %>% 
    rename(value = smedian)
  
    #calculate mean values for genus
    afcd_genus = AFCD %>%
    group_by(genus, nutrient) %>% 
    summarize(smean = mean(value),
              ssd = sd(value),
              count = n(),
              smedian = median(value)) %>% 
    mutate(se = ssd / sqrt(count),
           lower_ci = lower_ci(smean, se, count),
           upper_ci = upper_ci(smean, se, count),
           lower_ci = if_else(lower_ci<0, 0, lower_ci),
           upper_ci = if_else(upper_ci<0, 0, upper_ci)) %>% 
    ungroup() %>% 
    drop_na(genus) %>% 
    mutate(genus = tolower(genus)) %>% 
    rename(value = smedian)
  
  #calculate mean values for family
  afcd_family = AFCD %>%
    group_by(family, nutrient) %>% 
    summarize(smean = mean(value),
              ssd = sd(value),
              count = n(),
              smedian = median(value)) %>% 
    mutate(se = ssd / sqrt(count),
           lower_ci = lower_ci(smean, se, count),
           upper_ci = upper_ci(smean, se, count),
           lower_ci = if_else(lower_ci<0, 0, lower_ci),
           upper_ci = if_else(upper_ci<0, 0, upper_ci)) %>% 
    ungroup() %>% 
    drop_na(family) %>% 
    mutate(family = tolower(family)) %>% 
    rename(value = smedian)
  
  #calculate mean values for order
  afcd_order = AFCD %>%
    group_by(order, nutrient) %>% 
    summarize(smean = mean(value),
              ssd = sd(value),
              count = n(),
              smedian = median(value)) %>% 
    mutate(se = ssd / sqrt(count),
           lower_ci = lower_ci(smean, se, count),
           upper_ci = upper_ci(smean, se, count),
           lower_ci = if_else(lower_ci<0, 0, lower_ci),
           upper_ci = if_else(upper_ci<0, 0, upper_ci)) %>% 
    ungroup() %>% 
    drop_na(order) %>% 
    mutate(order = tolower(order)) %>% 
    rename(value = smedian)
  
  #calculate mean values for class
  afcd_class = AFCD %>%
    group_by(class, nutrient) %>% 
    summarize(smean = mean(value),
              ssd = sd(value),
              count = n(),
              smedian = median(value)) %>% 
    mutate(se = ssd / sqrt(count),
           lower_ci = lower_ci(smean, se, count),
           upper_ci = upper_ci(smean, se, count),
           lower_ci = if_else(lower_ci<0, 0, lower_ci),
           upper_ci = if_else(upper_ci<0, 0, upper_ci)) %>% 
    ungroup() %>% 
    drop_na(class) %>% 
    mutate(class = tolower(class)) %>% 
    rename(value = smedian)
  
  ##Fill taxonomic information for species
  all_spp = data.frame(species = rep(sci_name, length(nut)),
                       nutrient = rep(nut, length(sci_name)) %>% sort()) %>%
    mutate(species = tolower(species)) %>% 
    separate(species, c("genus", "spp"), " ", remove=FALSE) %>% 
    left_join(taxa_table, by = c("genus"))
  
  #Fill taxa by family
  missing_family = all_spp %>% 
    filter(is.na(class)) %>% 
    select(-order,-phylum,-kingdom,-class) %>% 
    left_join(taxa_table %>% select(-genus) %>% distinct(family, .keep_all=T), by = c("genus" = "family"))
  
  all_spp = all_spp %>% 
    filter(!is.na(class)) %>% 
    rbind(missing_family)
  
  #Fill taxa by order
  missing_order = all_spp %>% 
    filter(is.na(class)) %>% 
    select(-class,-phylum,-kingdom) %>% 
    left_join(taxa_table %>% select(-genus, -family) %>% distinct(order, .keep_all=T), by = c("genus" = "order"))
  
  all_spp = all_spp %>% 
    filter(!is.na(class)) %>% 
    rbind(missing_order) %>% 
    mutate(genus = recode(genus, 
                          "osteichthyes" = "actinopterygii",
                          "elasmobranchii" = "chondrichthyes"))
  
  ##########################Fill nutritional data by species scientific name 
  #join databases
  spp_nutrient = left_join(all_spp, afcd_species, by=c("species" = "sciname", "nutrient"))
  
  #find NAs
  missing = spp_nutrient %>% 
    filter(is.na(value)) %>% 
    select(-value,-smean, -ssd, -count, -se, -lower_ci, -upper_ci)
  
  spp_nutrient = spp_nutrient %>% 
    filter(!is.na(value)) %>% 
    mutate(taxa_match = "species")
  
  ###############Fill missing with genus #######################
  #Join datasets 
  missing = left_join(missing, afcd_genus, by=c("genus", "nutrient"))
  missing_genus = missing %>% 
    filter(!is.na(value)) %>% 
    mutate(taxa_match = "genus")
  
  ##include filled values
  spp_nutrient = rbind(spp_nutrient, missing_genus)
  
  ##Seperate remaining missing values
  missing = missing %>% 
    filter(is.na(value)) %>% 
    dplyr::select(-value,-smean, -ssd, -count, -se, -lower_ci, -upper_ci)
  
  
  ###############Fill missing with family #######################
  
  #Join datasets
  missing = left_join(missing, afcd_family, by=c("family", "nutrient"))
  missing_family = missing %>% 
    filter(!is.na(value)) %>% 
    mutate(taxa_match = "family")
  
  ##include filled values
  spp_nutrient = rbind(spp_nutrient, missing_family)
  
  ##Seperate remaining missing values
  missing = missing %>% 
    filter(is.na(value)) %>% 
    dplyr::select(-value,-smean, -ssd, -count, -se, -lower_ci, -upper_ci)
  
  ##Join for family = genus
  #Join datasets
  missing = left_join(missing, afcd_family, by=c("genus" = "family", "nutrient"))
  missing_family2 = missing %>% 
    filter(!is.na(value)) %>% 
    mutate(taxa_match = "family")
  
  ##include filled values
  spp_nutrient = rbind(spp_nutrient, missing_family2)
  
  ##Seperate remaining missing values
  missing = missing %>% 
    filter(is.na(value)) %>% 
    dplyr::select(-value,-smean, -ssd, -count, -se, -lower_ci, -upper_ci)
  
  ###############Fill missing with order #######################
  #Join datasets
  missing = left_join(missing, afcd_order, by=c("order", "nutrient"))
  missing_order = missing %>% 
    filter(!is.na(value)) %>% 
    mutate(taxa_match = "order")

  ##include filled values
  spp_nutrient = rbind(spp_nutrient, missing_order)
  
  ##Seperate remaining missing values
  missing = missing %>% 
    filter(is.na(value)) %>% 
    dplyr::select(-value,-smean, -ssd, -count, -se, -lower_ci, -upper_ci)
  
  ##genus = order
  #Join datasets
  missing = left_join(missing, afcd_order, by=c("genus" = "order", "nutrient"))
  missing_order2 = missing %>% 
    filter(!is.na(value)) %>% 
    mutate(taxa_match = "order")
  
  ##include filled values
  spp_nutrient = rbind(spp_nutrient, missing_order2)
  
  ##Seperate remaining missing values
  missing = missing %>% 
    filter(is.na(value)) %>% 
    dplyr::select(-value,-smean, -ssd, -count, -se, -lower_ci, -upper_ci)
  
  ###############Fill missing with class #######################
  
  #Join datasets
  missing = left_join(missing, afcd_class, by=c("class", "nutrient"))
  missing_class = missing %>% 
    filter(!is.na(value)) %>% 
    mutate(taxa_match = "class")
  
    ##include filled values
  spp_nutrient = rbind(spp_nutrient, missing_class)
  
  ##Seperate remaining missing values
  missing = missing %>% 
    filter(is.na(value)) %>% 
    dplyr::select(-value,-smean, -ssd, -count, -se, -lower_ci, -upper_ci)
  
  ##Genus = class
  #Join datasets
  missing = left_join(missing, afcd_class, by=c("genus" = "class", "nutrient"))
  missing_class2 = missing %>% 
    filter(!is.na(value)) %>% 
    mutate(taxa_match = "class")
  
  ##Seperate remaining missing values
  missing = missing %>% 
    filter(is.na(value)) %>% 
    mutate(taxa_match = NA)
  
  ##include filled values
  spp_nutrient = rbind(spp_nutrient, missing_class2, missing) %>%
    mutate_all(~ifelse(is.nan(.), NA, .)) %>% 
    left_join(nutrient_key, by = c("nutrient")) %>% 
    select(species, nutrient, nutrient_units, value, taxa_match, ssd, count, se, lower_ci, upper_ci)
  
 return(spp_nutrient)
  
}
Aquatic-Food-Composition-Database/AFCD documentation built on July 29, 2024, 3:12 a.m.