# Echo the source of every chunk in the knitted document.
knitr::opts_chunk$set(echo = TRUE)

# Read every file found in the package's extdata/marinetraits directory,
# row-bind them, keep the trait columns of interest (renaming ScientificName
# to scientificName), and retain only distinct rows at species rank.
mt <- system.file("extdata", "marinetraits", package = "biodivTS") %>%
  list.files() %>%
  # list.files() yields bare file names; resolve each back to a full path.
  system.file("extdata", "marinetraits", ., package = "biodivTS") %>%
  map_dfr(read.csv) %>%
  select(
    AphiaID,
    scientificName = ScientificName,
    taxonRank,
    taxonomicStatus,
    measurementID,
    measurementType,
    measurementValue,
    measurementUnit
  ) %>%
  filter(taxonRank == "Species") %>%
  distinct()

# Create the observation ID column ----
# A row whose measurementType contains ">" (e.g. "Body size > Stage") gets the
# same ID as the closest preceding row without ">"; every row without ">"
# starts a new ID. The ID is therefore the running count of rows without ">".
# Rows with ">" that come before any such row get ID 0, and an NA
# measurementType starts a new ID because grepl() returns FALSE for NA.
#
# obsID is a plain numeric (double) vector with one ID per row of `mt`, so a
# later unlist(obsID) returns it unchanged. Unlike a `1:length(x)` loop, a
# zero-row `mt` yields numeric(0) instead of a spurious ID.
obsID <- as.numeric(cumsum(!grepl(">", mt$measurementType)))

# Last ID handed out (0 when there are no rows); kept so that any code reading
# this counter after the ID assignment still finds it.
id_num <- if (length(obsID) > 0) obsID[length(obsID)] else 0

# Attach the observation IDs, turn each measurementType into its own column
# (filled with the matching measurementValue), then drop the ">" qualifier
# columns that are not carried forward.
mt <- mt %>%
  cbind(obsID = unlist(obsID)) %>%
  spread(key = measurementType, value = measurementValue) %>%
  select(
    -"Body size > Stage",
    -"Feedingtype > Host",
    -"Feedingtype > Stage",
    -"Body size > Locality (MRGID)",
    -"Body size > Dimension",
    -"Body size > Gender"
  )

# Coalesce the elements of `df` into a single result.
#
# `df` is split with as.list() and every element is passed to coalesce() as a
# separate argument. When used through summarise_all(), `df` is one column of
# one group, so the result is that column's first non-missing value (NA when
# all values are missing).
coalesce_all_columns <- function(df) {
  do.call(coalesce, as.list(df))
}

# Collapse the trait table in two passes with coalesce_all_columns():
# first to one row per obsID, then to one row per scientificName.
mt_clean <- local({
  per_observation <- summarise_all(
    group_by(mt, obsID),
    coalesce_all_columns
  )
  summarise_all(
    group_by(per_observation, scientificName),
    coalesce_all_columns
  )
})


# karinorman/biodivTS documentation built on Dec. 23, 2024, 10 p.m.