# data-raw/Extracts/extracts.R

## Code to prepare the `extracts` dataset.
library(readr)
library(dplyr)
library(janitor)
library(stringr)
# Load the raw extracts table. The working copy is preferred; when it is
# absent, fall back to the first file listed in the archive directory
# (list.files() returns paths sorted alphabetically).
current_extracts_path <- "data-raw/Extracts/working/extracts-CURRENT.csv"
extracts <- if (file.exists(current_extracts_path)) {
  read_csv(current_extracts_path)
} else {
  archived_paths <- list.files("data-raw/Extracts/archived", full.names = TRUE)
  # Fail with a clear message instead of handing NA to read_csv() when there
  # is nothing to fall back on.
  if (length(archived_paths) == 0) {
    stop(
      "No extracts file found: '", current_extracts_path,
      "' is missing and 'data-raw/Extracts/archived' contains no files.",
      call. = FALSE
    )
  }
  read_csv(archived_paths[1])
}

# Pattern for the ID formats pulled out of `poop_vial_id`: a capital letter
# followed by 3-4 digits, 1-3 digits + "-" + 2 digits, a 5-digit number, or
# 1-3 digits + one character + 1-2 digits.
# NOTE(review): the "." in the last alternative is unescaped, so it matches any
# character, not just a decimal point -- confirm this is intended.
poop_id_pattern <- "([A-Z]{1}[0-9]{3,4}|[0-9]{1,3}-[0-9]{2})|[0-9]{5}|[0-9]{1,3}.[0-9]{1,2}"

# Standardise the column names, tidy the vial IDs, and replace each
# `poop_vial_id` with the list of all IDs found in it (as `poop_id`).
extracts <- extracts %>%
  clean_names() %>%
  # Some samples are read in with excess decimals: any ID longer than 15
  # characters is parsed as a number and used in place of the raw value.
  mutate(
    weirdos = as.numeric(
      ifelse(str_length(poop_vial_id) > 15, poop_vial_id, NA)
    )
  ) %>%
  mutate(poop_vial_id = ifelse(is.na(weirdos), poop_vial_id, weirdos)) %>%
  # Compare the tvariable ID with the field poop ID; where they differ the
  # field value is taken.
  # NOTE(review): `tvar_id` is dropped in the next step without being used, so
  # this comparison does not affect the output -- confirm whether it was meant
  # to update `poop_vial_id`.
  mutate(tvar_id = as.numeric(poop_vial_id)) %>%
  mutate(
    tvar_id = ifelse(tvar_id == field_poop_vial_id, tvar_id, field_poop_vial_id)
  ) %>%
  select(-weirdos, -tvar_id) %>%
  rename(poop_id = poop_vial_id) %>%
  # str_extract_all() returns one character vector of matches per row, so
  # `poop_id` becomes a list column here.
  mutate(poop_id = str_extract_all(poop_id, poop_id_pattern))

# `poop_id` is a list column holding every ID that str_extract_all() matched
# in each row. Spread the first three matches into their own columns; nth()
# gives NA when a row has fewer matches than the position requested.
# vapply() pins each result to a single string, so the columns are always
# character vectors -- sapply() would return an empty list for a zero-row table.
# `poop_id` itself is overwritten last because the other two columns read it.
extracts$third_poop_id <- vapply(
  extracts$poop_id,
  function(ids) nth(ids, 3),
  character(1)
)
extracts$second_poop_id <- vapply(
  extracts$poop_id,
  function(ids) nth(ids, 2),
  character(1)
)
extracts$poop_id <- vapply(
  extracts$poop_id,
  function(ids) nth(ids, 1),
  character(1)
)

# Fix known issues
# Rows whose poop ID is "P1620" get "P1669" as the extracted sample; every
# other row keeps its existing value, coerced to character. which() skips rows
# where `poop_id` is NA, so those rows are left unchanged.
extracts <- mutate(
  extracts,
  sample_extracted_if_two_samples = replace(
    as.character(sample_extracted_if_two_samples),
    which(poop_id == "P1620"),
    "P1669"
  )
)

# Save the cleaned `extracts` table with usethis::use_data(); overwrite = TRUE
# replaces any copy saved by an earlier run.
usethis::use_data(extracts, overwrite = TRUE)
# mwhalen18/krspfecals documentation built on Dec. 21, 2021, 11:05 p.m.