#' Find problems in Biography data.
#'
#' Returns a named list of tbl_df objects, including
#' \itemize{
#' \item error_dates: Rows with one or more bad dates
#' \item error_duplicates: Rows with duplicate animals
#' }
#'
#' @param x Name of the tbl_df containing biography data generated by the function read_bio_table.
#' @export
#' @examples
#' bio_errors <- find_bio_errors(lh)
find_bio_errors <- function(x){
if(!("tbl_df" %in% class(x)) | length(names(x)) != 17){
message("Error: unrecognized object. Please use the tbl_df produced by the function read_bio_table.")
return(NULL)
}
problems <- list()
# Find duplicate animals
error_duplicates <- x %>%
dplyr::group_by(Study.Id, Animal.Id) %>%
dplyr::summarise(n_records = n()) %>%
dplyr::filter(n_records > 1)
if(nrow(error_duplicates) > 0){
problems[["error_duplicates"]] <- error_duplicates
}
# Find problem dates
realistic_dates <- lubridate::new_interval(lubridate::ymd_hms("1914-01-01 00:00:00"), Sys.time())
temp <- x %>%
dplyr::filter(Birth.Date %within% realistic_dates &
Min.Birth.Date %within% realistic_dates &
Max.Birth.Date %within% realistic_dates &
Entry.Date %within% realistic_dates &
Depart.Date %within% realistic_dates)
error_dates <- x %>%
dplyr::anti_join(temp, by = c("Study.Id", "Animal.Id")) %>%
dplyr::select(ends_with("Id"), ends_with("Date")) %>%
suppressMessages()
if(nrow(error_dates) > 0){
problems[["error_dates"]] <- error_dates
}
if(length(problems) == 0){
message("No errors found!")
problems <- NULL
}
return(problems)
}
#' Find problems in Fertility data.
#'
#' Returns a named list of tbl_df objects, including
#' \itemize{
#' \item error_dates: Rows with one or more bad dates
#' }
#'
#' @param x Name of the tbl_df containing fertility data generated by the function read_fert_table.
#' @export
#' @examples
#' fert_errors <- find_fert_errors(fert)
find_fert_errors <- function(x){
if(!("tbl_df" %in% class(x)) | length(names(x)) != 6){
message("Error: unrecognized object. Please use the tbl_df produced by the function read_fert_table.")
return(NULL)
}
problems <- list()
# Find problem dates
realistic_dates <- lubridate::new_interval(lubridate::ymd_hms("1914-01-01 00:00:00"), Sys.time())
temp <- x %>%
dplyr::filter(Start.Date %within% realistic_dates &
Stop.Date %within% realistic_dates)
error_dates <- x %>%
dplyr::anti_join(temp, by = c("Study.Id", "Animal.Id")) %>%
dplyr::select(ends_with("Id"), ends_with("Date")) %>%
suppressMessages()
if(nrow(error_dates) > 0){
problems[["error_dates"]] <- error_dates
}
if(length(problems) == 0){
message("No errors found!")
problems <- NULL
}
return(problems)
}
#' Find mothers with no corresponding Animal.Id entry.
#'
#' Returns a named list Mom.Id records that don't match the Animal.Id records for each site
#'
#' @param x Name of the tbl_df containing biography data generated by the function read_bio_table.
#' @export
#' @examples
#' mom_id_errors <- find_mom_id_errors(lh)
find_mom_id_errors <- function(x){
if(!("tbl_df" %in% class(x)) | length(names(x)) != 17){
message("Error: unrecognized object. Please use the tbl_df produced by the function read_bio_table.")
return(NULL)
}
`%ni%` = Negate(`%in%`)
f <- function(df){
inds <- levels(factor(df$Animal.Id))
moms <- levels(factor(df$Mom.Id))
missing <- moms[moms %ni% inds]
return(missing)
}
mom_errors <- plyr::dlply(x, .(Study.Id), f)
return(mom_errors)
}
#' Find mothers with no corresponding Animal.Id entry.
#'
#' Returns a named list Mom.Id records that don't match the Animal.Id records for each site
#'
#' @param x Name of the tbl_df containing biography data generated by the function read_bio_table.
#' @export
#' @examples
#' first_born_errors <- find_first_born_errors(lh)
find_first_born_errors <- function(x){
if(!("tbl_df" %in% class(x)) | length(names(x)) != 17){
message("Error: unrecognized object. Please use the tbl_df produced by the function read_bio_table.")
return(NULL)
}
problems <- list()
fb_unknown_mother <- x %>%
dplyr::filter(is.na(Mom.Id) & First.Born %in% c("N", "Y")) %>%
dplyr::select(Study.Id, Mom.Id, Animal.Id, Animal.Name, Birth.Date, First.Born)
if(nrow(fb_unknown_mother) > 0){
problems[["unknown_mother_first_born"]] <- fb_unknown_mother
}
fb_multiple <- x %>%
dplyr::filter(First.Born == "Y") %>%
dplyr::group_by(Study.Id, Mom.Id) %>%
dplyr::summarise(n = n()) %>%
dplyr::filter(n > 1) %>%
dplyr::left_join(x, by = c("Study.Id" = "Study.Id", "Mom.Id" = "Mom.Id")) %>%
dplyr::filter(First.Born == "Y") %>%
dplyr::select(Study.Id, Mom.Id, Animal.Id, Animal.Name, Birth.Date, First.Born)
# Filter out special case of twins at Karisoke
fb_multiple <- fb_multiple %>%
dplyr::filter(!(Study.Id == "karisoke" & (Animal.Id == "I81" | Animal.Id == "I82")))
if(nrow(fb_multiple) > 0){
problems[["multiple_first_born"]] <- fb_multiple
}
return(problems)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.