#' Check for and omit non-animal animal ids
#'
#' This function compares the animal ids in your data file to those in an
#' index file. Any ids matched to a species listed in the `omit` argument will
#' be omitted from the data set.
#'
#' This is useful for removing ids you know are errors (e.g., `0000000000`)
#' or which you know are wands.
#'
#' Ids are not modified. If `id_length` is not `NA` and any id in either the
#' index or the read data has a different number of characters, the function
#' stops with an error so that the ids can be corrected first (e.g., with
#' `check_problems()`).
#'
#' @param r Data frame. A data frame of read data, with at least column
#'   `animal_id`.
#' @param ids Character or Data frame. Either the name and location of a
#'   .csv file containing the animal id index or a data frame of the animal id
#'   index. In either case, the data must contain two columns: `animal_id`
#'   and `species`. A .csv file is read with all columns as character so that
#'   leading zeros in ids are preserved.
#' @param omit Character vector. All the values of `species` in the
#'   animal id index which you would like to omit from your data. Defaults to
#'   `c("wand", "error")`.
#' @param id_length Numeric. How many characters are expected in each id? This
#'   will test to make sure all ids are the right length (i.e. make sure
#'   leading zeros haven't been omitted). NA skips this test.
#' @param bird_ids Deprecated. Use `ids` instead.
#'
#' @return A data frame without the specified animal ids (always a data frame,
#'   even if `r` has a single column). Ids that do not appear in the index at
#'   all are kept. Messages are printed to inform the user of matching or
#'   non-matching animal ids.
#'
#'
#' @examples
#' \dontrun{
#' r <- check_ids(finches, ids = "animal_index.csv")
#'
#' index <- read.csv("animal_index.csv")
#' r <- check_ids(finches, ids = index)
#' }
#' @export
check_ids <- function(r, ids, omit = c("wand", "error"), id_length = 10, bird_ids){
  if (!missing(bird_ids)) {
    warning("Argument bird_ids is deprecated; please use ids instead.",
            call. = FALSE)
    ids <- bird_ids
  }

  # `ids` must be a single file name or a data frame
  if (is.null(ids) || (length(ids) > 1 && !is.data.frame(ids))) {
    stop("animal_id index (id) should either be the name of a comma separated ",
         "file (csv) OR should be a data frame. In either case, the data ",
         "should contain headers 'animal_id' and 'species'",
         call. = FALSE)
  }

  # Read everything as character: default type conversion would turn all-digit
  # ids into numbers and silently strip their leading zeros
  if (!is.data.frame(ids)) {
    ids <- utils::read.csv(ids, colClasses = "character")
  }

  # Check for required columns in ids
  if (!all(c("animal_id", "species") %in% names(ids))) {
    stop("animal_id index (id) should contain at least two columns: ",
         "'animal_id' and 'species'", call. = FALSE)
  }

  # Check for other id problems (collect both messages before stopping)
  if (!is.na(id_length)) {
    msg <- character()
    if (any(nchar(as.character(ids$animal_id)) != id_length)) {
      msg <- c(msg,
               paste0("You have some ids in your animal_id index that are not ",
                      id_length, " characters long"))
    }
    if (any(nchar(as.character(r$animal_id)) != id_length)) {
      msg <- c(msg,
               paste0("You have some ids in your read data that are not ",
                      id_length, " characters long"))
    }
    if (length(msg) > 0) stop(paste0(msg, collapse = "\n "), call. = FALSE)
  }

  # Look for unknown ids in your data
  unlisted <- unique(r$animal_id[!(r$animal_id %in% ids$animal_id)])
  if (length(unlisted) > 0) {
    message("Some animal_ids present in your data do not exist in the ",
            "animal_id index: ",
            paste(unlisted, collapse = ", "))
  } else {
    message("All animal_ids in your data are also in your animal_id index")
  }

  # Look for individuals in your ids that are not in your data
  real_animals <- unique(ids$animal_id[!(ids$species %in% omit)])
  unseen <- real_animals[!(real_animals %in% r$animal_id)]
  if (length(unseen) > 0) {
    message("Some animal_ids present in your animal_id index, are not in your ",
            "data: ",
            paste0(unseen, collapse = ", "))
  } else {
    message("All animal_ids in your animal_id index are also in your data")
  }

  # Any to omit?
  if (!any(omit %in% ids$species)) {
    message("animal_id index (id) data frame doesn't contain any animal_ids ",
            "to omit")
  }

  # Which ids in the data set match the "omit" section?
  ob <- unique(ids$animal_id[ids$species %in% omit]) # Which to omit in general
  ob2 <- unique(r$animal_id[r$animal_id %in% ob])    # Which to omit in this case
  if (length(ob2) > 0) {
    message("The following animal_ids have been omitted: ",
            paste0(ob2, collapse = ", "))
  } else {
    message("No animal_ids have been omitted")
  }

  # Only keep those that aren't in the "omit" section
  # (also keeps ids not in any section: 'unlisted').
  # drop = FALSE keeps a data frame even when `r` has a single column.
  r[!(r$animal_id %in% ob), , drop = FALSE]
}
# Deprecated alias for check_ids(), kept for backward compatibility.
# Signals a deprecation warning, then forwards `animal_ids` as the id index
# and `omit_animal` as the species to omit. Arguments are passed by name so
# they cannot be mismatched with check_ids()'s parameters.
check.ids <- function(r, animal_ids, omit_animal = c("wand", "error")){
  .Deprecated("check_ids")
  check_ids(r, ids = animal_ids, omit = omit_animal)
}
#' Check for and correct odd animal ids
#'
#' This function compares the animal ids in your data file to those in a problem
#' index file. Any ids matched will be replaced with the corrected value in the
#' problem index.
#'
#' This is useful for correcting ids that have been mangled by opening files in
#' Word (where leading zeros are stripped) or for dealing with any odd problems
#' in the field.
#'
#' Corrections are applied one row of the problem index at a time, in order.
#'
#' @param r Data frame. A data frame of read data. The function checks for the
#'   columns `animal_id`, `logger_id` and `time`.
#' @param problems Character or Data frame. Either the name and location of a
#'   .csv file containing the problem index or a data frame of the problem
#'   index. In either case, the data must contain two columns:
#'   `original_id` and `corrected_id`. Any other columns are ignored. A .csv
#'   file is read with all columns as character so that leading zeros in ids
#'   are preserved.
#'
#' @return A data frame with corrected ids, in which `animal_id` is a factor.
#'   Messages are printed to inform the user of any corrections made.
#'
#' @export
check_problems <- function(r, problems){
  if (length(problems) > 1 && !is.data.frame(problems)) {
    stop("Problems should either be the name of a comma separated file (csv) ",
         "OR should be a data frame. In either case, the data should contain ",
         "headers 'original_id' and 'corrected_id'", call. = FALSE)
  }

  # Confirm that expected columns and formats are present
  check_name(r, n = c("animal_id", "logger_id", "time"), "raw RFID")
  check_time(r, n = "time", internal = FALSE)
  check_format(r)

  # Get factor categories for animal_id. If animal_id is not a factor, fall
  # back to its observed values so untouched ids do not become NA when the
  # factor is rebuilt below.
  if (is.factor(r$animal_id)) {
    animals <- levels(r$animal_id)
  } else {
    animals <- unique(as.character(r$animal_id))
    animals <- animals[!is.na(animals)]
  }
  r$animal_id <- as.character(r$animal_id)

  # If problems is a file, load it. Read as character: default type conversion
  # would strip the very leading zeros the corrections are meant to restore.
  if (!is.data.frame(problems)) {
    problems <- utils::read.csv(problems, colClasses = "character")
  }

  # Both required columns must be present (extra columns are allowed)
  required <- c("original_id", "corrected_id")
  if (!all(required %in% names(problems))) {
    stop("Problems should contain headers 'original_id' and 'corrected_id'",
         call. = FALSE)
  }
  if (nrow(problems) < 1) stop("Problems data frame has no rows", call. = FALSE)

  # Keep only the two id columns, as character, trimming leading or trailing
  # whitespace
  problems <- data.frame(
    original_id = trimws(as.character(problems[["original_id"]])),
    corrected_id = trimws(as.character(problems[["corrected_id"]])),
    stringsAsFactors = FALSE
  )

  # Fix problem IDs. Rows are applied sequentially so that the result of one
  # correction is visible to the next. which() ignores NA ids, so missing
  # animal_ids are never counted as a match.
  applied <- logical(nrow(problems))
  for (i in seq_len(nrow(problems))) {
    hits <- which(r$animal_id == problems$original_id[i])
    if (length(hits) > 0) {
      applied[i] <- TRUE
      r$animal_id[hits] <- problems$corrected_id[i]
    }
  }
  fixes <- problems[applied, , drop = FALSE]

  # Get factors back, but remove old level and add new. unique() is needed
  # because a corrected id may already be an existing level (or be shared by
  # several fixes), and factor() rejects duplicated levels.
  animals <- animals[!(animals %in% fixes$original_id)]
  animals <- unique(c(animals, fixes$corrected_id))
  r$animal_id <- factor(r$animal_id, levels = animals)

  if (nrow(fixes) > 0) {
    message("The following animal ids have been corrected:")
    message(paste0(fixes$original_id, " to ", fixes$corrected_id,
                   collapse = "\n"))
  } else {
    message("No animal ids needed to be fixed")
  }

  r
}
# Deprecated alias for check_problems(), kept for backward compatibility.
# Signals a deprecation warning, then delegates to check_problems() with the
# same arguments.
check.problems <- function(r, problems){
  .Deprecated("check_problems")
  check_problems(r, problems = problems)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.