knitr::opts_chunk$set(echo = TRUE)
library(dplyr) library(ggplot2) library(tidyr)
We append flags to the candidate data with the clean_location_data function from the animaltracker package.
If the Rate is greater than 84, we append a RateFlag.
If the Course is greater than or equal to 100, we append a CourseFlag.
If the Distance is greater than or equal to 840, we append a DistanceFlag.
In the cleaning process, observations with a DistanceFlag, or 2+ flags are removed.
However, the data is left unchanged in this case for comparison purposes.
candidate <- read.csv("df_candidate.csv", stringsAsFactors = FALSE) correct <- read.csv("df_correct.csv", stringsAsFactors = FALSE) print(nrow(candidate)) print(nrow(correct))
We use the dplyr package to join the candidate and correct data on Cow and Index.
join <- dplyr::full_join(candidate, correct, by=c("Cow", "Index")) print(nrow(join))
There are approximately 500 more observations in the joined data than there are in each individual dataset.
First, we determine which observations in candidate are to be kept according to the clean_location_data function.
join <- join %>% dplyr::rename(Keep.y = Keep, DistFlag.x = DistanceFlag, DistFlag.y = DistFlag) %>% dplyr::mutate(Keep.x = 1*(TotalFlags.x < 2 & !DistFlag.x))
There are 165039 observations that both correct and candidate keep and 2935 that both discard.
However, correct discards 337 that candidate would not and candidate discards 101 that correct would not.
join %>% dplyr::group_by(Keep.x, Keep.y) %>% summarise(n = n())
join %>% dplyr::filter(Keep.x < Keep.y) %>% dplyr::select(RateFlag.x, CourseFlag.x, DistFlag.x, RateFlag.y, CourseFlag.y, DistFlag.y) %>% dplyr::summarise(RateFlag.x = sum(RateFlag.x), CourseFlag.x = sum(CourseFlag.x), DistFlag.x = sum(DistFlag.x), RateFlag.y = sum(RateFlag.y), CourseFlag.y = sum(CourseFlag.y), DistFlag.y = sum(DistFlag.y)) %>% tidyr::gather("Flag", "Count") %>% dplyr::mutate(Source = ifelse(grepl(".x", Flag), "Candidate", "Correct"), Flag = substr(Flag, 1, nchar(Flag)-2)) %>% ggplot(aes(Flag, Count, fill = Source)) + geom_bar(stat = "identity", position = "dodge") + ggtitle("Observations Kept by Correct")
join %>% dplyr::filter(Keep.y < Keep.x) %>% dplyr::select(RateFlag.x, CourseFlag.x, DistFlag.x, RateFlag.y, CourseFlag.y, DistFlag.y) %>% dplyr::summarise(RateFlag.x = sum(RateFlag.x), CourseFlag.x = sum(CourseFlag.x), DistFlag.x = sum(DistFlag.x), RateFlag.y = sum(RateFlag.y), CourseFlag.y = sum(CourseFlag.y), DistFlag.y = sum(DistFlag.y)) %>% tidyr::gather("Flag", "Count") %>% dplyr::mutate(Source = ifelse(grepl(".x", Flag), "Candidate", "Correct"), Flag = substr(Flag, 1, nchar(Flag)-2)) %>% ggplot(aes(Flag, Count, fill = Source)) + geom_bar(stat = "identity", position = "dodge") + ggtitle("Observations Kept by Candidate")
cumdist <- join %>% dplyr::group_by(Cow) %>% dplyr::arrange(Index, .by_group=TRUE) %>% dplyr::mutate(Distance.y = dplyr::lag(Distance.y,1), Distance.x = ifelse(is.na(Distance.x), 0, Distance.x), Distance.y = ifelse(is.na(Distance.y), 0, Distance.y), cumDist.x = cumsum(Distance.x), cumDist.y = cumsum(Distance.y)) %>% dplyr::ungroup() cumdist_candidate <- cumdist %>% dplyr::select(Index, Cow, cumDist.x, DistFlag.x) %>% dplyr::rename(Flag = DistFlag.x, cumDist = cumDist.x) %>% dplyr::mutate(Source = "Candidate") cumdist_correct <- cumdist %>% dplyr::select(Index, Cow, cumDist.y, DistFlag.y) %>% dplyr::rename(Flag = DistFlag.y, cumDist = cumDist.y) %>% dplyr::mutate(Source = "Correct") plot_data <- dplyr::bind_rows(cumdist_candidate, cumdist_correct) ggplot(plot_data, aes(x=Index, y=cumDist, group=Source, color=Source)) + geom_line(aes(size = Source)) + #geom_point(data=plot_data %>% dplyr::mutate(Flag = ifelse(is.na(Flag), 0, Flag)) %>% dplyr::filter(Flag==1), aes(x=Index, y=cumDist), color="black") + ylab("Cumulative Distance") + scale_color_discrete(guide = guide_legend(reverse = TRUE)) + scale_size_manual(values=c(2, 1)) + facet_wrap(vars(Cow)) + theme(axis.text.x = element_text(angle = -45))
rate_candidate <- join %>% dplyr::select(Index, Cow, Rate.x, RateFlag.x) %>% dplyr::rename(Flag = RateFlag.x, Rate = Rate.x) %>% dplyr::mutate(Source = "Candidate") rate_correct <- join %>% dplyr::select(Index, Cow, Rate.y, RateFlag.y) %>% dplyr::mutate(Flag = RateFlag.y, Rate = as.numeric(Rate.y)) %>% dplyr::mutate(Source = "Correct") plot_data <- dplyr::bind_rows(rate_candidate, rate_correct) ggplot(plot_data, aes(x=Index, y=Rate, group=Source, color=Source)) + geom_line(aes(size = Source)) + #geom_point(data=plot_data %>% dplyr::mutate(Flag = ifelse(is.na(Flag), 0, Flag)) %>% dplyr::filter(Flag==1), aes(x=Index, y=Rate), color="black") + ylab("Rate") + scale_color_discrete(guide = guide_legend(reverse = TRUE)) + scale_size_manual(values=c(2, 1)) + facet_wrap(vars(Cow)) + theme(axis.text.x = element_text(angle = -45))
course_candidate <- join %>% dplyr::select(Index, Cow, Course.x, CourseFlag.x) %>% dplyr::rename(Flag = CourseFlag.x, Course = Course.x) %>% dplyr::mutate(Source = "Candidate") course_correct <- join %>% dplyr::select(Index, Cow, Course.y, CourseFlag.y) %>% dplyr::rename(Flag = CourseFlag.y, Course = Course.y) %>% dplyr::mutate(Source = "Correct") plot_data <- dplyr::bind_rows(course_candidate, course_correct) ggplot(plot_data, aes(x=Index, y=Course, group=Source, color=Source)) + geom_line(aes(size = Source)) + #geom_point(data=plot_data %>% dplyr::mutate(Flag = ifelse(is.na(Flag), 0, Flag)) %>% dplyr::filter(Flag==1), aes(x=Index, y=Course), color="black") + ylab("Course") + scale_color_discrete(guide = guide_legend(reverse = TRUE)) + scale_size_manual(values=c(2, 1)) + facet_wrap(vars(Cow)) + theme(axis.text.x = element_text(angle = -45))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.