coding_summary <- function(user_doc_id, allocation_type="all", restrict_double_coded=TRUE){
#' Summary of coding of document allocation.
#'
#' Returns tibble with number of items of each variable value combination types coded in the document by the user.
#'
#' @param user_doc_id The user_doc_id(s) to summarize.
#' @param allocation_type The allocation type ("training", "testing","coding", "all").
#' @param restrict_double_coded Only summarize articles which have been coded more than once (in FALSE summarize all articles).
#' @return Returns tibble with number of items of predefined types coded in the document by the user.
#' @export
user_docs<-durhamevp::get_allocation(user_doc_id = user_doc_id, allocation_type=allocation_type)
user_docs<-dplyr::filter(user_docs, status=="COMPLETED")
if(restrict_double_coded){
doublecoded_docs<-user_docs %>%
dplyr::group_by(document_id) %>%
dplyr::tally() %>%
dplyr::filter(n>1)
user_docs<-dplyr::filter(user_docs, document_id %in% doublecoded_docs$document_id)
}
event_report <- durhamevp::get_event_report(user_doc_id=dplyr::pull(user_docs, "id"))
#model_event_report <- durhamevp::get_event_report(model_event_report_id)
tags<-durhamevp::get_tag(event_report_id = dplyr::pull(event_report, "id"))
attributes<-durhamevp::get_attribute(tag_id = dplyr::pull(tags, "id"))
event_report<-dplyr::left_join(event_report, user_docs, by=c("user_doc_id"="id"), suffix=c("event_report", "user_doc"))
tags<-dplyr::left_join(tags, event_report, by=c("event_report_id"="id"), suffix=c(".tags", "event_report"))
attributes<-dplyr::left_join(attributes, tags, by=c("tag_id"="id"), suffix=c("attributes", "tags"))
user_doc_coding_counts<-user_docs %>%
mutate(level="user_doc") %>%
tidyr::gather(variable, value, article_type, geo_relevant, time_relevant, electoral_nature, violence_nature, electoralviolence_nature, legibility, recommend_qualitative) %>%
dplyr::rename(user_doc_id=id) %>%
dplyr::group_by(user_doc_id, user_id, document_id, level, variable, value) %>%
dplyr::tally()
event_report_coding_counts<-event_report %>%
mutate(level="event_report") %>%
tidyr::gather(variable, value, event_type, environment, event_start, event_end) %>%
dplyr::group_by(user_doc_id, user_id, document_id, level, variable, value) %>%
dplyr::tally()
tag_coding_counts <- tags %>%
mutate(level="tag") %>%
dplyr::rename(value=tag_value) %>%
unite(variable, tag_table, tag_variable) %>%
group_by(user_doc_id, user_id, document_id, level, variable, value) %>%
tally()
attribute_coding_counts <- attributes %>%
mutate(level="attribute") %>%
dplyr::rename(value=attribute_value, variable=attribute) %>%
group_by(user_doc_id, user_id, document_id, level, variable, value) %>%
tally()
coding_summary <- bind_rows(user_doc_coding_counts,
event_report_coding_counts,
tag_coding_counts,
attribute_coding_counts)
coding_summary
}
compare_summaries<- function(coding_summary){
#'Compares summaries of coding by user.
#'
#'@param coding_summary A coding summary (usually generated by the \code{coding_summary} function.)
#'@export
#'
coding_allocs<-coding_summary %>%
ungroup() %>%
group_by(user_doc_id, document_id, user_id) %>%
summarize()
# There are some duplicated coding allocations (9 on 20/9/2018) - need to find out why!
# For now remove them
coding_allocs <- coding_allocs[!duplicated(coding_allocs[,c("document_id", "user_id")]),]
coder_pairs<- dplyr::full_join(coding_allocs,
coding_allocs,
by=c("document_id"),
suffix=c(".case1", ".case2"))
# create unique pairs of respondents
coder_pairs <- coder_pairs %>%
dplyr::group_by(document_id) %>%
tidyr::expand(user_id.case1, user_id.case2) %>%
dplyr::filter(user_id.case1 < user_id.case2) %>%
tibble::rowid_to_column("pair_no")
long_coder_pairs <- coder_pairs %>%
tidyr::gather(which_user_id, user_id, user_id.case1, user_id.case2) %>%
dplyr::left_join(coding_allocs, by = c("document_id", "user_id")) %>%
dplyr::arrange(pair_no)
long_coding<- dplyr::right_join(coding_summary,
long_coder_pairs,
by = c("user_doc_id", "user_id", "document_id"))
compare_res<-long_coding %>%
ungroup() %>%
dplyr::select(document_id, pair_no, which_user_id, n, level, variable, value) %>%
spread(which_user_id, n, fill=0) %>%
mutate(abs_diff=abs(user_id.case1-user_id.case2)) %>%
dplyr::select(-user_id.case1, -user_id.case2) %>%
left_join(coder_pairs, by=c("pair_no", "document_id"), suffix=c(".val", "")) %>%
gather(which_case, user_id, user_id.case1, user_id.case2)
compare_res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.