# Document-wide knitr chunk defaults.
knitr::opts_chunk$set(
  warning = FALSE, # warnings raised during codebook generation are not shown
  message = FALSE, # messages emitted during codebook generation are not shown
  error = TRUE,    # do not interrupt codebook generation in case of errors
                   # (usually better for debugging)
  echo = FALSE,    # the R code of each chunk is not echoed
  comment = NA     # remove all hashes from printed output
)

# Plot and table defaults used by the rest of the document.
ggplot2::theme_set(ggplot2::theme_bw())
pander::panderOptions("table.split.table", Inf)
library(codebook)

# Example export read straight from the qualify repository.
codebook_data <- read.csv(
  "https://raw.githubusercontent.com/edunford/qualify/master/Data/example_pull_data_w_2users.csv",
  header = TRUE
)

# ---- Sample data (exhibition only) ----
# The `code`, `count`, `date` and `evid` columns below are simulated for
# exhibition purposes only. They should be removed prior to release of the
# package. Their length is derived from the data rather than hard-coded, so
# the assignments keep working if the example file gains or loses rows.
n_rows <- nrow(codebook_data)

codebook_data$code <- rpois(n = n_rows, lambda = 10)
codebook_data$count <- runif(n = n_rows, min = 25, max = 3000)

# Draw `x` random dates (with replacement) from the daily sequence between
# `min` and `max`; both default to the first/last day of the current year.
# The dates are returned in ascending order when `sort` is TRUE, and in the
# order drawn otherwise.
rdate <- function(x,
                  min = paste0(format(Sys.Date(), "%Y"), "-01-01"),
                  max = paste0(format(Sys.Date(), "%Y"), "-12-31"),
                  sort = TRUE) {
  dates <- sample(seq(as.Date(min), as.Date(max), by = "day"), x, replace = TRUE)
  if (isTRUE(sort)) {
    # The `sort` argument shadows the function name, so call it explicitly.
    base::sort(dates)
  } else {
    dates
  }
}

codebook_data$date <- rdate(n_rows)
codebook_data$evid <- sample(
  x = c("aa", "ba", "bb", "ac", "cc", "bc"),
  prob = c(.1, .1, .2, .05, .25, .3),
  size = n_rows,
  replace = TRUE
)

# Omit the following call if your missing values are already properly
# labelled.
codebook_data <- detect_missing(
  codebook_data,
  # only labelled values are autodetected as missing
  only_labelled = TRUE,
  # negative values are NOT treated as missing values
  negative_values_are_missing = FALSE,
  # 99/999 are missing values, if they are more than 5 MAD from the median
  ninety_nine_problems = TRUE
)

# If you are not using formr, the codebook package needs to guess which items
# form a scale. The following line finds item aggregates with names like this:
#   scale = scale_1 + scale_2R + scale_3R
# Identifying these aggregates allows the codebook function to automatically
# compute reliabilities. However, it will not reverse items automatically.
codebook_data <- detect_scales(codebook_data)

# Render the codebook for the prepared data.
codebook(codebook_data)
The coded data are assumed to be categorical. Different reliability tests would be more appropriate for ordinal, interval, or ratio data.
library("irr")
library("tidyverse")

# Reshape the coded data for the inter-coder reliability (ICR) tests.
# First build an M x K data frame, where M is the number of coders and K is
# the number of unique unit-of-analysis/variable pairs; then flip it to K x M.
irc_variables <- c(".user", "variableID", "code")

# Fuse `variable` and `.unit` into a single `variableID` key column.
codebook_dataunite <- unite(codebook_data, col = "variableID", variable, .unit)

# Keep only the coder, the key and the assigned code.
trunc_codebook_data <- codebook_dataunite[irc_variables]

# One row per coder, one column per key; missing combinations stay NA.
icr_data <- spread(trunc_codebook_data, key = variableID, value = code)

# Flip the table to K x M, then drop the first row, which holds the former
# `.user` column rather than ratings.
icr_transpose <- as.data.frame(t(as.matrix(icr_data)))
icr_trans <- icr_transpose[-1, ]
# These tests were selected because the data is assumed to be categorical and
# non-ordinal.

# Fleiss' kappa for m raters, requesting the exact value and the
# category-wise detail.
print(irr::kappam.fleiss(ratings = icr_trans, exact = TRUE, detail = TRUE))

# Light's kappa for m raters.
print(irr::kappam.light(ratings = icr_trans))

# Coefficient of rater bias.
irr::rater.bias(ratings = icr_trans)
```r
#' Pull a timeline of coding activity from a qualify project
#'
#' Reads every table whose name contains "v" from the project database,
#' rounds each row's timestamp to `round_date`, keeps the rows in which at
#' least one field is not the empty string, and counts those rows per rounded
#' timestamp and variable.
#'
#' @param .project_path Path to the project directory that should contain the
#'   qualify database.
#' @param round_date Rounding unit handed to `lubridate::round_date()`
#'   (default `"minute"`).
#' @return The per-table counts bound into one data frame (`timestamp`,
#'   `variable`, `n`); `NULL` when no matching table exists. When the
#'   directory or database is missing, a message is printed and `NULL` is
#'   returned invisibly.
pull_timeline <- function(.project_path, round_date = "minute") {
  # `&&` short-circuits, so the database check only runs for an existing
  # directory (the elementwise `&` always evaluated both sides).
  if (dir.exists(.project_path) && check_db_exists(.project_path)) {
    # NOTE(review): the connection is never explicitly closed here -- confirm
    # whether sql_instance() expects the caller to clean up.
    con <- sql_instance(.project_path)
    all_tbls <- grep("v", dplyr::src_tbls(con), value = TRUE)

    # No matching tables: keep returning NULL, as the loop-based version did.
    if (length(all_tbls) == 0) {
      return(NULL)
    }

    # Summarise each table separately, then bind once instead of growing the
    # result inside a loop.
    per_table <- lapply(all_tbls, function(tbl_id) {
      # Get the variable name recorded for this table
      var_name <- dplyr::tbl(con, ".input_state") %>%
        dplyr::filter(.id == tbl_id) %>%
        dplyr::collect() %>%
        .$var_name

      # Extract the data state
      dplyr::tbl(con, tbl_id) %>%
        dplyr::collect() %>%
        dplyr::mutate(
          timestamp = lubridate::round_date(
            lubridate::ymd_hms(timestamp),
            unit = round_date
          ),
          variable = var_name
        ) %>%
        dplyr::group_by(timestamp, .unit, variable) %>%
        tidyr::nest() %>%
        # A row counts as an entry (1) when not all of its fields are "".
        dplyr::mutate(is_entry = purrr::map(data, function(block) {
          apply(block, 1, function(fields) {
            as.numeric(sum(fields == "") < length(fields))
          })
        })) %>%
        tidyr::unnest() %>%
        dplyr::filter(is_entry == 1) %>%
        dplyr::group_by(timestamp, variable) %>%
        dplyr::count() %>%
        dplyr::ungroup()
    })

    return(dplyr::bind_rows(per_table))
  } else {
    cat("\nNo qualify database located in the provide path.\n")
  }
}
#' Pull the most recent coded data from a qualify project
#'
#' Reads every table whose name contains "v" from the project database and,
#' for each `.unit`, keeps the row with the latest `timestamp`. Each row is
#' labelled with the variable name recorded for its table in `.input_state`.
#'
#' @param .project_path Path to the project directory that should contain the
#'   qualify database (default `""`).
#' @return The per-table rows bound into one data frame, with `variable`,
#'   `.unit` and `timestamp` as the leading columns; `NULL` when no matching
#'   table exists. When the directory or database is missing, a message is
#'   printed and `NULL` is returned invisibly.
pull_data <- function(.project_path = "") {
  # `&&` short-circuits, so the database check only runs for an existing
  # directory (the elementwise `&` always evaluated both sides).
  if (dir.exists(.project_path) && check_db_exists(.project_path)) {
    # NOTE(review): the connection is never explicitly closed here -- confirm
    # whether sql_instance() expects the caller to clean up.
    con <- sql_instance(.project_path)
    all_tbls <- grep("v", dplyr::src_tbls(con), value = TRUE)

    # No matching tables: keep returning NULL, as the loop-based version did.
    if (length(all_tbls) == 0) {
      return(NULL)
    }

    # Extract each table separately, then bind once instead of growing the
    # result inside a loop.
    latest_by_table <- lapply(all_tbls, function(tbl_id) {
      # Get the variable name recorded for this table
      var_name <- dplyr::tbl(con, ".input_state") %>%
        dplyr::filter(.id == tbl_id) %>%
        dplyr::collect() %>%
        .$var_name

      # Extract the data state: newest row per unit
      dplyr::tbl(con, tbl_id) %>%
        dplyr::collect() %>%
        dplyr::group_by(.unit) %>%
        dplyr::arrange(dplyr::desc(timestamp)) %>%
        dplyr::slice(1) %>%
        dplyr::ungroup() %>%
        dplyr::mutate(variable = var_name) %>%
        # Namespace-qualified like every other verb, so the function does not
        # depend on dplyr being attached.
        dplyr::select(variable, .unit, timestamp, dplyr::everything())
    })

    return(dplyr::bind_rows(latest_by_table))
  } else {
    cat("\nNo qualify database located in the provide path.\n")
  }
}
#' Check whether a qualify database file exists in a project directory
#'
#' @param .project_path Path to the project directory (default `""`).
#' @return `TRUE` when `.qualify_data.sqlite` exists directly inside
#'   `.project_path`, `FALSE` otherwise.
check_db_exists <- function(.project_path = "") {
  # Build the path from the argument; the previous body read an undefined
  # `path` variable instead of `.project_path`.
  set_path <- file.path(.project_path, ".qualify_data.sqlite")
  file.exists(set_path)
}
````
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.