summary of the vignette
library(permuter) library(dplyr) library(ggplot2) data(nsgk) time_stamps <- c(36, 32, 35, 37, 31, 35, 40, 32) nvideos <- length(unique(nsgk$video)) ncategories <- length(unique(nsgk$domain)) nraters <- max(nsgk$rater) head(nsgk)
We have to turn this table into the right data format: for each tag-video pair, we must create a matrix of 0s and 1s, where 1 in the $i, j$ position indicates that rater $i$ saw the tag at timestamp $j$. We'll have 8 matrices for each tag, for a total of \Sexpr{ncategories*nvideos} matrices. We'll store these matrices in a list of lists: the first level of the list indexes the tag, and for each tag there is a list of 8 matrices, one for each video.
unique_tags <- sort(unique(nsgk$domain)) nsgk_mat <- list() for(tag in unique_tags){ domain <- list() for(v in seq_len(nvideos)){ thisvideo <- nsgk %>% filter(domain == tag) %>% filter(video == v) tag_mat <- matrix(0, nrow = nraters, ncol = time_stamps[v]) for(row in seq_len(nrow(thisvideo))){ tag_mat[thisvideo[row, "rater"], thisvideo[row, "time_stamp"]] <- 1 } domain[[v]] <- tag_mat } nsgk_mat[[length(nsgk_mat) + 1]] <- domain } names(nsgk_mat) <- unique_tags length(nsgk_mat) length(nsgk_mat[[1]]) dim(nsgk_mat[[1]][[1]])
We compute the mean and SD of the number of times a tag was observed in each video. Rows index the tag and columns index the video.
video_means <- t(sapply(nsgk_mat[1:20], function(tt){ sapply(tt, mean) })) head(video_means) video_sds <- t(sapply(nsgk_mat[1:20], function(tt){ sapply(tt, sd) })) head(video_sds)
npc_results <- lapply(nsgk_mat[1:20], function(tt){ video_specific_results <- lapply(tt, function(x){ res <- irr_ts_distribution(x, reps = 1000, keep_dist = TRUE, seed = 101) }) tag_distribution <- sapply(video_specific_results, function(x) x$dist) tag_pvalues <- sapply(video_specific_results, function(x) x$pvalue) tag_concordance <- sapply(video_specific_results, function(x) x$obs_ts) tag_npc_res <- irr_npc_distribution(tag_distribution, size = time_stamps, tag_concordance) return(list("pvalues_per_video" = tag_pvalues, "concordance_per_video" = tag_concordance, "tag_global_pvalue" = tag_npc_res$pvalue)) }) head(npc_results, 3)
Let's put it all together:
ncategories <- 20 nsgk_res <- matrix(nrow = ncategories, ncol = (1 + 1 + 8*4)) for(i in seq_len(ncategories)){ nsgk_res[i, ] <- c(unique_tags[i], npc_results[[i]]$tag_global_pvalue, npc_results[[i]]$concordance_per_video, npc_results[[i]]$pvalues_per_video, video_means[i,], video_sds[i,]) } colnames(nsgk_res) <- c("tag_index", "global_pvalue", paste("video", 1:8, "concordance", sep = ""), paste("video", 1:8, "pvalue", sep = ""), paste("video", 1:8, "mean", sep = ""), paste("video", 1:8, "sd", sep = "")) head(nsgk_res) #write.csv(nsgk_res, file = "nsgk_results.csv", row.names=FALSE)
Overall, what's the relationship between concordance and P?
nsgk_res %>% as.data.frame() %>% mutate(avg_concordance = apply(nsgk_res[, 3:10], 1, mean)) %>% ggplot(aes(x = avg_concordance, y = global_pvalue)) + geom_point(color = "#629e1f", alpha = 0.6, size = 3) + xlab("Average Concordance") + ylab("Overall P-value") + ggtitle("Average Concordance Across 8 Videos vs P-value") + theme( panel.background = element_rect(fill = "#E8EBEF"), axis.text = element_text(size = 16, color = "#143264"), axis.title = element_text(size = 18, color = "#143264"), title = element_text(color = "#143264", size = 18) )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.