# NOTE(review): stray non-code text ("Nothing") from a web scrape; not part of the package source.
#' Score novel and duplicates codes across interviews
#'
#' @description
#' 'Novel' and 'duplicate' codes are scored once per interview; the number of
#' times they are spoken in an interview does not matter.
#'
#' The definition of whether a code is _novel_ or _duplicated_ is
#' entirely chronological:
#'
#' - A **novel code** is a topic/idea/concept that, for example, is mentioned in
#' Interview 17, but was not mentioned in Interviews 1 through 16.
#' - A **duplicate code** is one that has been talked about in other interviews
#' previously.
#'
#' The cumulative sum of novel codes is used to visualise a stopping
#' point for qualitative interviews.
#'
#' @param interviews (List) A list of dataframes, as generated by [import_coding_matrices()].
#'
#' @return A dataframe, with one row per interview and these columns:
#' 1. `itvw_seq`, the chronological order of interviews.
#' 2. `n_codes`, the number of unique codes mentioned in this interview.
#' 3. `n_duplicate`, how many of those codes are duplicates (mentioned in previous interviews).
#' 4. `n_novel`, how many of those codes are novel (mentioned for the first time in this interview).
#' 5. `prop_duplicate`, the proportion of this interview's codes that are duplicates.
#' 6. `prop_novel`, the proportion of this interview's codes that are novel.
#' 7. `cumsum_novel`, the cumulative sum of novel codes over time (i.e. across interviews).
#'
#' @export
#'
#' @seealso [plot_novelty()], [plot_richness()]
#'
#' @examples
#' # A folder of example coding matrices included with the package
#' path_to_matrices <- system.file("insect_study/matrices/", package = "novelqualcodes")
#' print(path_to_matrices)
#'
#' # A list of files in that folder
#' list.files(path_to_matrices)
#'
#' # Import them all at once
#' my_matrices <- import_coding_matrices(path_to_matrices)
#'
#' # Score them for novel and duplicate codes
#' my_scores <- score_codes(my_matrices)
#'
#' # Look inside the result; novel and duplicate codes are scored across
#' # all interviews.
#' print(my_scores)
#'
#' @md
score_codes <- function(interviews) {
  # Pre-allocate the result dataframe for efficiency (one row per interview).
  result <- data.frame(
    itvw_seq       = seq_along(interviews), # Chronological order of interviews
    n_codes        = NA_real_,  # Number of codes this person mentioned
    n_duplicate    = NA_real_,  # Duplicate codes: codes that others mentioned before them
    n_novel        = NA_real_,  # Novel codes: codes mentioned by no one else before now
    prop_duplicate = NA_real_,  # Proportion of this person's codes that are duplicates
    prop_novel     = NA_real_,  # Inverse of above: proportion that are novel
    cumsum_novel   = NA_real_   # Cumulative sum of novel codes
  )

  # Rather than the obvious method of starting with an empty vector and adding
  # to it as we discover a new code (potentially expensive because the vector
  # keeps growing), do the opposite: start with the full set of all codes and
  # subtract from it every time a code is encountered. If a code is still in
  # this set, it has never been used before; if not, it's a duplicate.
  # NOTE: extracted via $code for consistency with the loop below (the
  # original used positional x[[1]], which silently assumed column order).
  novel_codes <- unique(unlist(lapply(interviews, function(x) x$code)))

  # Calculations that have to be done per-interview.
  for (row_idx in seq_along(interviews)) {
    topics <- interviews[[row_idx]]$code
    result[row_idx, "n_codes"]     <- length(topics)
    result[row_idx, "n_duplicate"] <- sum(!(topics %in% novel_codes))
    result[row_idx, "n_novel"]     <- sum(topics %in% novel_codes)

    # Remove all codes mentioned by this person; they're no longer novel.
    # setdiff() is used instead of novel_codes[-which(...)] because when an
    # interview contains no novel codes, which() returns integer(0) and
    # x[-integer(0)] evaluates to x[integer(0)] — an empty vector — which
    # would incorrectly wipe out every remaining novel code.
    novel_codes <- setdiff(novel_codes, topics)
  }

  # Calculations that can be done across the whole dataframe.
  result$cumsum_novel   <- cumsum(result$n_novel)
  result$prop_novel     <- result$n_novel / result$n_codes
  result$prop_duplicate <- 1 - result$prop_novel

  return(result)
}
# NOTE(review): the following three lines are website footer boilerplate
# captured by a web scrape (rdrr.io embed text), not part of the package
# source; they are commented out here and are safe to delete.
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.