Nothing
#' Conditional Response Probability
#'
#' This function calculates the conditional response
#' probability of each lag position. Participants' lag
#' between subsequent named items is tallied and then
#' divided by the possible combination of subsequent lags
#' given their response pattern.
#'
#' This output can then be used to create a CRP visualizations,
#' and an example can be found in our manuscript/vignettes.
#'
#' Important: The code is written assuming the data provided are for
#' a single recall list. If repeated measures are used (i.e., there are
#' multiple lists completed by each participant or multiple list versions),
#' you should use this function several times, once on each list/answer key.
#'
#' @param data a dataframe of the scored free recall that you would
#' like to calculate - use prop_correct_free() for best formatting.
#' @param position a column name in the dataframe that contains
#' answered position of each response in quotes (i.e., "column")
#' @param answer a column name of the answer given for that position
#' in the original dataframe.
#' @param id a column name of the participant id in the original
#' dataframe.
#' @param key a vector containing the scoring key or data column name.
#' This column does not have to be included in the original dataframe.
#' We assume your answer key is in the tested position order. You should
#' not include duplicates in your answer key.
#' @param scored a column in the original dataframe indicating if the
#' participant got the answer correct (1) or incorrect (0).
#'
#' @return
#' \item{DF_CRP}{A dataframe of the proportion correct for each
#' conditional lag position including any other between subjects
#' variables present in the data.}
#'
#' @keywords proportion correct scoring free recall serial position
#' @export
#' @examples
#'
#' data(free_data)
#' data(answer_key_free2)
#'
#' free_data <- subset(free_data,
#' List_Type == "Cat_Recall_L1")
#'
#' DF_long <- arrange_data(data = free_data,
#' responses = "Response",
#' sep = " ",
#' id = "Username")
#'
#' scored_output <- prop_correct_free( data = DF_long,
#' responses = "response",
#' key = answer_key_free2$Answer_Key,
#' id = "Sub.ID",
#' cutoff = 1,
#' flag = TRUE,
#' group.by = "Version")
#'
#' crp_output <- crp(data = scored_output$DF_Scored,
#' position = "position",
#' answer = "Answer",
#' id = "Sub.ID",
#' key = answer_key_free2$Answer_Key,
#' scored = "Scored")
#'
#' head(crp_output)
#'
crp <- function(data, position, answer, id,
key, scored){
# for cran check
Sub.ID <- NULL
#create answer key with order
if (sum(duplicated(key)) > 0){
stop("Your answer key contains duplicates. Please check your data.")
}
key <- data.frame("Answer" = key, "Tested.Position" = 1:length(key))
#merge that with the data
DF <- as.data.frame(data)
colnames(DF)[grepl(answer, colnames(DF))] <- "Answer"
colnames(DF)[grepl(position, colnames(DF))] <- "Answered.Position"
colnames(DF)[grepl(scored, colnames(DF))] <- "Scored"
colnames(DF)[grepl(id, colnames(DF))] <- "Sub.ID"
DF <- merge(DF, key, by = "Answer")
#calculate the number of times within window
DF$Answered.Position <- as.numeric(DF$Answered.Position)
DF$Tested.Position <- as.numeric(DF$Tested.Position)
DF$Lag <- DF$Tested.Position - DF$Answered.Position
number_spots <- 1:max(DF$Tested.Position)
DF_final <- NULL
# Get all possible lags for 0 options
all_lags <- sort(c((1:(nrow(key) - 1)) * -1, 1:(nrow(key) - 1)))
# Create a dataframe of all possible options and ids
merge_lags <- data.frame(Sub.ID = rep(unique(DF$Sub.ID), each = length(all_lags)),
participant_lags = rep(all_lags, length(unique(DF$Sub.ID))))
#for each participant calculate the possible lags
for (i in unique(DF$Sub.ID)){
temp_part <- subset(DF,
Sub.ID == i)
temp_part <- temp_part[order(temp_part$Answered.Position), ]
participant_lags <- diff(temp_part$Tested.Position)
possible_lags <- c()
if (nrow(temp_part) > 1){
#participant lags
for (j in 1:nrow(temp_part)){
#take up to the current answered position
current_used <- temp_part$Tested.Position[1:j]
answers_left <- setdiff(number_spots, current_used)
current_spot <- temp_part$Tested.Position[j]
possible_lags <- c(possible_lags, answers_left - current_spot)
} #answers loop
table_part_lags <- as.data.frame(table(participant_lags))
table_possible_lags <- as.data.frame(table(possible_lags))
colnames(table_possible_lags) <- c("participant_lags", "Possible.Freq")
table_part_lags <- merge(table_part_lags,
table_possible_lags,
by = "participant_lags")
table_part_lags$Sub.ID <- i
if (is.null(DF_final)){
DF_final <- table_part_lags
} else {
DF_final <- rbind(DF_final, table_part_lags)
}
} #close nrow check
} #participant loop
# create other columns in merge_lags
other.columns <- setdiff(colnames(DF),
c("Responses", "Sub.ID", "Answer",
"Scored", "Answered.Position",
"Tested.Position", "Lag",
colnames(merge_lags)))
for (col in other.columns){
DF_temp <- unique(DF[ , c("Sub.ID", col)])
if (sum(duplicated(DF_temp$Sub.ID)) == 0){
merge_lags <- merge(merge_lags, DF_temp, by = "Sub.ID")
}
}
# Merge all possible lags with real lags
DF_final$participant_lags <- as.numeric(as.character(DF_final$participant_lags))
DF_final <- merge(DF_final, merge_lags, by = c("Sub.ID", "participant_lags"), all = T)
# Create CRP
DF_final$CRP <- DF_final$Freq / DF_final$Possible.Freq
# Add zeroes back in
DF_final$Freq[is.na(DF_final$Freq)] <- 0
DF_final$Possible.Freq[is.na(DF_final$Possible.Freq)] <- 0
DF_final$CRP[is.na(DF_final$CRP)] <- 0
return(DF_final)
}
#' @rdname crp
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.