################################################################################
# Smiles and Masks Lookit study
# Written by: Michaela DeBolt
# Last edit: 10/22/20
# Contact: mdebolt@ucdavis.edu
################################################################################
# This files was intended to use on the real coded data, because the second coder
# is only going to be coding a portion of what the first coder coded, the original
# reliability script needed to be adjusted to account for this.
# Functions to compute reliability between two coder's data on the same child.
# These functions work on the frame-exported data from Datavyu.
# This function requires that the data has been exported into .txt. files and are
# delimited with a bracket " ] ", using the "FrameExport.rb" file. Exporting the
# frame data with FrameExport.rb will ensure they are exported with a specific frame
# rate.
#### Flexible reliability function #############################################
check_reliability <- function(project = "Smiles_and_Masks_LookitStudy", RA1 = "RA1", RA2 = "RA2", child_id = "PTB42L") {
library(tidyverse)
library(stringr)
# Set working directory to the coder's files
setwd(paste0("/Volumes/General/Backup/CodingProjects/", project, "/VideosFinishedCoding/"))
# Find the RA's folder
RA1_files <- list.files(pattern = RA1)
RA2_files <- list.files(pattern = RA2)
# List all the files that correspond to the child_id parameter
RA1_coded_files <- data.frame(files = list.files(paste0(RA1_files,"/", child_id), pattern = ".txt") )
RA2_coded_files <- data.frame(files = list.files(paste0(RA2_files,"/", child_id), pattern = ".txt") )
# Remove initials from the file names so they are identical between coders:
RA1_coded_files$files <- gsub(pattern = paste0("_",RA1), x = RA1_coded_files$files, replacement = "")
RA2_coded_files$files <- gsub(pattern = paste0("_",RA2), x = RA2_coded_files$files, replacement = "")
# Remove the trials from the 1st coder that were not coded by the 2nd coder:
RA1_coded_files <- RA1_coded_files %>% filter(files %in% RA2_coded_files$files)
# Add the coder's initials back to the file names, or else this throws an error later when reading in data
RA1_coded_files$files <- paste0(str_sub(RA1_coded_files$files,1, -5), "_", RA1, ".txt")
RA2_coded_files$files <- paste0(str_sub(RA2_coded_files$files,1, -5), "_", RA2, ".txt")
# Change these dataframes back into lists to be used in the loops below:
RA1_coded_files <-unlist(as.list(RA1_coded_files$files))
RA2_coded_files <-unlist(as.list(RA2_coded_files$files))
# List all of the files for the child_id and for both RA's combined
#all_files <- c(RA1_coded_files, RA2_coded_files)
data_list_RA1 <- list()
data_list_RA2 <- list()
# Read in all of the files for the child_id and both RA's data
if( length(RA1_coded_files) == length(RA2_coded_files) ) {
for(i in 1:length(RA1_coded_files)) { # If the lengths are the same, you can just use coder 1 length to loop
# RA1
data_list_RA1[[i]] <- read.csv(paste0(RA1_files,"/", child_id,"/", RA1_coded_files[i]), na.strings = "", sep = "]")
# RA2
data_list_RA2[[i]] <- read.csv(paste0(RA2_files,"/", child_id,"/", RA2_coded_files[i]), na.strings = "", sep = "]")
dim(data_list_RA1[[i]])
dim(data_list_RA2[[i]])
# Check to see if the RA's trials have the same number of frames. If not, then extend the length of the
# shorter trial to be as long as the longer trial.
if( dim(data_list_RA1[[i]])[1] != dim(data_list_RA2[[i]])[1] ){
# Find out which trial has more frames
max_nFrames <- max(data_list_RA1[[i]]$framenum, data_list_RA2[[i]]$framenum) # compare the 2 n_frames to get
# Make the frames equal between the two trials
data_list_RA1[[i]] <- merge(data.frame(framenum = seq(1:max_nFrames)), data_list_RA1[[i]], by = "framenum", all.x = TRUE)
data_list_RA2[[i]] <- merge(data.frame(framenum = seq(1:max_nFrames)), data_list_RA2[[i]], by = "framenum", all.x = TRUE)
} else {
next() #move on
}
}# for
} else{
print("RA's don't have the same number of coded trials for this child!")
}
# Set up a lot of storage lists:
coder_1_looks.ordinal <- list()
coder_2_looks.ordinal <- list()
coder_1_looks.code01 <- list()
coder_2_looks.code01 <- list()
trial <- list()
n_frame <- list()
all_trials <- list()
reliability <- list()
newColNames <- c("framenum", "time","child_id_child_id",
"test_date_date" ,"trial_trial_name",
"coder_1_initials_coder_1_initials",
"coder_1_looks_ordinal",
"coder_1_looks_coder_1_looks",
"coder_1_notes_coder_1_notes",
"tracks___fps", "tracks___file")
# Combine each trial for each RA:
for(i in 1:length(data_list_RA1)) {
# Check the column names of coder 1's data to see if it contains the string "code01"
ifelse(sum(grepl("code01", colnames(data_list_RA1[[i]]))) > 0,
colnames(data_list_RA1[[i]]) <- newColNames, colnames(data_list_RA1[[i]]) <- colnames(data_list_RA1[[i]]))
# Check the column names of coder 2's data to see if it contains the string "code01"
ifelse(sum(grepl("code01", colnames(data_list_RA2[[i]]))) > 0,
colnames(data_list_RA2[[i]]) <- newColNames, colnames(data_list_RA2[[i]]) <- colnames(data_list_RA2[[i]]))
coder_1_looks.ordinal[[i]] <- data_list_RA1[[i]]$coder_1_looks_ordinal #Assign coder 1 looks
coder_2_looks.ordinal[[i]] <- data_list_RA2[[i]]$coder_1_looks_ordinal #Assign coder 2 looks
coder_1_looks.code01[[i]] <- data_list_RA1[[i]]$coder_1_looks_coder_1_looks #Assign coder 1 looks
coder_2_looks.code01[[i]] <- data_list_RA2[[i]]$coder_1_looks_coder_1_looks #Assign coder 2 looks
trial[[i]] <- data_list_RA1[[i]]$trial_trial_name
n_frame[[i]] <- data_list_RA1[[i]]$framenum
temp <- data.frame(bind_cols( coder_1_looks.ordinal = coder_1_looks.ordinal[[i]],
coder_1_looks.code01 = coder_1_looks.code01[[i]],
coder_2_looks.ordinal = coder_2_looks.ordinal[[i]],
coder_2_looks.code01 = coder_2_looks.code01[[i]],
trial = trial[[i]]), n_frame = n_frame[[i]] )
all_trials[[i]] <- temp
}
for(i in 1:length(all_trials)) {
data <- all_trials[[i]]
# Mark instances of looking off screen/not looking/cells that were simply not coded
data$coder_1_looks.ordinal <- tidyr::replace_na(data = data$coder_1_looks.ordinal, replace = "offscreen")
data$coder_1_looks.code01 <- tidyr::replace_na(data = as.character(data$coder_1_looks.code01), replace = "offscreen")
data$coder_2_looks.ordinal <- tidyr::replace_na(data = data$coder_2_looks.ordinal, replace = "offscreen")
data$coder_2_looks.code01 <- tidyr::replace_na(data = as.character(data$coder_2_looks.code01), replace = "offscreen")
# Indicate agreement for each frame
data$agree <- ifelse(data$coder_1_looks.code01 == data$coder_2_looks.code01, 1,0)
# Calculate reliability for this trial and store in a dataframe:
r <- data.frame(raw_agreement = sum(data$agree)/max(data$n_frame)*100, coder1 = RA1, coder2 = RA2,
trial = data$trial[1])
reliability[[i]] <- r
}
final_reliability <- bind_rows(reliability)
return(final_reliability)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.