# TODO: add in switch for raw summaries.
# TODO:
# SRETextract - tool for pulling out JS SRET data ------------------------
SRETextract <- function(dat,
alt.names = NULL,
suffix = NULL,
raw.out.path = NULL,
raw.shape = "wide",
raw.dropmissing = TRUE # if FALSE, keep the extra rows for super-easy merging. TRUE drops missing cases and does not save those ID's in main data
){
# This function should be written back to the full data that you have, like:
# mydata <- SRETextract(mydata)
# It will add several variables that relate to number and percent of responses
# of positive and negative words, along with some checks of # of words seen for
# diagnostics.
require(stringr)
# If variable names not given, uses standard names:
if(is.null(alt.names)){
alt.names <-
c(SRET.words = "SRET.words",
SRET.keys = "SRET.keys",
SRET.time = "SRET.time")
}
# throws error if all the given/default names aren't present:
if(!all(c("SRET.words", "SRET.keys", "SRET.time") %in%
names(alt.names))){
stop("names are wrong")
}
# Setup: wordlists --------------------------------------------------------
# Complete word-list
wordlist <-
c("Funny", "Happy", "Terrible", "Angry", "Ashamed", "Free",
"Sorry", "Hateful", "Kind", "Stupid", "Naughty", "Helpful",
"Joyful", "Worried", "Playful", "Horrible", "Bad", "Fantastic",
"Depressed", "Pleased", "Nasty", "Foolish", "Good", "Brilliant",
"Excited", "Nice", "Annoyed", "Content", "Sad", "Upset",
"Scared", "Awesome", "Unloved", "Fun", "Unhappy", "Alone",
"Cool", "Excellent", "Loved", "Confident", "Unwanted", "Friendly",
"Great", "Lost", "Guilty", "Proud", "Lonely", "Mad",
"Wicked", "Best", "Glad", "Wonderful")
# Positive words
wordlist.positive <-
c("Happy", "Fun", "Brilliant", "Cool", "Good", "Glad",
"Loved", "Friendly", "Wonderful", "Pleased", "Helpful","Proud",
"Fantastic", "Confident", "Content", "Joyful", "Best", "Excited",
"Free", "Funny", "Kind", "Playful", "Great", "Excellent",
"Awesome", "Nice")
# Negative words
wordlist.negative <-
c("Terrible", "Angry", "Ashamed", "Sorry", "Hateful", "Stupid",
"Naughty", "Worried", "Horrible", "Bad", "Depressed", "Nasty",
"Foolish", "Annoyed", "Sad", "Upset", "Scared", "Unloved",
"Unhappy", "Alone", "Unwanted", "Lost", "Guilty", "Lonely",
"Mad", "Wicked")
# Make reference data frame for positive/negative words:
wordlist.ispositive <- ifelse(wordlist %in% wordlist.positive, 1, 0)
# Data frame:
wordlist.reference <-
data.frame(
wordlist,
is.positive = wordlist.ispositive,
stringsAsFactors = FALSE
)
# Initial extraction of JSON data ----------------------------------
# Extract the words from each person's responses:
sret.words <-
str_extract_all(dat[[alt.names["SRET.words"]]],
"[[:alpha:]]+")
# Keypresses
sret.keys <-
str_extract_all(dat[[alt.names["SRET.keys"]]],
"[[:alpha:]]+")
# Endorsements:
sret.agree <-
lapply(sret.keys, function(key){ifelse(key=="P", 1, 0)})
# Response times:
sret.time <-
lapply(str_extract_all(
dat[[alt.names["SRET.time"]]],
"[0-9]+\\.?[0-9]*"),
as.double)
# Checking the length:
length.check <-
data.frame(
words.length = sapply(sret.words, length),
keys.length = sapply(sret.keys, length),
agree.length = sapply(sret.agree, length),
time.length = sapply(sret.time, length)
)
#noting any discrepancies:
length.check$problemflag <-
vapply(seq_len(nrow(length.check)), function(i){
ifelse(
any(is.na(unlist(length.check[i,])) |
all(unlist(length.check[i,]) == 52) |
all(unlist(length.check[i,]) == 0)
), 0L, 1L)
}, FUN.VALUE = integer(1))
# Create a data frame for every person with their responses:
sret.data <-
mapply(
data.frame,
words = sret.words,
keys = sret.keys,
agree = sret.agree,
time = sret.time,
MoreArgs = list(
stringsAsFactors = FALSE
),
SIMPLIFY = FALSE
)
# Adding in a word-position variable, problem flag for RT, valence marker
sret.data <-
lapply(sret.data, function(x){
# Word position (order seen)
len <- seq_len(nrow(x))
x$word.position <- len
# Word valence:
# Compare to list of words:
ispos.index <-
match(x$words, wordlist.reference$wordlist)
# Note if positive or not
x$ispositive <-
wordlist.reference$is.positive[ispos.index]
# Check for RTs that are < 200
x$RTsub200 <- ifelse(x$time < 200, 1, 0)
# Check for RT's that are above 3 AMD > median
xmedian <- median(x$time[x$RTsub200 == 0])
xMAD <- mad(x$time[x$RTsub200 == 0])
x$RToutlier <-
ifelse(
x$RTsub200 == 0 &
x$time > (xmedian + 3*xMAD)
, 1, 0)
x
})
# sret.data[[165]]
# sret.data[[10]]
# Appears to be random
# mean(sret.data[[21]]$words %in%
# sret.data[[249]]$words)
# Summaries back into the main dataset ------------------------------------
output.vars <-
c("endorsepos", "endorseneg",
"RTendorsepos", "RTendorseneg",
"RTrejectpos", "RTrejectneg",
"RToverall",
"totalpos", "totalneg",
"sub200pos", "sub200neg",
"outlierpos", "outlierneg",
"problemflag", "problemdescrip"
)
sret.output.vars <- paste0("sret.", output.vars)
dat[sret.output.vars] <-
do.call(rbind.data.frame,
lapply(sret.data, function(x){
if(nrow(x) == 0 | all(is.na(x$words))){
# Fill everything in with NA's if data is missing
# (empty string or one NA)
rep(NA, length(output.vars))
} else {
# Proportion of positive words endorsed
endorsepos <- mean(x$agree[x$ispositive & !x$RTsub200])
# Proportion of negative words endorsed
endorseneg <- mean(x$agree[!x$ispositive & !x$RTsub200])
# RT of positive words endorsed
RTendorsepos <-
round(digits = 0,
mean(x$time[x$ispositive & x$agree &
!x$RTsub200 & !x$RToutlier]))
# RT of negative words endorsed:
RTendorseneg <-
round(digits = 0,
mean(x$time[!x$ispositive & x$agree &
!x$RTsub200 & !x$RToutlier]))
# RT of positive words rejected:
RTrejectpos <-
round(digits = 0,
mean(x$time[x$ispositive & !x$agree &
!x$RTsub200 & !x$RToutlier]))
# RT of negative words rejected:
RTrejectneg <-
round(digits = 0,
mean(x$time[!x$ispositive & !x$agree &
!x$RTsub200 & !x$RToutlier]))
# RT of negative words rejected:
RToverall <-
round(digits = 0,
mean(x$time[!x$RTsub200 & !x$RToutlier]))
# Number of words seen that were positive:
totalpos <- sum(x$ispositive, na.rm=T)
# Number of words seen that were negative
totalneg <- sum(!x$ispositive, na.rm=T)
# Number of positive <200ms words
sub200pos <- sum(x$ispositive & x$RTsub200, na.rm=T)
# Number of negative <200ms words
sub200neg <- sum(!x$ispositive & x$RTsub200, na.rm=T)
# Number of positive outlier words
outlierpos <- sum(x$ispositive & x$RToutlier, na.rm=T)
# Number of negative outlier words
outlierneg <- sum(!x$ispositive & x$RToutlier, na.rm=T)
#Problem variables:
# Set flags to 0 assuming no problems
problemflag <- 0
problemdescrip <- ""
sep <- NULL
# problemflag for >10% too-fast responding:
if(mean(x$RTsub200) >= .1){
problemflag <- 1
problemdescrip <- "10%sub200"
sep <- "; "
}
# problemflag for >15% problematic responding:
if(mean(rowSums(x[c("RTsub200", "RToutlier")])) >= .15){
problemflag <- 1
problemdescrip <- paste0(problemdescrip, sep, "15%badRT")
sep <- "; "
}
# problemflag for uniform responding:
if(mean(x$agree[!x$RTsub200]) %in% 0:1){
problemflag <- 1
problemdescrip <- paste0(problemdescrip, sep, "uniform")
sep <- "; "
}
# problemflag for potentially noncompliant responding:
if(any(table(x$agree[!x$RTsub200]) <= 5)){
problemflag <- 1
problemdescrip <- paste0(problemdescrip, sep, "near-uniform")
sep <- "; "
}
# Output the new data.frame:
data.frame(endorsepos, endorseneg,
RTendorsepos, RTendorseneg,
RTrejectpos, RTrejectneg,
RToverall,
totalpos, totalneg,
sub200pos, sub200neg,
outlierpos, outlierneg,
problemflag, problemdescrip,
stringsAsFactors = FALSE
)
} # end variable creation
} # End anon function
) # End lapply
) # End do.call
# Error checking:
if(any(length.check$problemflag == 1)){
errors <- which(length.check$problemflag == 1)
dat$sret.problemflag[errors] <- 1
dat$sret.problemdescrip[errors] <-
ifelse(dat$sret.problemdescrip[errors] == "",
"error",
paste0(dat$sret.problemdescrip[errors], "; ", "error")
)
}
# Raw output --------------------------------------------------------------
if(!is.null(raw.out.path)){
# Adds the ID variable:
output.vars <- c("rawmatchID", output.vars)
sret.output.vars <- c("sret.rawmatchID", sret.output.vars)
dat$sret.rawmatchID <-
paste0(
round(as.numeric(Sys.time()), 0), # Start with the current system time
# add a random number from the lowest place value of the available numbers up to the next place value (so if 550, from 100:999)
sample(
x = 10^floor(log10(nrow(dat))):(10^(floor(log10(nrow(dat)))+1)-1),
size = nrow(dat),
replace = FALSE)
)
raw.vars <-
construct.all(c(sort(wordlist.positive), sort(wordlist.negative)), ".",
c("agree", "time.ms", "order", "RTsub200", "RToutlier"))
rawdat <-
do.call(rbind.data.frame,
lapply(sret.data, function(x){
if(nrow(x) == 0 | all(is.na(x$words))){
# Fill everything in with NA's if data is missing
# (empty string or one NA)
rep(NA, length(raw.vars))
} else {
# Sorts each case first by valence, then alphabetically:
x.sort <-
x[order(1-x$ispositive, x$words),
c("words", "ispositive", "agree", "time", "word.position",
"RTsub200", "RToutlier")]
setNames(
# Make a run of the words in question:
as.list(
round(digits = 0,
do.call(c, x.sort[-1])))
,
# Give them the right names, dropping any words if they don't
# have them
construct.all(x.sort$words, ".",
c("positive", "agree", "time.ms", "order",
"RTsub200", "RToutlier"))
) # End SetNames
} # End else
}) # End lapply
) #end do.call (and creation)
rawdat <-
setNames(
cbind(dat[sret.output.vars], rawdat),
c(output.vars, names(rawdat))
)
# Sort to match the original data:
rawdat <- rawdat[match(rawdat$rawmatchID, dat$sret.rawmatchID),]
if(raw.shape %in% c("long", "LONG")){
rawdat$index <- seq_len(nrow(rawdat))
rawdat <-
reshape(rawdat,
direction = "long",
timevar = "word",
idvar = "rawmatchID",
varying = list(
paste0(wordlist, ".order"),
paste0(wordlist, ".positive"),
paste0(wordlist, ".agree"),
paste0(wordlist, ".time.ms"),
paste0(wordlist, ".RTsub200"),
paste0(wordlist, ".RToutlier")
),
v.names = c(
"order",
"positive",
"agree",
"time.ms",
"RTsub200",
"RToutlier")
, times = wordlist
)
rawdat$problemflag <- ifelse(is.na(rawdat$RTendorsepos) & is.na(rawdat$RTendorseneg), NA, rawdat$problemflag)
rawdat$word <- ifelse(is.na(rawdat$RTendorsepos) & is.na(rawdat$RTendorseneg), NA, rawdat$word)
rawdat <- rawdat[order(rawdat$index, rawdat$order),]
rawdat$index <- NULL
}
# Drop the extra rawmatchID's unless requested to keep them
if(raw.dropmissing){
# Drop the ID's from missing cases:
dat$sret.rawmatchID[is.na(dat[alt.names["SRET.keys"]])] <- NA
# Filter the raw data to contain only the remaining ID's
rawdat <- rawdat[rawdat$rawmatchID %in% dat$sret.rawmatchID,]
}
# Outputting to console:
if(raw.out.path %in% c("HERE", "here")){
message("outputting raw data ONLY (with summaries)")
return(rawdat)
} else {
# Outputting to file:
message(paste0("raw data written to ", raw.out.path,
", outputting summaries to original data frame"))
write.csv(rawdat, raw.out.path, row.names = FALSE)
} # End output
} # End raw data section
# Final output ------------------------------------------------------------
# Adds the variable suffix in if requested:
if(!is.null(suffix)){
# Replace the output names with the suffix-appended ones:
names(dat)[names(dat) %in% sret.output.vars] <- paste0(sret.output.vars, suffix)
}
# Output the original data frame plus the new variables:
dat
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.