# R/preprocess_funcs.R
#
# Defines functions getMain getRats
#
# Documented in getMain getRats

#' Preprocesses participant ratings
#'
#' @description
#' Takes in a raw data .csv file (as generated by JavaScript experimental code)
#' and outputs a rating summary table, where each row corresponds to a single
#' item.
#'
#' This file contains functions for preprocessing the raw data:
#' `getRats()` creates a dataframe containing pre and post-task ratings from a
#' single file (session);
#' `getMain()` creates a dataframe containing results of the main part of the
#' experiment.
#'
#' @details
#' Rating rows are the rows of the file whose `cat` column is not empty. They
#' are consumed strictly by position, in blocks of 24 rows: rows 1-24 become
#' `ratV1_pre`, 25-48 `ratP1_pre`, 49-72 `ratV2_pre`, 73-96 `ratP2_pre`,
#' 97-120 `ratV_post` and 121-144 `ratP_post`. Each block is sorted by picture
#' name before the blocks are bound side by side, so every block is assumed to
#' contain the same set of pictures (this is not verified).
#'
#' @import data.table
#'
#' @param file is a .csv datafile (raw experiment output from the online task)
#' @param sub is the specific participant ID (default=NA, when iterating though
#'   all files); when not NA it is stored in the `subjID` column
#' @param full_only (bool) a check whether the file contains all 144 rating
#'   rows (stops with an error otherwise)
#'
#' @return a data.table with session ratings: one row per picture with the six
#'   rating columns, the pre-task means (`preV_mean`, `preP_mean`), the
#'   `chosen` flag, the pre-task rating ranges (`p_range`, `v_range`) and,
#'   when `sub` is given, `subjID`
#' @export
getRats = function(file, sub = NA, full_only = FALSE) {

  # bind column names used inside data.table calls (avoids R CMD check NOTEs)
  p_range = pics = v_range = NULL

  x = data.table::fread(file)

  # the main task starts right after the screen whose stimulus contains 'Great!'
  start_main = which(str_detect(x$stimulus, 'Great!')) - 1

  # first 13 characters of every main-task selector stimulus
  # NOTE(review): this keeps the first picture listed in the stimulus string
  # (assuming 13-character names), not the clicked one - confirm the intent.
  chosen = unique(substr(
    as.character(x$stimulus[x$trial_type == 'image-selector-fb' &
                              x$trial_index > start_main]),
    1, 13))

  x = x[cat != '', ] # leave ratings only

  if (full_only && nrow(x) != 144) {
    # `break` is only valid inside a loop, so signal a proper error instead
    stop('Not enough ratings! Expected 144 rating rows but found ', nrow(x),
         ' in file: ', file, call. = FALSE)
  }

  # one rating block: picture names plus numeric responses, sorted by picture
  rating_block = function(rows, value_name) {
    block = data.table('pics'  = as.character(x$stimulus[rows]),
                       'value' = as.numeric(as.character(x$response[rows])))
    data.table::setnames(block, 'value', value_name)
    block[order(pics), ]
  }

  ratV1_pre = rating_block(1:24,    'ratV1_pre')
  ratP1_pre = rating_block(25:48,   'ratP1_pre')
  ratV2_pre = rating_block(49:72,   'ratV2_pre')
  ratP2_pre = rating_block(73:96,   'ratP2_pre')
  ratV_post = rating_block(97:120,  'ratV_post')
  ratP_post = rating_block(121:144, 'ratP_post')

  # every block is sorted by picture, so the value columns line up row by row
  rats = cbind(ratV1_pre, ratV2_pre[, 2], ratP1_pre[, 2], ratP2_pre[, 2],
               ratV_post[, 2], ratP_post[, 2])
  rats$preV_mean = (rats$ratV1_pre + rats$ratV2_pre) / 2
  rats$preP_mean = (rats$ratP1_pre + rats$ratP2_pre) / 2

  # TRUE when the picture name is among the selector stimuli collected above
  rats$chosen = as.character(rats$pics) %in% chosen

  if (!is.na(sub)) rats$subjID = rep(sub, nrow(rats))

  print (paste('participant', unique(rats$subjID)))
  rats = as.data.table(rats)

  # absolute disagreement between the two pre-task ratings of each kind
  rats[, p_range := abs(ratP1_pre - ratP2_pre)]
  rats[, v_range := abs(ratV1_pre - ratV2_pre)]

  return (rats)

}

#' Preprocesses raw output file from an online experiment
#'
#' Takes in a raw data .csv file (as generated by JavaScript experimental code)
#' as well as the preprocessed ratings and outputs a dataframe, where each row
#' corresponds to a single experimental trial. Creates a set of additional
#' columns with relevant measures.
#'
#' @param file is a .csv datafile (raw experiment output from the online task)
#' @param sub is the specific participant ID. It is used to filter `rats_all`
#'   by `subjID` and, when not NA, is stored in the `subjID` column.
#'   NOTE(review): with the default NA the filter `subjID == sub` selects no
#'   rows, so all rating-based columns come out as NA - confirm the intended
#'   use of the default.
#' @param rats_all is the rating dataframe created with `getRats()`
#' @param jstats (bool) whether to add `magJ`, the scaled judgement magnitude
#' @param exp is the experiment number (default 2). Experiments 1, 3, 4 and 5
#'   have dedicated branches: 1 uses the `ratV1`/`ratP1` rating columns, 3 has
#'   forced-choice decision symbols, 4 fills in trials with an empty stimulus
#'   from the `image-selector-fb1` trials, 5 uses the `reference` column.
#'
#' @return a data.table with one row per main-task trial (decision trial
#'   merged with its judgement trial) plus the derived measures
#' @export
getMain = function(file, sub = NA, rats_all, jstats = TRUE, exp = 2) {

  # Local NULL bindings silence "no visible binding" NOTEs caused by
  # non-standard evaluation. The list also contains function names (select,
  # rename, ...); this is harmless because R skips non-function bindings when
  # it looks up the function of a call.
  Cbias=LR=Pbias=Pdiff=RT=RT2=Rbias=Vdiff=acc=accF=acc_stop=calib=chosen=
    con=consDJ=consRJ=dec_diff=dec_type=dec_type2=diffCon=diffInc=est=if_else=
    image_click=judge_diff=judge_type=judge_type2=judgement=magCon=magD=magInc=
    magJ=p_range=pics=preP_mean=preV_mean=rat=ratP1=ratP_L=ratP_R=ratV1=ratV_L=
    ratV_R=ref=ref_type=reference=rejected=rename=response=rt=select=separate=
    stimL=stimR=stimulus=str_detect=subjID=symbol=trial_index=
    trial_type=v_range=NULL

  x = data.table::fread(file)

  # the main task starts right after the screen whose stimulus contains 'Great!'
  start_main = which(str_detect(x$stimulus, 'Great!')) - 1

  # decision (image selector) and judgement (slider) trials of the main task
  dec   = x[trial_type == 'image-selector-fb' & trial_index > start_main, ]
  judge = x[trial_type == 'image-slider-2AFC' & trial_index > start_main, ]

  dec = select(dec, RT = rt, stimulus, LR = image_click, dec_type = symbol)
  if (exp != 5) {
    judge = select(judge, RT2 = rt, rat = response, judge_type = symbol)
  } else {
    # experiment 5 additionally records which item is the reference
    judge = select(judge, RT2 = rt, rat = response, judge_type = symbol,
                   ref = reference)
  }

  # pair the n-th decision with the n-th judgement; seq_len() also copes with
  # a file that has no main-task trials (seq(1, 0) would give c(1, 0))
  dec$trial_no = judge$trial_no = seq_len(nrow(dec))

  main = merge(dec, judge, by = 'trial_no')

  if (exp == 4) {
    predec = x[trial_type == 'image-selector-fb1' & trial_index > start_main, ]
    predec = select(predec, RT = rt, stimulus, LR = image_click, dec_type = symbol)

    missing = which(main$stimulus == '')
    # fill in trials when subj responded on yellow
    # NOTE(review): assigning the string 'null_yellow' requires LR to be a
    # character column - confirm how image_click is read for these files.
    main[missing, stimulus := predec[missing, stimulus]]
    main[missing, LR := 'null_yellow']
  }

  main[, rat := as.numeric(as.character(rat))]
  main[, RT  := as.numeric(as.character(RT))]
  main[, RT2 := as.numeric(as.character(RT2))]

  # split "left,right" stimulus strings into separate columns
  main = main %>% separate(stimulus, into = c('stimL', 'stimR'), sep = ',')

  # translate cue image names into short condition labels
  if (exp == 3) {
    main[, dec_type := plyr::mapvalues(dec_type,
                                       c('forcedL.png', 'forcedR.png', 'heartS.png', 'sizeS.png'),
                                       c('fl', 'fr', 'val', 'per'))]
  } else {
    main[, dec_type := plyr::mapvalues(dec_type,
                                       c('noS.png', 'heartS.png', 'sizeS.png'),
                                       c('no', 'val', 'per'))]
  }
  main[, judge_type := plyr::mapvalues(judge_type,
                                       c('heartS.png', 'sizeS.png'),
                                       c('val', 'per'))]

  #####################
  ##### add rat values
  #####################
  rats = rats_all[subjID == sub, ]  # filter based on current subject

  if (exp == 1) {
    rat_pre = rats[, list(pics, ratV1, ratP1)]
    pics = as.character(rat_pre$pics)
    main$ratP_L = main$ratP_R = main$ratV_L = main$ratV_R = rep(NA, nrow(main))

    # look up the rating of the left / right picture of every trial
    for (j in seq_along(pics)) {
      main$ratP_L[main$stimL == pics[j]] = rat_pre$ratP1[j]
      main$ratP_R[main$stimR == pics[j]] = rat_pre$ratP1[j]
      main$ratV_L[main$stimL == pics[j]] = rat_pre$ratV1[j]
      main$ratV_R[main$stimR == pics[j]] = rat_pre$ratV1[j]
    }
  } else if (exp > 1) {
    rat_pre = rats[, list(pics, preV_mean, preP_mean, p_range, v_range)]
    pics = as.character(rat_pre$pics)
    main$ratP_L = main$ratP_R = main$ratV_L = main$ratV_R = rep(NA, nrow(main))
    # create the range columns up front instead of relying on sub-assignment
    # into a column that does not exist yet
    main$ranP_L = main$ranP_R = main$ranV_L = main$ranV_R = rep(NA, nrow(main))

    for (j in seq_along(pics)) {
      # mean pre-task rating of the left / right picture
      main$ratP_L[main$stimL == pics[j]] = rat_pre$preP_mean[j]
      main$ratP_R[main$stimR == pics[j]] = rat_pre$preP_mean[j]
      main$ratV_L[main$stimL == pics[j]] = rat_pre$preV_mean[j]
      main$ratV_R[main$stimR == pics[j]] = rat_pre$preV_mean[j]

      # rating range of the left / right picture
      main$ranP_L[main$stimL == pics[j]] = rat_pre$p_range[j]
      main$ranP_R[main$stimR == pics[j]] = rat_pre$p_range[j]
      main$ranV_L[main$stimL == pics[j]] = rat_pre$v_range[j]
      main$ranV_R[main$stimR == pics[j]] = rat_pre$v_range[j]
    }
  }

  ############
  ### add stats
  ############

  # rating-based differences (right minus left)
  main[, Pdiff := ratP_R - ratP_L]
  main[, Vdiff := ratV_R - ratV_L]

  main[dec_type   == 'per', dec_diff   := Pdiff]  # diff relevant for choice
  main[dec_type   == 'val', dec_diff   := Vdiff]
  main[judge_type == 'per', judge_diff := Pdiff]  # diff relevant for judge
  main[judge_type == 'val', judge_diff := Vdiff]

  if (exp == 5) {
    # if left is reference, the scale is reversed
    main[ref == 'first', judge_diff := -judge_diff]
  }

  if (exp == 3) {
    # forced-choice trials use the judgement-relevant difference
    main[dec_type == 'fr', dec_diff := judge_diff]
    main[dec_type == 'fl', dec_diff := judge_diff]
  }

  # accuracy: LR is compared against the sign of the choice-relevant
  # difference (0 is correct when right - left <= 0, 1 when it is > 0)
  main[LR == 0 & dec_diff <= 0, acc := 1]
  main[LR == 0 & dec_diff >  0, acc := 0]
  main[LR == 1 & dec_diff <= 0, acc := 0]
  main[LR == 1 & dec_diff >  0, acc := 1]

  if (exp == 3) {
    # keep the accuracy of forced trials in accF, blank it in acc
    main[, accF := acc]
    main[(dec_type == 'fr') | (dec_type == 'fl'), acc := NA]
  }

  # rating bias (Rbias / Pbias) and its choice-signed version (Cbias)
  if (exp != 5) {
    main[judge_type == 'per', Rbias := rat - Pdiff]
    main[judge_type == 'val', Rbias := rat - Vdiff]

    main[LR == 1, Cbias :=  Rbias]
    main[LR == 0, Cbias := -Rbias]
  } else {
    # recode the reference as 0 ('first') / 1 so it can be compared with LR
    main[, ref := ifelse(ref == 'first', 0, 1)]
    main[judge_type == 'per', Pbias := rat - Pdiff] # positive bias
    main[judge_type == 'val', Pbias := rat - Vdiff]

    main[LR == ref, Cbias :=  Pbias]
    main[LR != ref, Cbias := -Pbias]
    main[dec_type == 'no', Cbias := NA]
  }

  # consistency: DJ: decision - judgement; RJ: rating - judgement
  if (exp != 5) {
    main[LR == 1 & rat >= 0, consDJ := 1]
    main[LR == 1 & rat <  0, consDJ := 0]
    main[LR == 0 & rat >  0, consDJ := 0]
    main[LR == 0 & rat <= 0, consDJ := 1]
  } else {
    main[LR == ref & rat >= 0, consDJ := 1]
    main[LR == ref & rat <  0, consDJ := 0]
    main[LR != ref & rat >  0, consDJ := 0]
    main[LR != ref & rat <= 0, consDJ := 1]
  }
  main[judge_diff >= 0 & rat >= 0, consRJ := 1]
  main[judge_diff <  0 & rat >  0, consRJ := 0]
  main[judge_diff >  0 & rat <  0, consRJ := 0]
  main[judge_diff <= 0 & rat <= 0, consRJ := 1]

  if (exp == 5) {
    main[LR == ref, ref_type := 'choice_con']
    main[LR != ref, ref_type := 'choice_inc']
    main[dec_type == 'no', ref_type := 'neutral']
  }

  # calibration: absolute distance between judgement and rating difference
  main[, calib := abs(rat - judge_diff)]

  # trial congruency between the decision cue and the judgement cue
  main[, con := if_else(dec_type == 'no', 'neu',
                        if_else(dec_type == judge_type, 'con', 'inc'))]

  if (exp == 3) {
    main[(dec_type == 'fl') | (dec_type == 'fr'), con := 'neu']
  }

  # difficulty: absolute rating difference on the judged dimension (Con) and
  # on the other dimension (Inc). Both branches of diffInc are positive; a
  # stray minus sign used to negate the 'val' case.
  main[judge_type == 'per', diffCon := abs(ratP_L - ratP_R)]
  main[judge_type == 'val', diffCon := abs(ratV_L - ratV_R)]
  main[judge_type == 'per', diffInc := abs(ratV_L - ratV_R)]
  main[judge_type == 'val', diffInc := abs(ratP_L - ratP_R)]

  # magnitude: summed ratings on the judged (Con) / other (Inc) dimension
  main[judge_type == 'per', magCon := ratP_L + ratP_R]
  main[judge_type == 'val', magCon := ratV_L + ratV_R]
  main[judge_type == 'per', magInc := ratV_L + ratV_R]
  main[judge_type == 'val', magInc := ratP_L + ratP_R]

  # scaled difficulty / decision magnitude (currently disabled):
  # main[, diffiD := scale(100-abs(dec_diff))] #scaled
  # if (jstats) main[, diffiJ := scale(100-abs(judge_diff))]
  #
  # main[dec_type == 'per', magD := scale(200-(ratP_L + ratP_R))]
  # main[dec_type == 'val', magD := scale(200-(ratV_L + ratV_R))]

  if (jstats) {
    # scaled judgement magnitude, computed separately per judgement type
    main[judge_type == 'per', magJ := scale(200 - (ratP_L + ratP_R))]
    main[judge_type == 'val', magJ := scale(200 - (ratV_L + ratV_R))]
  }

  if (!is.na(sub)) main$subjID = rep(sub, nrow(main))

  # chosen / rejected picture: LR == 0 takes the left one, LR == 1 the right
  main[LR == 0, chosen := stimL]
  main[LR == 1, chosen := stimR]

  main[LR == 0, rejected := stimR]
  main[LR == 1, rejected := stimL]

  main$LR = as.factor(main$LR)

  main = main %>% rename(judgement = rat)

  # overestimation of the higher rated (left NA when judge_diff is 0)
  main[judge_diff > 0, est :=   judgement - judge_diff]
  main[judge_diff < 0, est := -(judgement - judge_diff)]

  # to make it easier for the paper
  main[, judge_type2 := ifelse(judge_type == 'val', 'pref', 'size')]
  main[, dec_type2   := as.factor(ifelse(dec_type == 'val', 'pref',
                                         ifelse(dec_type == 'per', 'size', 'no')))]
  return (main)
}
# SpTB/cibtools documentation built on Dec. 18, 2021, 2:08 p.m.