library(dplyr)
library(tidyr)
library(readr)
library(readxl)
# Input suicide coding data
# suicide coding.4.txt removed all quote marks, whereas .3. has them.
d1 = read.delim('data-raw/suicide coding.4.txt', stringsAsFactors=FALSE, na.string=c('na', 'NA'), fill=F, quote="")
# Read Syme Garfield reconciled version, which overrides the 17 theoretical variables, and adds two more
rec = read.delim("data-raw/Final Reconciliation.txt", stringsAsFactors=FALSE, na.string=c('', 'na', 'NA'), fill=F)
# All coding is now identical, so only need data from 1 coder
rec = rec[rec$coder=='syme',]
# ex_doc has the document id's and years for each extract id
ex_doc = read.csv('data-raw/extract document id codes.txt')
ex_doc$document.year[ex_doc$document.year==0] = NA
documents = read.delim('data-raw/documents.tsv', as.is=c('field_years', 'location', 'title', 'authors', 'publisher'))
ex_doc$document.authors = NA
for (i in 1:nrow(ex_doc)){
ex_doc$document.authors[i] = documents$authors[documents$id==ex_doc$document.id[i]]
}
d1 = merge(d1, ex_doc, by.x='id', by.y='extract.id')
# Remove cultures that are not in the probability sample
remove = c('Nuer', 'Igbo', 'Bakairi', 'Bagisu', 'Banyoro', 'Bena', 'Bengali')
d1 = d1[!(d1$culture %in% remove),]
rec = rec[!(rec$culture %in% remove),]
# Remove death of a loved one, already coded
d1$Traumatic_events[d1$Traumatic_events == 'death of a loved one'] = NA
# Fix entry error
d1$group_size[d1$id == 1902] = '2'
# Fix entry error
d1$group_size[d1$id == 1902] = '2'
# Fix entry error
d1$Evidence_Mental_Illness[d1$id == 515] = 'Evidence for'
# Fix entry error
d1$content_type[d1$id == 39] = 'Cultural model'
# Fix entry error
d1$content_type[d1$id == 144] = 'Cultural model'
# Fix entry error
d1$content_type[d1$id == 1908] = 'Cultural model'
# Fix entry error
d1$SA_sex[d1$id == 117] = 'both'
# Fix entry error
d1$SA_sex[d1$id == 134] = 'both'
# Fix entry error
d1$SA_sex[d1$id == 135] = 'male'
# Fix entry error
d1$SA_age[d1$id == 135] = 'adult'
# Create group size variable that excludes population rates
d1$group_size2 = rep(NA, length(d1$group_size))
d1$group_size2[d1$group_size == '1'] = '1'
d1$group_size2[d1$group_size == '2'] = '2'
d1$group_size2[d1$group_size == '1,2'] = '2'
d1$group_size2[d1$group_size == '1 or 2'] = '2'
d1$group_size2[d1$group_size == '1s 1a'] = '2'
d1$group_size2[d1$group_size == '3'] = '>2'
d1$group_size2[d1$group_size == '4'] = '>2'
d1$group_size2[d1$group_size == '26s 1a'] = '>2'
d1$group_size2[d1$group_size == '100+'] = '>2'
d1$group_size2[d1$group_size == 'group'] = '>2'
d1$group_size2[d1$group_size == 'many'] = '>2'
d1$group_size2[d1$group_size == 'unknown'] = '>2'
# Fix method typos
d1$Method = as.character(d1$Method)
d1$Method[d1$Method == 'ask_someone_else,not stated'] = 'ask_someone_else,not_stated'
d1$Method[d1$Method == 'ask_someone_else'] = 'ask_someone_else,not_stated'
d1$Method[d1$Method == 'not stated,will to die'] = 'will to die'
# Number of methods
d1$num_methods = rep(NA, length(d1$Method))
for (i in 1:length(d1$Method)){
d1$num_methods[i] = length(strsplit(as.character(d1$Method[i]), ',')[[1]])
}
d1$SA_rel2 = as.character(d1$SA_relationship) # Make a copy of SA_relationship
d1$SA_rel2[d1$SA_relationship == 'Wives'] = 'spouse' # Set 'Wives' to 'spouse'
d1$SA_rel2[d1$SA_relationship == 'wife'] = 'spouse'
d1$SA_rel2[d1$SA_relationship == 'husband'] = 'spouse'
d1$SA_rel2[d1$SA_relationship == 'spouse,partner'] = 'spouse'
d1$SA_rel2[d1$SA_relationship == 'accusers']='social partners'
d1$SA_rel2[d1$SA_relationship == 'adversary']='social partners'
d1$SA_rel2[d1$SA_relationship == 'affine']='affines'
d1$SA_rel2[d1$SA_relationship == 'avenger']='social partners'
d1$SA_rel2[d1$SA_relationship == 'brother in law/husband']='affines'
d1$SA_rel2[d1$SA_relationship == 'chief']='social superior'
d1$SA_rel2[d1$SA_relationship == 'competitor']='sexual competitor'
d1$SA_rel2[d1$SA_relationship == 'adversary']='social partners'
d1$SA_rel2[d1$SA_relationship == 'enemies']='social partners'
d1$SA_rel2[d1$SA_relationship == 'father']='parents'
d1$SA_rel2[d1$SA_relationship == 'father in law']='affines'
d1$SA_rel2[d1$SA_relationship == 'brother']='siblings'
d1$SA_rel2[d1$SA_relationship == 'competitor ']='sexual competitor'
d1$SA_rel2[d1$SA_relationship == 'competitor']='sexual competitor'
d1$SA_rel2[d1$SA_relationship == 'exwife']='exspouse'
d1$SA_rel2[d1$SA_relationship == 'family']='parents'
d1$SA_rel2[d1$SA_relationship == 'mother in law']='affines'
d1$SA_rel2[d1$SA_relationship == 'kin group']='kin'
d1$SA_rel2[d1$SA_relationship == 'mother in law']='affines'
d1$SA_rel2[d1$SA_relationship == 'Outgroup']='outgroup'
d1$SA_rel2[d1$SA_relationship == 'paramour']='sexual competitor'
d1$SA_rel2[d1$SA_relationship == 'uncle']='kin'
d1$SA_rel2[d1$SA_relationship == 'mother']='parents'
d1$SA_rel2[d1$SA_relationship == 'social partner']='social partners'
d1$SA_rel2[d1$SA_relationship == 'son']='offspring'
d1$SA_rel2[d1$SA_relationship == 'authorities,parents,spouse']='multiple social domains'
d1$SA_rel2[d1$SA_relationship == 'brothers,social group']='multiple social domains'
d1$SA_rel2[d1$SA_relationship == 'cowife,affines']='multiple social domains'
d1$SA_rel2[d1$SA_relationship == 'father,chief']='multiple social domains'
d1$SA_rel2[d1$SA_relationship == 'father,others']='multiple social domains'
d1$SA_rel2[d1$SA_relationship == 'parents,peers']='multiple social domains'
d1$SA_rel2[d1$SA_relationship == 'mate/potential mate']='potential spouse'
d1$SA_rel2[d1$SA_relationship == 'kin,potential mates']='multiple social domains'
d1$SA_rel2[d1$SA_relationship == 'kin,social group']='multiple social domains'
d1$SA_rel2[d1$SA_relationship == 'husband,others,unknown']='multiple social domains'
d1$SA_rel2[d1$SA_relationship == 'ingroup and outgroup']='multiple social domains'
d1$SA_rel2[d1$SA_relationship == 'kin,husband']='multiple kin'
d1$SA_rel2[d1$SA_relationship == 'uncle,spouse']='multiple kin'
d1$SA_rel2[d1$SA_relationship == 'wife,sons']='multiple kin'
d1$SA_rel2[d1$SA_relationship == 'wife,kin']='multiple kin'
d1$SA_rel2[d1$SA_relationship == 'wife,social group']='multiple social domains'
d1$SA_rel2[d1$SA_relationship == 'father,brother']='multiple kin'
d1$SA_rel2[d1$SA_relationship == 'government']='political power'
d1$SA_rel2[d1$SA_relationship == 'political group']='political power'
d1$SA_rel2[d1$SA_relationship == 'slaver owners']='captors'
d1$SA_rel2[d1$SA_relationship == 'slave owners']='captors'
d1$SA_rel2[d1$SA_relationship == 'superior']='social superior'
d1$SA_rel2[d1$SA_relationship == 'employer']='employers'
d1$SA_rel2[d1$SA_relationship == 'police']='social superior'
d1$SA_rel2[d1$SA_relationship == 'social group']='social partners'
d1$SA_rel2[d1$SA_relationship == 'wife,kin group']='multiple social domains'
d1$SA_rel2[d1$SA_relationship == 'husband,others unknown']='multiple social domains'
# Make a copy of Traumatic_events
d1$Traum_event2 = as.character(d1$Traumatic_events)
d1$Traum_event2[d1$Traumatic_events == 'perpetrate violence,no evidence']='perpetrate violence'
d1$Traum_event2[d1$Traumatic_events == 'physical abuse,sexual abuse']='sexual abuse'
d1$Traum_event2[d1$Traumatic_events == 'war,no evidence']='trauma related to war'
d1$Traum_event2[d1$Traumatic_events == 'war']='trauma related to war'
d1$Traum_event2[d1$Traumatic_events == 'war,refugee']='trauma related to war'
d1$Traum_event2[d1$Traumatic_events == 'war,combat,witness violence,perpetrate violence']='trauma related to combat'
d1$Traum_event2[d1$Traumatic_events == 'war,perpetrate violence']='trauma related to combat'
d1$Traum_event2[d1$Traumatic_events == 'natural disaster,traum to a loved one']='natural disaster'
d1$Traum_event2[d1$Traumatic_events == 'physical abuse,witness violence']='physical abuse'
d1$Traum_event2[d1$Traumatic_events == 'witness violence']='physical abuse'
d1$Traum_event2[d1$Traumatic_events == 'no evidence']='unknown'
d1$Traum_event2[d1$Traumatic_events == 'other']='unknown'
d1$Traum_event2[d1$Traumatic_events == 'other']='unknown'
# Fix some coding errors in Type_of_SB
d1$Type_ofSB[d1$id==25] <- 'completion'
d1$Type_ofSB[d1$id==257] <- 'completion,unknown'
d1$Type_ofSB[d1$id==261] <- 'threat,ideation,attempt'
d1$Type_ofSB[d1$id==357] <- 'threat,attempt'
d1$Type_ofSB[d1$id==393] <- 'attempt,unknown'
d1$Type_ofSB[d1$id==571] <- 'unknown'
# Make a copy of Type_of_SB
d1$TypeSB2 = as.character(d1$Type_of_SB)
d1$TypeSB2[d1$Type_of_SB == 'attempt,completion,unknown']='unknown'
d1$TypeSB2[d1$Type_of_SB == 'completion,unknown']='unknown'
d1$TypeSB2[d1$Type_of_SB == 'threat,completion,unknown']='unknown'
d1$TypeSB2[d1$Type_of_SB == 'threat,attempt,unknown']='unknown'
d1$TypeSB2[d1$Type_of_SB == 'threat,ideation,attempt,unknown']='unknown'
d1$TypeSB2[d1$Type_of_SB == 'attempt,unknown']='unknown'
# These two records are "mixed" nonlethal and lethal
d1$TypeSB2[d1$id==267] <- 'threat,completion'
d1$TypeSB2[d1$id==1862] <- 'threat,attempt,completion'
# Make a copy of ailments
d1$ailments2 = as.character(d1$ailments)
d1$ailments2[d1$ailments == 'other,unknown']='unknown'
d1$ailments2[d1$ailments == 'other']='unknown'
d1$ailments2[d1$ailments == 'other chronic pain,unknown']='other chronic pain'
d1$ailments2[d1$ailments == 'gastrointestinal*']='gastrointestinal'
d1$ailments2[d1$ailments == 'gastrointestinal ']='gastrointestinal'
# Make a copy of social_status
d1$status2 = as.character(d1$social_status)
d1$status2[d1$social_status == 'average,unknown']='unknown'
d1$status2[d1$social_status == 'high,unknown']='probably high'
d1$status2[d1$social_status == 'high,average,unknown']='unknown'
d1$status2[d1$social_status == 'average,low,unknown']='unknown'
d1$status2[d1$social_status == 'low,unknown']='probably low'
d1$status2[d1$social_status == 'average,low']='probably low'
d1$status2[d1$social_status == 'average,low']='probably low'
# Make a copy of Target
d1$Target2 = as.character(d1$Target)
d1$Target2[d1$Target == 'adversary']= 'social partners'
d1$Target2[d1$Target == 'brother in law/husband']= 'affines'
d1$Target2[d1$Target == 'chief']= 'social superior'
d1$Target2[d1$Target == "paramour and paramour's kin"]= 'sexual competitor'
d1$Target2[d1$Target == 'child']= 'offspring'
d1$Target2[d1$Target == 'children']= 'offspring'
d1$Target2[d1$Target == 'exwife']= 'ex wife'
d1$Target2[d1$Target == 'husband ']= 'husband'
d1$Target2[d1$Target == 'immediate kin']= 'kin'
d1$Target2[d1$Target == 'immediate kin,spouse']= 'kin,spouse'
d1$Target2[d1$Target == 'kin and social group']= 'kin,social group'
d1$Target2[d1$Target == 'kin group']= 'kin'
d1$Target2[d1$Target == 'SA']= 'unknown'
d1$Target2[d1$Target == 'social partner']= 'social partners'
d1$Target2[d1$Target == 'social group']= 'social partners'
d1$Target2[d1$Target == 'paramour ']= 'sexual competitor'
d1$Target2[d1$Target == 'enemies']= 'social partners'
# Make a copy of Cause_type
d1$Cause_type2 = as.character(d1$Cause_type)
d1$Cause_type2[d1$Cause_type == 'death loved one,accus_commit wrongdo,spirit attack']= 'death loved one'
d1$Cause_type2[d1$Cause_type == 'death loved one,disease outbreak']= 'death loved one'
d1$Cause_type2[d1$Cause_type == 'death_loved_one,disease outbreak ']= 'death loved one'
d1$Cause_type2[d1$Cause_type == 'death loved one,other']= 'death loved one'
d1$Cause_type2[d1$Cause_type == 'death_loved_one,spirit attack']= 'death loved one'
d1$Age = as.character(d1$Age)
d1$age_min = rep(0, length(d1$Age))
d1$age_max = d1$age_min
d1$age_min[d1$Age == 'child'] = 5
d1$age_max[d1$Age == 'child'] = 14
d1$age_min[d1$Age == 'child,young adult'] = 5
d1$age_max[d1$Age == 'child,young adult'] = 19
d1$age_min[d1$Age == 'child,young adult,unknown'] = 5
d1$age_max[d1$Age == 'child,young adult,unknown'] = 19
d1$age_min[d1$Age == 'child,young adult,adult'] = 5
d1$age_max[d1$Age == 'child,young adult,adult'] = 60
d1$age_min[d1$Age == 'child,young adult,adult,unknown'] = 5
d1$age_max[d1$Age == 'child,young adult,adult,unknown'] = 90
d1$age_min[d1$Age == 'child,young adult,adult,elderly'] = 5
d1$age_max[d1$Age == 'child,young adult,adult,elderly'] = 90
d1$age_min[d1$Age == 'young adult'] = 15
d1$age_max[d1$Age == 'young adult'] = 19
d1$age_min[d1$Age == 'young adult,unknown'] = 15
d1$age_max[d1$Age == 'young adult,unknown'] = 30
d1$age_min[d1$Age == 'young adult,adult'] = 15
d1$age_max[d1$Age == 'young adult,adult'] = 90
d1$age_min[d1$Age == 'young adult, adult'] = 15
d1$age_max[d1$Age == 'young adult, adult'] = 90
d1$age_min[d1$Age == 'young adult,adult,unknown'] = 15
d1$age_max[d1$Age == 'young adult,adult,unknown'] = 90
d1$age_min[d1$Age == 'young adult,adult,elderly'] = 15
d1$age_max[d1$Age == 'young adult,adult,elderly'] = 90
d1$age_min[d1$Age == 'young adult,adult,elderly,unknown'] = 15
d1$age_max[d1$Age == 'young adult,adult,elderly,unknown'] = 90
d1$age_min[d1$Age == 'adult,unknown'] = 15
d1$age_max[d1$Age == 'adult,unknown'] = 90
d1$age_min[d1$Age == 'adult'] = 20
d1$age_max[d1$Age == 'adult'] = 60
d1$age_min[d1$Age == 'adult,elderly'] = 20
d1$age_max[d1$Age == 'adult,elderly'] = 90
d1$age_min[d1$Age == 'adult,elderly,unknown'] = 20
d1$age_max[d1$Age == 'adult,elderly,unknown'] = 90
d1$age_min[d1$Age == 'elderly'] = 61
d1$age_max[d1$Age == 'elderly'] = 90
d1$age_min[d1$Age == 'elderly,unknown'] = 20
d1$age_max[d1$Age == 'elderly,unknown'] = 90
# The following min and max ages were the modes
# for the mins and maxes where the age range <= 40
d1$age_min[d1$Age == 'unknown'] = 15
d1$age_max[d1$Age == 'unknown'] = 90
d1$age_min[is.na(d1$Age)] = NA
d1$age_max[is.na(d1$Age)] = NA
d1$age_range = d1$age_max - d1$age_min
### Recode Sex ###
d1$Sex2 = as.character(d1$Sex)
d1$Sex2[d1$Sex=='both' & d1$content_type=='Cultural model'] = 'unknown'
d1$Sex2[d1$Sex=='more often men'] = 'male'
d1$Sex2[d1$Sex=='more often women'] = 'female'
d1$Sex2 = factor(d1$Sex2)
# Convert case_score to numeric
d1$case_score[d1$case_score=='x'] = NA
d1$case_score = as.numeric(d1$case_score)
## Now replace theoretical variables in d1 with reconciled versions from rec
# remove id 468 from rec (Bakairi, which are not in probability sample)
rec = rec[rec$id!=468,]
# First save d1 row id 1909
d1_1909 = d1[d1$id==1909,]
# Only include ids in d1 that are in rec (the others were duplicates)
d1 = d1[d1$id %in% intersect(rec$id, d1$id),]
# The variables that were reconciled
vars = names(rec)[6:24]
# Now substitute the reconciled values for the original values
for (i in 1:nrow(d1)){
d1[d1$id==rec$id[i],vars] = rec[i,vars]
}
# Some extracts in original dataset were split into new, additional
# extracts in the reconciled file
# rec ids a-k were split from 1909
new_extracts = rec[rec$id %in% c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k'),]
vars = c('id', vars)
for (i in 1:nrow(new_extracts)){
newrow = d1_1909
newrow[,vars] = new_extracts[i,vars]
d1 = rbind(d1,newrow)
}
# Make a copy of public_private
d1$PublicSB = rep(0, nrow(d1))
d1$PublicSB[is.na(d1$public_private)]=0
d1$PublicSB[d1$public_private == 'public']=1
d1$PublicSB[d1$public_private == 'private']=-1
d1$PublicSB[d1$public_private == 'Private']=-1
d1$PublicSB[d1$public_private == 'Public']=1
# Decided to code Fitness threat "Evidence for" regardless of presence or absence of "Conflict"
# This resulted in many extracts being recoded as "Evidence for" Fitness threat
corrections = read.csv('data-raw/Corrected data.csv', colClasses='character')
for (i in 1:nrow(corrections)){
d1$Fitness_threat[d1$id == corrections$id[i]] = 1
}
# A few corrections to Burdensomeness and LowRP
# id Burdensomeness Low_RP
# 190 Evidence for Evidence for
# 214 Evidence for
# 581 Evidence for Evidence for
d1$Evidence_of_burdensomeness[d1$id==190] = 1
d1$Evidence_of_burdensomeness[d1$id==581] = 1
d1$Low_RP[d1$id==190] = 1
d1$Low_RP[d1$id==214] = 1
d1$Low_RP[d1$id==581] = 1
# There was a discrepency between Low RP and - High RP in these 5 rows, which we reviewed and rectified
d1$Low_RP[d1$id==27] = 0
d1$Low_RP[d1$id==106] = -1
d1$High_RP[d1$id==190] = -1
d1$Low_RP[d1$id==571] = -1
d1$High_RP[d1$id==581] = -1
# There were 4 discrepancies between "Outcome: others harmed" and minus "Outcome: others better off", which we
# reviewed and rectified. Only 1 value needed to be changed.
d1$Outcome_others_harmed[d1$id==4] = 1 # Eight children left motherless
# Aggregate types of SB
d1$TypeSB3 = d1$TypeSB2
d1$TypeSB3[d1$TypeSB3=='attempt,completion'] = 'completion'
d1$TypeSB3[d1$TypeSB3=='ideation,completion'] = 'completion'
d1$TypeSB3[d1$TypeSB3=='threat,attempt,completion'] = 'completion'
d1$TypeSB3[d1$TypeSB3=='threat,completion'] = 'completion'
d1$TypeSB3[d1$TypeSB3=='threat,ideation,completion'] = 'completion'
d1$TypeSB3[d1$TypeSB3=='plan,ideation'] = 'ideation'
d1$TypeSB3[d1$TypeSB3=='threat,attempt'] = 'attempt'
d1$TypeSB3[d1$TypeSB3=='threat,ideation'] = 'threat'
d1$TypeSB3[d1$TypeSB3=='threat,plan'] = 'threat'
d1$TypeSB3[d1$TypeSB3=='threat,plan,ideation'] = 'threat'
d1$TypeSB3 = factor(d1$TypeSB3, levels = c('completion', 'attempt', 'threat', 'ideation', 'unknown'))
d1$TypeSB4 = d1$TypeSB2
d1$TypeSB4[d1$TypeSB4=='attempt,completion'] = 'mixed'
d1$TypeSB4[d1$TypeSB4=='ideation,completion'] = 'mixed'
d1$TypeSB4[d1$TypeSB4=='threat,attempt,completion'] = 'mixed'
d1$TypeSB4[d1$TypeSB4=='threat,completion'] = 'mixed'
d1$TypeSB4[d1$TypeSB4=='threat,ideation,completion'] = 'mixed'
d1$TypeSB4[d1$TypeSB4=='plan,ideation'] = 'ideation'
d1$TypeSB4[d1$TypeSB4=='threat,attempt'] = 'attempt'
d1$TypeSB4[d1$TypeSB4=='threat,ideation'] = 'threat'
d1$TypeSB4[d1$TypeSB4=='threat,plan'] = 'threat'
d1$TypeSB4[d1$TypeSB4=='threat,plan,ideation'] = 'threat'
d1$TypeSB4 = factor(d1$TypeSB4, levels = c('completion', 'mixed', 'attempt', 'threat', 'ideation', 'unknown'))
### Rename theoretical variables for readability ###
# Groups of theoretical variables
# These will be used througout the ms
base_vars = c('culture', 'region')
bar_vars = c('Conflict', 'Fitness_threat', 'High_RP', 'Lethality_n_attempt', 'Motive_leverage', 'Outcome_change', 'Target_fitness', 'powerlessness', 'Motive_harm_others', 'Outcome_others_harmed', 'aggression', 'PublicSB')
fit_vars = c('Low_RP', 'Motive_better_off_kin', 'Motive_make_others_better_off', 'Evidence_of_burdensomeness', 'Outcome_better_off')
theory_vars = c(bar_vars, fit_vars)
bar_vars.new = c('Conflict', 'Fitness threat', 'High RP', 'Low lethality of attempt', 'Motive: Leverage', 'Outcome: victim better off', 'Social partner fitness', 'Powerlessness', 'Motive: harm others', 'Outcome: others harmed', 'Aggression', 'Public SB')
fit_vars.new = c('Low RP', 'Motive: kin better off', 'Motive: make others better off', 'Evidence of burdensomeness', 'Outcome: others better off')
theory_vars.new = c(bar_vars.new, fit_vars.new)
nms = names(d1)
for (i in 1:length(theory_vars)){
if (theory_vars[i] %in% nms){
c = grep(theory_vars[i], nms, fixed=T)
names(d1)[c[1]] = theory_vars.new[i]
}
}
coding <- d1
# Decided to rename "Aggression" to "AngrySB" to
# better reflect operationalization
coding = dplyr::rename(coding, AngrySB = Aggression)
bar_vars.new[11] = 'AngrySB'
theory_vars.new = c(bar_vars.new, fit_vars.new)
selectvars = c('id', fit_vars.new, bar_vars.new)
# Merge Syme Garfield Hagen consensus
sgh = read.csv('data-raw/coding.csv', stringsAsFactors = F)
sgh = sgh[, c(1, 3:19)]
names(sgh) = selectvars
# Some rows were deleted (because they were duplicates)
# and some rows were split in two. So, need to
# delete or duplicate those rows in coding
# First duplicate these rows which actually had two cases/cultural models
# that needed to be coded separately
r1655b = coding[coding$id=='1655',]
r1655b$id = '1655b'
coding$Sex[coding$id=='1655'] = 'female'
coding$Sex2[coding$id=='1655'] = 'female'
r1906b = coding[coding$id=='1906',]
r1906b$id = '1906b'
r1906b$content_type = 'Case'
coding = rbind(coding, r1655b, r1906b)
# Delete these rows
coding = coding[coding$id != 1788,] # Duplicate of id 58
coding = coding[coding$id != 1894,] # Duplicate of id 297
coding = coding[coding$id != 1903,] # Not suicidal model or case
coding = coding[coding$id != 'g',] # Not suicide model or case (lit review)
# Now replace old coding with new consensus coding
coding = left_join(coding[,!(names(coding) %in% theory_vars.new)], sgh, by='id')
# Convert -1 to new variables
d = coding[, theory_vars.new]
# Add new variables to capture -1's
d$`Outcome: victim worse off` = as.integer(d$`Outcome: victim better off`==-1)
d$`High lethality of attempt` = as.integer(d$`Low lethality of attempt`==-1)
d$`No social partner fitness` = as.integer(d$`Social partner fitness`==-1)
d$`Private SB` = as.integer(d$`Public SB`==-1)
d$`Evidence against burdensomeness` = as.integer(d$`Evidence of burdensomeness`==-1)
d$`Evidence against fitness threat` = as.integer(d$`Fitness threat`==-1)
d = lapply(d, function(x) as.integer(x==1)) # For non-negative matrix factorization
d = data.frame(d, check.names=F)
d = cbind(d, coding[, !(names(coding) %in% theory_vars.new)])
# Add age categories
n = nrow(d)
d$age_estimate=d$age_min+(d$age_max-d$age_min)/2
d$age.category = 'Child'
d$age.category[d$age_estimate>=15] = 'Adolescent'
d$age.category[d$age_estimate>=20] = 'Adult'
d$age.category[d$age_estimate>=60] = 'Elderly'
d$age.category = factor(d$age.category, levels=c('Child', 'Adolescent', 'Adult', 'Elderly'))
#Remove 634 from d
d = d[d$id != '634',]
# Select and reorder columns
coding <- d %>%
select(
id,
culture,
record = extract,
record_type = content_type,
age_category = age.category,
sex = Sex2,
sb_type = TypeSB4,
1:23
)
coding2 <- d # Not in public data package
# # Apology data
# apology <- read_csv('data-raw/KristenApologyRevise.csv', col_types = 'ccccciiiiicii') # Because ID has chr values
#
# ##Entry errors
# apology$transgression[apology$id == 446] = 0
# apology$transgression[apology$id == 766] = 0
# apology$id[apology$id=="I"] = "i"
#
# ##Remove duplicates and blank data
# apology <- apology[apology$id != 634,]
# apology = apology[apology$id != 1788,]
# apology = apology[apology$id != 1894,]
# apology = apology[apology$id != 1903,]
# apology = apology[apology$id != 'g',]
# apology = apology[-c(466, 467,468),]
#
# #Remove 1909 from apology
# apology <- apology[apology$id != 1909,]
#
# transgression <- read_csv("data-raw/transgression_data.csv", col_types = 'ccccccci')
# transgression <- transgression[transgression$id !=1909,]
#
# #Remove from apology and transgression, not in original coding
# apology <- apology[apology$id != 382,]
# apology = apology[apology$id != 411,]
# apology = apology[apology$id != 463,]
# apology = apology[apology$id != 468,]
# apology = apology[apology$id != 'm',]
# apology = apology[apology$id != 'l',]
# transgression <- transgression[transgression$id != 382,]
# transgression = transgression[transgression$id != 411,]
# transgression = transgression[transgression$id != 463,]
# transgression = transgression[transgression$id != 468,]
# transgression = transgression[transgression$id != 'm',]
coltypes <- c(id = 'text',
coder = 'text',
content_type = 'text',
culture = 'text',
extract = 'text',
forgiven = 'numeric',
guilt = 'numeric',
motive_apologize = 'numeric',
punishment = 'numeric',
punishment_threat = 'numeric',
punishment_type = 'text',
shame = 'numeric',
transgression = 'numeric',
transgression_type = 'text',
unjustly_accused_punished = 'numeric')
apology <- read_csv("data-raw/Caitlin Kristen Reconciliation.csv")
causetypes_raw <- read_tsv("data-raw/CauseTypesFinal.csv") ### Final
#check coding is identical
syme <- apology[apology$coder == 'syme', -c(2, 5)]
calsbeek <- apology[apology$coder == 'calsbeek', -c(2, 5)]
dif <- anti_join(syme, calsbeek)
which(syme[[13]]!=calsbeek[[13]])
apology_unreconciled<-read_csv('data-raw/CandK.csv')
apology_unreconciled <- apology_unreconciled[apology_unreconciled$id!='382', ]
apology_unreconciled <- apology_unreconciled[apology_unreconciled$id!='411', ]
apology_unreconciled <- apology_unreconciled[apology_unreconciled$id!='463', ]
apology_unreconciled <- apology_unreconciled[apology_unreconciled$id!='468', ]
apology_unreconciled <- rbind(apology_unreconciled, apology[apology$coder=='calsbeek' & apology$id=='634',])
syme <- apology_unreconciled[apology_unreconciled$coder=='syme',]
calsbeek <- apology_unreconciled[apology_unreconciled$coder=='calsbeek',]
apology <- apology[apology$coder == 'syme',-2]
apology_raw <- apology
# Now convert apology_raw, which has -1 values, to apology, which will only have 0's and 1's
apology$culpable <- apology$unjustly_accused_punished == -1
apology$culpable <- as.numeric(apology$culpable)
apology$unjustly_accused_punished[apology$unjustly_accused_punished == -1] <- 0
apology$punishment_type[apology$punishment_type == 'unknown'] <- 'unknown punishment'
apology$punishment_type[apology$punishment_type == 'death'] <- 'death penalty'
#Remove 466, 467, and 634 from apology
apology = apology[apology$id != '634',]
lapply(apology, function(x){sum(x==-1)})
apology$forgiven[apology$forgiven == -1] <- 0
apology$guilt[apology$guilt == -1] <- 0
apology$motive_apologize[apology$motive_apologize == -1] <- 0
apology$punishment[apology$punishment == -1] <- 0
caitlin_cause_types <-read_csv('data-raw/CaitlinCauseTypes.csv')
kristen_cause_types <- d[c('id', 'Cause_type2')]
kristen_cause_types_clean <- read_csv('data-raw/kristen_cause_types.csv')
tmp <- left_join(caitlin_cause_types[c('id', 'Cause_type')], kristen_cause_types_clean, by='id')
library(stringr)
cause_types <- function(v){
l1 <- list()
for (i in 1:length(v)){
c1 <- str_split(v[i], ',')[[1]]
l1[[i]] <- c1
}
return(l1)
}
ctcomparison <- function(x){
total_causes <- c()
matching_causes <- c()
for (i in 1:length(x$l1)){
total_causes <- c(total_causes, length(union(x$l1[[i]], x$l2[[i]])))
matching_causes <- c(matching_causes, length(intersect(x$l1[[i]], x$l2[[i]])))
}
return(data.frame(total_causes=total_causes, matching_causes=matching_causes))
}
rcd <- function(mylist,original,newc){
for (i in 1:length(mylist)){
a <- mylist[[i]]
x <- match(original,a)
if (!is.na(x)) a[x] <- newc
mylist[[i]] <- a
}
return(mylist)
}
rcd2 <- function(mylist, sbstns){
for (i in 1:length(sbstns)){
s <- sbstns[i]
mylist <- rcd(mylist, names(s), s)
}
return(mylist)
}
l1 <- cause_types(tmp$Cause_type)
names(l1) <- tmp$id
l2 <- cause_types(tmp$Cause_type2)
names(l2) <- tmp$id
l_final <- cause_types(causetypes_raw$Cause_type2)
names(l_final) <- causetypes_raw$id
substitutions <- c(
'fear of supernatural' = 'spirit_attack',
'family tension' = 'interpersonal_confli',
'emotional distress' = 'psychological distress',
'mental illness' = 'psychological distress',
'loneliness' = 'alienation',
'fear of imprisonment' = 'enslavement_capture',
'fear of enslavement' = 'enslavement_capture',
'fear of rape' = 'rape',
'fear of illness disf' = 'illness',
'gambling loss' = 'resource_loss',
'inability to marry' = 'thwarted marriage',
'senility' = 'illness',
'infirmity' = 'illness',
'thwarted_status' = 'loss_social_position',
'commit adultry' = 'commit adultery',
'failure or sense of' = 'failure/sense of failure',
'inability to have ch' = 'infertility',
'psychological distre' = 'psychological distress',
'failed romantic rela' = 'failed romantic relationship',
'disappointment in ma' = 'disappointment in marriage',
'anomie/social tensio' = 'anomie/social tension',
'bring in cowife' = 'cowife',
'failed romantic rela' = 'failed romantic relationship',
' clan incest' = 'clan incest'
)
l1 <- rcd2(l1, substitutions)
l2 <- rcd2(l2, substitutions)
l_final <- rcd2(l_final, substitutions)
# x$l1 <- rcd(x$l1, 'fear of supernatural', 'spirit_attack')
# x$l2 <- rcd(x$l2, 'fear of supernatural', 'spirit_attack')
# x$l1 <- rcd(x$l1, 'family tension', 'interpersonal_confli')
# x$l2 <- rcd(x$l2, 'family tension', 'interpersonal_confli')
# x$l1 <- rcd(x$l1, 'emotional distress', 'psychological distre')
# x$l2 <- rcd(x$l2, 'emotional distress', 'psychological distre')
# x$l1 <- rcd(x$l1, 'mental illness', 'psychological distre')
# x$l2 <- rcd(x$l2, 'mental illness', 'psychological distre')
# x$l1 <- rcd(x$l1, 'loneliness', 'alienation')
# x$l2 <- rcd(x$l2, 'loneliness', 'alienation')
# x$l1 <- rcd(x$l1, 'fear of imprisonment', 'enslavement_capture')
# x$l2 <- rcd(x$l2, 'fear of imprisonment', 'enslavement_capture')
# x$l1 <- rcd(x$l1, 'fear of rape', 'rape')
# x$l2 <- rcd(x$l2, 'fear of rape', 'rape')
# x$l1 <- rcd(x$l1, 'fear of illness disf', 'illness')
# x$l2 <- rcd(x$l2, 'fear of illness disf', 'illness')
# x$l1 <- rcd(x$l1, 'gambling loss', 'resource_loss')
# x$l2 <- rcd(x$l2, 'gambling loss', 'resource_loss')
# x$l1 <- rcd(x$l1, 'inability to marry', 'thwarted marriage')
# x$l2 <- rcd(x$l2, 'inability to marry', 'thwarted marriage')
# x$l1 <- rcd(x$l1, 'senility', 'illness')
# x$l2 <- rcd(x$l2, 'senility', 'illness')
# x$l1 <- rcd(x$l1, 'infirmity', 'illness')
# x$l2 <- rcd(x$l2, 'infirmity', 'illness')
# x$l1 <- rcd(x$l1, 'thwarted_status', 'loss_social_position')
# x$l2 <- rcd(x$l2, 'thwarted_status', 'loss_social_position')
# x$l1 <- rcd(x$l1, 'commit adultry', 'commit adultery')
# x$l2 <- rcd(x$l2, 'commit adultry', 'commit adultery')
# x$l1 <- rcd(x$l1, 'failure or sense of ', 'failure or sense of')
# x$l2 <- rcd(x$l2, 'failure or sense of ', 'failure or sense of')
# Remove cause types that are redundant or not informative
bad_causetypes <- c('failed expectations',
'disconnected from fa',
'separation from love',
'significant loss of',
'significant loss of ', # Note the extra space
'significant loss soc',
'accus_commit_wrongdo',
'thwarted purpose',
'rejection',
'ridicule',
'group_conflict',
'interpersonal_confli',
'family shame',
'bad luck',
'commit adultery',
'imprisonment',
'public humiliation',
"")
l1 <- lapply(l1, function(x) setdiff(x, bad_causetypes))
l2 <- lapply(l2, function(x) setdiff(x, bad_causetypes))
l_final <- lapply(l_final, function(x) setdiff(x, bad_causetypes))
# incorporate "transgression" "shame" "punishment_type" "transgresion_type"
# from apology, and conflict from ZK
for (i in 1:length(l_final)) {
nm <- names(l_final[i])
row1 <- coding[coding$id == nm,]
row2 <- apology[apology$id == nm,]
if (row1$Conflict) {
l_final[[i]] <- c(l_final[[i]], 'conflict')
}
if (row2$transgression_type != 'none'){
l_final[[i]] <- c(l_final[[i]], row2$transgression_type)
}
if (row2$punishment_type != 'na'){
l_final[[i]] <- c(l_final[[i]], str_split(row2$punishment_type, ',')[[1]])
}
}
# Cause groups--groups in terms of impact on fitness
# left: original cause type. right: new, more general cause group
cause_groups <- c(
# generalized conflict
'conflict' = 'conflict',
'betrayal' = 'conflict',
# issues of mating
'incest' = 'mating', # added clan incest distinct from incest
'clan incest' = 'mating',
' clan incest' = 'mating',
'unfaithful spouse' = 'mating',
'rape' = 'mating',
'failed romantic relationship' = 'mating',
'disappointment in marriage' = 'mating',
'divorce' = 'mating',
'divorce or attempted' = 'mating',
'thwarted marriage' = 'mating',
'forced marriage' = 'mating',
'cowife' = 'mating',
'inability to marry' = 'mating',
'adultery' = 'mating',
'commit adultery' = 'mating',
'sexual_taboo' = 'mating',
# issues of reproduction
'pregnancy' = 'reproduction',
'loss of children' = 'reproduction',
'infertility' = 'reproduction',
# resource loss
'natural disaster' = 'resource loss', # Good categorization?
'owed debt' = 'resource loss',
'fear of loss' = 'resource loss',
'resource_loss' = 'resource loss',
'fine' = 'resource loss',
# issues related to being a burden on others
'fear of harming others' = 'burden on others',
'burdensomeness' = 'burden on others',
'fail_others' = 'burden on others',
'squandered_resources' = 'burden on others', # since extracts are more about impact of loss on others
# Threat to, or loss of social status
'thwarted status' = 'loss of social position',
'loss of status' = 'loss of social position',
'loss_social_position' = 'loss of social position',
'loss_position' = 'loss of social position', # could also be loss of resources
'failure/sense of failure' = 'loss of social position',
'public_humiliation' = 'loss of social position',
'ridicule' = 'loss of social position',
'social_condemnation' = 'loss of social position',
'ostracism' = 'loss of social position',
'alienation' = 'loss of social position',
# social unrest resutling from conflict with external groups or colonial powers
'military_defeat' = 'between group conflict',
'political unrest' = 'between group conflict',
'warn others' = 'between group conflict',
'anomie/social tension' = 'between group conflict',
'nonconversion' = 'between group conflict',
# issues of confinement--can't invest elsewhere
'labor exploitation' = 'loss of autonomy/mobility',
'enslavement_capture' = 'loss of autonomy/mobility',
'imprisonment' = 'loss of autonomy/mobility',
# inflicted physical bodily harm or death
'physical abuse' = 'physical harm to victim',
'bodily trauma' = 'physical harm to victim',
'disfigurement' = 'physical harm to victim',
'nonlethal_physical' = 'physical harm to victim',
'death penalty' = 'physical harm to victim',
# physical illness
'illness' = 'illness',
'disease outbreak' = 'illness',
# mental health issues
'psychological distress' = 'mental health',
'boredom' = 'mental health',
'drunkenness' = 'mental health',
# a pervasive cultural model
'spirit_attack' = 'spirit attack',
# conflict between children/adolescents and parents
'neglect' = 'child/adolescent parental conflict',
'childhood_disobedience' = 'child/adolescent parental conflict',
'strike_parents' = 'child/adolescent parental conflict',
# loss of social partner through death or relationship defection
'death_loved_one' = 'social partner loss',
'trauma to loved one' = 'social partner loss',
'loss_social_partner' = 'social partner loss',
# the victim physically harmed others
'manslaughter' = 'physically harm others',
'murder' = 'physically harm others',
'physical_attack' = 'physically harm others',
'attempted_murder' = 'physically harm others',
'threaten_murder' = 'physically harm others',
# too complex to lump in with anything else
'witchcraft' = 'witchcraft',
# social faux pas
'public_flatulence' = 'social faux pas',
'disrespect_ritual' = 'social faux pas',
# unknown or a lack of information to categorize it
'fear of punishment' = 'unspecified punishment',
'unknown punishment' = 'unspecified punishment',
'fear of revenge' = 'unspecified punishment',
'arrogance' = 'unknown', # based on a cultural model
'unknown' = 'unknown'
)
a1 <- lapply(l1, function(x) unique(cause_groups[x]))
a2 <- lapply(l2, function(x) unique(cause_groups[x]))
a3 <- lapply(l_final, function(x) unique(cause_groups[x]))
# Something else
y <- ctcomparison(list(l1=l1, l2=l2))
c1 <- 0
for (i in 1:length(l1)){
if ('spirit_attack' %in% l1[[i]]){
if ('spirit_attack' %in% l2[[i]]){
c1 <- c1+1
}
}
}
library(irr)
ctfind <- function(ct, l1, l2){
v1 <- sapply(l1, function(a) ct %in% a)
v2 <- sapply(l2, function(a) ct %in% a)
print(kappa2(cbind(v1, v2)))
#return(list(v1=as.numeric(v1), v2=as.numeric(v2)))
#return(c(ct=cor(as.numeric(v1), as.numeric(v2), use='complete.obs')))
#return(c(ct=kappa2(cbind(as.numeric(v1), as.numeric(v2)))$value))
print(ct)
print(table(v1,v2))
}
# Lower level cause types
u <- unlist(l1)
u <- c(u,unlist(l2))
u <- unique(u)
z <- sapply(u,ctfind, l1=l1, l2=l2)
# Higher level cause groups
cg <- unique(cause_groups)
z2 <- sapply(cg, ctfind, l1=a1, l2=a2)
# rename l_final, a3
causetypes <- l_final
causegroups <- a3
#a,b,c,d,e,f,h,i,j,k make no sense (replication of c) in kristen cause types, need to recode
# codingextra: cases in lengthy documents on suicide
codingextra <- read_excel('data-raw/Missing suicide data.xlsx')
save(causetypes_raw, causetypes, causegroups, coding, coding2, codingextra, apology, apology_raw, apology_unreconciled, file = "data/coding.RData", compress = "xz")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.