# R/utils/SHR_utils.R

# Defines functions: str_replace_all_lower

# Lower-case `string`, then hand it to `str_replace_all()` together with
# `pattern`.
#
# string:  vector to clean; it is passed through `tolower()` first.
# pattern: forwarded unchanged as the second argument of `str_replace_all()`.
#          NOTE(review): presumably one of the named regex -> replacement
#          vectors defined in this file, and `str_replace_all()` is presumably
#          stringr's - confirm against the callers.
#
# Returns whatever `str_replace_all()` returns for the lower-cased input.
str_replace_all_lower <- function(string, pattern) {
  lowered <- tolower(string)
  str_replace_all(lowered, pattern)
}

# Regex pattern (name) -> replacement (value) pairs written against
# upper-case identifiers; replacements may use back-references such as `\\1`.
#
# NOTE(review): presumably applied to column names through a named-vector
# `str_replace_all()` call, which would run the pairs in order so that later
# entries see the output of earlier ones (e.g. "^RELATIONSHIP$" produces
# "RELATIONSHIP_1", which "RELATIONSHIP_([0-9]+)" then rewrites). Confirm with
# the callers before reordering anything.
#
# The second, identical "^OFFNDR_COUNT_ADDL_OFFNDR$" entry was removed: a
# repeated key with the same replacement can never change the result.
shr_names <- c(
  "SUBC.*" = "SUBCIRCUMSTANCE",
  "ETHNC_ORIGN" = "ETHNIC_ORIGIN",
  "ETHNICITY" = "ETHNIC_ORIGIN",
  "ORIGN|ORGIN" = "ORIGIN",
  "ETHNC" = "ETHNIC",
  "ETHNIC$" = "ETHNIC_ORIGIN",
  "GENDER" = "SEX",
  "OFFENDER_([0-9]+)_CIR.*" = "CIRCUMSTANCE_\\1",
  "OFFENDER_([0-9]+)_SUB.*" = "SUBCIRCUMSTANCE_\\1",
  "OFFENDER_([0-9]+)_REL.*" = "RELATIONSHIP_\\1",
  "SUB_CIRCUMSTANCE" = "SUBCIRCUMSTANCE",
  "AGENCY_CODE" = "ORI_CODE",
  "^STATE_CODE$" = "STATE",
  "AGENCY_STATE" = "STATE_NAME",
  "NUMERIC_STATE_CODE" = "STATE",
  "SMSA" = "MSA",
  "TYPE_OF_OFFENSE_HOMI.*" = "HOMICIDE_TYPE",
  "^OFFNDR_COUNT_ADD_L$" = "ADDITIONAL_OFFENDER_COUNT",
  "^OFFNDR_COUNT_ADDITIONAL_OFFENDERS$" = "ADDITIONAL_OFFENDER_COUNT",
  "^OFFNDR_COUNT_ADDL_OFFNDR$" = "ADDITIONAL_OFFENDER_COUNT",
  "^OFFENDER_COUNT$" = "ADDITIONAL_OFFENDER_COUNT",
  "VICTIM_COUNT_ADDITIONAL_VICTIMS" = "ADDITIONAL_VICTIM_COUNT",
  "VICTIM_COUNT_ADDL_VICT" = "ADDITIONAL_VICTIM_COUNT",
  "^VICTIM_COUNT$" = "ADDITIONAL_VICTIM_COUNT",
  "^PART_NUMBER$" = "ICPSR_PART_NUMBER",
  "^EDITION_NUMBER$" = "ICPSR_EDITION_NUMBER",
  "^SEQUENCE_NUMBER$" = "ICPSR_SEQUENCE_NUMBER",
  # Strips a trailing "_1" / "_9028" suffix before the entries below re-add
  # an explicit "_1" to the bare RELATIONSHIP / CIRCUMSTANCE names.
  "_1$|_9028$" = "",
  "ICPSR_SEQ_ID_NUMBER" = "ICPSR_SEQUENCE_NUMBER",
  "COUNTIM" = "COUNT",
  "MSA_CODE" = "MSA",
  "IDENTIFIER_CODE" = "ICPSR_IDENTIFIER_CODE",
  "^RELATIONSHIP$" = "RELATIONSHIP_1",
  "RELATIONSHIP_([0-9]+)" = "OFFENDER_\\1_RELATIONSHIP_TO_VICTIM_1",
  "^CIRCUMSTANCE$" = "CIRCUMSTANCE_1",
  "^SUBCIRCUMSTANCE$" = "SUBCIRCUMSTANCE_1",
  "^CIRCUMSTANCE_([0-9]+)" = "OFFENDER_\\1_CIRCUMSTANCE",
  "^SUBCIRCUMSTANCE_([0-9]+)" = "OFFENDER_\\1_SUBCIRCUMSTANCE",
  "^ORI_CODE$" = "ORI",
  "OFFENSE_MONTH" = "MONTH_OF_OFFENSE",
  "ACTION_TYPE" = "TYPE_OF_ACTION",
  "^DIVISION$" = "GEOGRAPHIC_DIVISION",
  "^HOMICIDE$" = "HOMICIDE_TYPE"
)

# Regex pattern (name) -> canonical weapon label (value), written against
# lower-case input. A value of NA marks codes that should become missing.
#
# NOTE(review): presumably used as the `pattern` argument of
# `str_replace_all_lower()`; with a named vector the pairs would be applied in
# order - confirm before reordering.
#
# Three entries that repeated an earlier key with the same value
# ("^strangulat -hang$", "^asphxtn-dth gas$", "^personl weapons$") were
# dropped; the first occurrence of each is kept in its original position.
weapons <- c(
  "^oth -type dk$" = "other or unknown weapon",
  "^handgun -pistol,$" = "handgun",
  "^knife - cutting$" = "knife or cutting instrument",
  "^personal weapons$" = "personal weapons - includes beating",
  "^narcotics drugs$" = "narcotics/drugs - includes sleeping pills",
  "^firearm,type dk$" = "firearm, type not stated",
  "^strangulation$" = "strangulation - includes hanging",
  "^asphyxiation-gas$" = "asphyxiation - includes death by gas",
  "^pushed - thrown$" = "pushed or thrown out of window",
  "^pushed or thrown out window$" = "pushed or thrown out of window",
  "^oth gun-unknown$" = "firearm, type not stated",
  "^poison - not gas$" = "poison - does not include gas",
  "^inap, only one offender$" = NA,
  "^handgun - pistol, revolver, etc$" = "handgun",
  "^other - type of weapon not designated$" = "other or unknown weapon",
  "^blunt object - hammer, club, etc$" = "blunt object",
  "^narcotics or drugs, sleeping pills$" = "narcotics/drugs - includes sleeping pills",
  "^inap$" = NA,
  "^handgun-pstl,etc$" = "handgun",
  "^oth -type unk$" = "other or unknown weapon",
  "^strangulatn-hang$" = "strangulation - includes hanging",
  "^firearm,type unk$" = "firearm, type not stated",
  "^pushd out wndw$" = "pushed or thrown out of window",
  "^90$" = "other or unknown weapon",
  "^inap only 1 offndr$" = NA,
  "^hndgn-pstl rvlvr etc$" = "handgun",
  "^knfe/cttg instrmnt$" = "knife or cutting instrument",
  "^frarm type not sttd$" = "firearm, type not stated",
  "^narcotics/drugs$" = "narcotics/drugs - includes sleeping pills",
  "^othr-wpn nt dsgntd$" = "other or unknown weapon",
  "^psn-not inclde gas$" = "poison - does not include gas",
  "^strngltn-hngng$" = "strangulation - includes hanging",
  "^asphxtn-inlds dth gas$" = "asphyxiation - includes death by gas",
  "^frrm type not sttd$" = "firearm, type not stated",
  "^strngltn-hangg$" = "strangulation - includes hanging",
  "^asphyxtn-dth/gas$" = "asphyxiation - includes death by gas",
  "^pshd/thrwn out wndw$" = "pushed or thrown out of window",
  "^knife-cut inst$" = "knife or cutting instrument",
  "^strangulat -hang$" = "strangulation - includes hanging",
  "^knife/cttg instrmt$" = "knife or cutting instrument",
  "^hndgn-pstl rvlvr$" = "handgun",
  "^othr-type nt dsgntd$" = "other or unknown weapon",
  "^other$" = "other or unknown weapon",
  "^frrm not stated$" = "firearm, type not stated",
  # Catch-all: anything still containing "inap" becomes NA.
  ".*inap.*" = NA,
  "^knfe/cttg instrmt$" = "knife or cutting instrument",
  "^strngltn-hanging$" = "strangulation - includes hanging",
  "^strngltn-hngg$" = "strangulation - includes hanging",
  "^othr-tpe wpn nt dsgd$" = "other or unknown weapon",
  "^firarm tpe nt sttd$" = "firearm, type not stated",
  "^asphxtn-dth gas$" = "asphyxiation - includes death by gas",
  "^personl weapons$" = "personal weapons - includes beating",
  "^personal weapons, includes beating$" = "personal weapons - includes beating",
  "^blnt objct$" = "blunt object",
  "^strangulat-hang$" = "strangulation - includes hanging",
  "^strangulation - hanging$" = "strangulation - includes hanging",
  "^frrm type nt sttd$" = "firearm, type not stated",
  "^strangulation-hangg$" = "strangulation - includes hanging",
  "^frrm not sttd$" = "firearm, type not stated",
  "^firrm type nt sttd$" = "firearm, type not stated",
  "^poison -not gas$" = "poison - does not include gas",
  "^knfe/cttng instrmnt$" = "knife or cutting instrument",
  "^other-wpn nt dsgntd$" = "other or unknown weapon"
)


# Regex pattern (name) -> canonical ethnicity label (value), written against
# lower-case input. Every outcome is "hispanic", "not hispanic", "unknown" or
# NA (for anything containing "inap").
# NOTE(review): presumably passed as `pattern` to `str_replace_all_lower()` -
# confirm against the callers.
ethnic <- c(
  "^not of hispanic origin$" = "not hispanic",
  "^hispanic origin$" = "hispanic",
  "^not hispanic or latino$" = "not hispanic",
  "^hispanic or latino$" = "hispanic",
  "^not of hispanic$" = "not hispanic",
  "^unknown-not rept$" = "unknown",
  "^unknown or not reported$" = "unknown",
  "^not reported$" = "unknown",
  "^unknown/not rpt$" = "unknown",
  "^not of hisp orgn$" = "not hispanic",
  "^hisp origin$" = "hispanic",
  "^not of hisp origin$" = "not hispanic",
  ".*inap.*" = NA_character_,
  "^not hisp orgn$" = "not hispanic",
  "^unknown or not reported by law enforceme$" = "unknown"
)

# Maps the single-digit codes "1".."9" (each anchored as "^<digit>$") to the
# division name listed at the same position below.
country_division_fix <- setNames(
  c(
    "New England States",
    "Middle Atlantic States",
    "East North Central States",
    "West North Central States",
    "South Atlantic States",
    "East South Central States",
    "West South Central States",
    "Mountain States",
    "Pacific States"
  ),
  sprintf("^%d$", 1:9)
)

# Regex pattern (name) -> canonical population-group label (value), written
# against lower-case input. Labels follow the forms
# "city <low> thru <high>", "city under <n>", "city <n>+", and the same with
# "msa county" / "non-msa county" prefixes.
#
# NOTE(review): presumably passed as `pattern` to `str_replace_all_lower()` -
# confirm against the callers. Unescaped "." in patterns such as "2.5" or
# "co." matches any character, which is looser than the literal text.
#
# Fixed two replacement values that did not match the label used by every
# other spelling of the same group:
#   "^cits bet 25k-50k$" was "city 25,000, 49,999"
#   "^cts 50000-99999$"  was "city 50,000 thru 999,999"
group_number_fix <- c(
  "^smsa counties$" = "msa county",
  "^citie bet 25-49k$" = "city 25,000 thru 49,999",
  "^all cities 250k\\+$" = "city 250,000+",
  "^citie bet 10-24k$" = "city 10,000 thru 24,999",
  "^cit bet 2.5-9.9k$" = "city 2,500 thru 9,999",
  "^cit bet 100-249k$" = "city 100,000 thru 249,999",
  "^non-smsa countie$" = "non-msa county",
  "^citie under 2.5k$" = "city under 2,500",
  "^citie bet 50-99k$" = "city 50,000 thru 99,999",
  "^msa counties 100,000 or over$" = "msa county 100,000+",
  "^cities 25,000 thru 49,999$" = "city 25,000 thru 49,999",
  "^cities 250,000 thru 499,999$" = "city 250,000 thru 499,999",
  "^cities 10,000 thru 24,999$" = "city 10,000 thru 24,999",
  "^cities 2,500 thru 9,999$" = "city 2,500 thru 9,999",
  "^msa counties 25,000 thru 99,999$" = "msa county 25,000 thru 99,999",
  "^cities 100,000 thru 249,999$" = "city 100,000 thru 249,999",
  "^msa counties 10,000 thru 24,999$" = "msa county 10,000 thru 24,999",
  "^cities under 2,500$" = "city under 2,500",
  "^non-msa counties 10,000 thru 24,999$" = "non-msa county 10,000 thru 24,999",
  "^non-msa counties 25,000 thru 99,999$" = "non-msa county 25,000 thru 99,999",
  "^non-msa counties under 10,000$" = "non-msa county under 10,000",
  "^cities 50,000 thru 99,999$" = "city 50,000 thru 99,999",
  "^cities 500,000 thru 999,999$" = "city 500,000 thru 999,999",
  "^msa counties under 10,000$" = "msa county under 10,000",
  "^all cities 1,000,000 or over$" = "city 1,000,000+",
  "^non-msa counties 100,000 or over$" = "non-msa county 100,000+",
  "^msa co. 100,000 \\+$" = "msa county 100,000+",
  "^cit 25,000-49,999$" = "city 25,000 thru 49,999",
  "^cit 250,000-499,999$" = "city 250,000 thru 499,999",
  "^cit 2,500-9,999$" = "city 2,500 thru 9,999",
  "^cit 10,000-24,999$" = "city 10,000 thru 24,999",
  "^msa co. 25,000-99,999$" = "msa county 25,000 thru 99,999",
  "^cit 100,000-249,999$" = "city 100,000 thru 249,999",
  "^msa co. 10,000-24,999$" = "msa county 10,000 thru 24,999",
  "^non-msa co. 10,000-24,999$" = "non-msa county 10,000 thru 24,999",
  "^non-msa co. < 10,000$" = "non-msa county under 10,000",
  "^cit < 2,500$" = "city under 2,500",
  "^non-msa co. 25,000-99,999$" = "non-msa county 25,000 thru 99,999",
  "^cit 50,000-99,999$" = "city 50,000 thru 99,999",
  "^cit 500,000-999,999$" = "city 500,000 thru 999,999",
  "^cit 1,000,000 \\+$" = "city 1,000,000+",
  "^msa co. < 10,000$" = "msa county under 10,000",
  "^non-msa co. 100,000 \\+$" = "non-msa county 100,000+",
  "^msa counties$" = "msa county",
  "^cits bet 25k-50k$" = "city 25,000 thru 49,999",
  "^cit bt 2.5k-9.9k$" = "city 2,500 thru 9,999",
  "^citi bet 10k-25k$" = "city 10,000 thru 24,999",
  "^cit bt 100k-250k$" = "city 100,000 thru 249,999",
  "^non-msa cnties$" = "non-msa county",
  "^citi bet 50k-99k$" = "city 50,000 thru 99,999",
  "^msa cts 100000/over$" = "msa county 100,000+",
  "^cts 25000-49999$" = "city 25,000 thru 49,999",
  "^cts 250000-499999$" = "city 250,000 thru 499,999",
  "^cts 10000-24999$" = "city 10,000 thru 24,999",
  "^cits 2500-9999$" = "city 2,500 thru 9,999",
  "^cts undr 2,500$" = "city under 2,500",
  "^cts 100000-249999$" = "city 100,000 thru 249,999",
  "^msa cts 10000-24999$" = "msa county 10,000 thru 24,999",
  "^n-ms cts udr 10,000$" = "non-msa county under 10,000",
  "^msa cts 25000-99999$" = "msa county 25,000 thru 99,999",
  "^n-ms cts 10000-24999$" = "non-msa county 10,000 thru 24,999",
  "^n-ms cts 25000-99999$" = "non-msa county 25,000 thru 99,999",
  "^cts 50000-99999$" = "city 50,000 thru 99,999",
  "^al cts mlln ovr$" = "city 1,000,000+",
  "^cts 500000-999999$" = "city 500,000 thru 999,999",
  "^ms cts udr 10000$" = "msa county under 10,000",
  # NOTE(review): the abbreviation is ambiguous (under 10,000 vs. 100,000+);
  # value left as in the original - verify against the source codebook.
  "^n-msa cots 10000$" = "non-msa county under 10,000",
  "^non-msa st police$" = "non-msa state police",
  "^msa st police$" = "msa state police"
)

# Regex pattern (name) -> canonical race label (value), written against
# lower-case input. Anything containing "inap" maps to NA.
# NOTE(review): presumably passed as `pattern` to `str_replace_all_lower()` -
# confirm against the callers.
race <- c(
  "^white -chicano$" = "white",
  "^asian-pac island$" = "asian",
  "^amer indi-eskimo$" = "american indian or alaskan native",
  "^asian or pacific islander$" = "asian",
  "^black or african american$" = "black",
  "^american indian or alaska native$" = "american indian or alaskan native",
  "^american indian$" = "american indian or alaskan native",
  "^asn/pac islndr$" = "asian",
  "^amer ind/alask nat$" = "american indian or alaskan native",
  "^asn/pacific islander$" = "asian",
  "^amer ind/alsk nat$" = "american indian or alaskan native",
  "^ameri indi-eskimo$" = "american indian or alaskan native",
  ".*inap.*" = NA_character_
)

# Regex pattern (name) -> canonical relationship label (value), written
# against lower-case input. Anything containing "inap" maps to NA.
# NOTE(review): presumably passed as `pattern` to `str_replace_all_lower()` -
# confirm against the callers.
#
# The repeated "^othr-knwn-vctm$" entry (same key, same value) was dropped.
relationship <- c(
  "^dk,all instances$" = "unknown",
  "^homosexual relat$" = "homosexual relationship",
  "^oth -known to vi$" = "other - known to victim",
  "^common-law husba$" = "common-law husband",
  "^relationship not determined$" = "unknown",
  "^oth-vic knows$" = "other - known to victim",
  "^rel not determnd$" = "unknown",
  "^common-law husb$" = "common-law husband",
  "^rltnshp not dtrmnd$" = "unknown",
  "^reltnshp nt dtrmnd$" = "unknown",
  "^other - known-vctm$" = "other - known to victim",
  "^homosxl rltnshp$" = "homosexual relationship",
  "^hmsxl rltnshp$" = "homosexual relationship",
  "^other-known-vctm$" = "other - known to victim",
  "^othr-knwn-vctm$" = "other - known to victim",
  "^other-knwn-vctm$" = "other - known to victim",
  ".*inap.*" = NA
)

# Regex pattern (name) -> canonical circumstance label (value), written
# against lower-case input. Anything containing "inap" maps to NA.
#
# NOTE(review): presumably passed as `pattern` to `str_replace_all_lower()`.
# With a named vector, stringr applies the pairs in order, so the entries
# that map to plain "other" are later picked up by "^other$" and end as
# "other non-felony type - not specified". Confirm before reordering.
#
# Changes from the previous version:
#   * "^other non-felony type homicide$" used to map to the *felony* label
#     "other felony type - not specified"; it now maps to
#     "other non-felony type - not specified" like the other non-felony
#     spellings.
#   * Dropped the second "^oth non-felony$" and "^crcmstnces undtrmnd$"
#     entries; the earlier entry with the same key always wins.
circumstance <- c(
  "^brawl - alchol$" = "brawl due to influence of alcohol",
  "^all instances$" = "circumstances undetermined",
  "^oth non-fel type$" = "other",
  "^argument - money$" = "argument over money or property",
  "^argument over money property$" = "argument over money or property",
  "^justif homi civil$" = "felon killed by private citizen",
  "^justif homi poli$" = "felon killed by police",
  "^guncleanin death$" = "gun cleaning death - other than self-inflicted",
  "^lover-s triangle$" = "lovers triangle",
  "^all suspect felo$" = "all suspected felony type",
  "^oth negligen gun$" = "other negligent handling of gun which resulted in death of another",
  "^other negligent handling of gun$" = "other negligent handling of gun which resulted in death of another",
  "^narcotic drug$" = "narcotic drug laws",
  "^kids play withgun$" = "children playing with gun",
  "^oth -felony type$" = "other felony type - not specified",
  "^all oth manslaug$" = "all other manslaughter by negligence except traffic deaths",
  "^vic shot hunt ac$" = "victim shot in hunting accident",
  "^oth sex offense$" = "other sex offenses",
  "^other sex offense$" = "other sex offenses",
  "^brawl-narcotics$" = "brawl due to influence of narcotics",
  "^institution kill$" = "institutional killings",
  "^juv gang killing$" = "juvenile gang killings",
  "^prostitutio-vice$" = "prostitution and commercialized vice",
  "^kid kill-sitter$" = "child killed by babysitter",
  "^motor vehi theft$" = "motor vehicle theft",
  "^32$" = "abortion",
  "^all suspected felony type$" = "all suspected felony type",
  "^lover s triangle$" = "lovers triangle",
  "^oth negligen-gun$" = "other negligent handling of gun which resulted in death of another",
  "^rel not determnd$" = "circumstances undetermined",
  "^narc drug law$" = "narcotic drug laws",
  "^juv gang killngs$" = "juvenile gang killings",
  "^brawl-alcohol$" = "brawl due to influence of alcohol",
  "^kids play with gun$" = "children playing with gun",
  "^justif homi-poli$" = "felon killed by police",
  "^gangland kllings$" = "gangland killings",
  "^gangland killing$" = "gangland killings",
  "^justif homi-civil$" = "felon killed by private citizen",
  "^prostit,com-vice$" = "prostitution and commercialized vice",
  "^institut killngs$" = "institutional killings",
  "^gun-cleaning dth$" = "gun cleaning death - other than self-inflicted",
  "^gun-cleaning death - other than self$" = "gun cleaning death - other than self-inflicted",
  "^sitter klld chld$" = "child killed by babysitter",
  "^crcmstncs undtrmnd$" = "circumstances undetermined",
  "^other-not specifd$" = "other - not specified",
  "^brwl due-inflce alcl$" = "brawl due to influence of alcohol",
  "^argmt ovr mny/prop$" = "argument over money or property",
  "^fln klld prvte citn$" = "felon killed by private citizen",
  "^all sspctd fely tpe$" = "all suspected felony type",
  "^fln klld by plce$" = "felon killed by police",
  "^juv gng kllgs$" = "juvenile gang killings",
  "^brwl-inflce nrctcs$" = "brawl due to influence of narcotics",
  "^prstttn/cmmclzd vice$" = "prostitution and commercialized vice",
  "^chld klld bbysttr$" = "child killed by babysitter",
  "^mtr vhcle thft$" = "motor vehicle theft",
  "^instttnl kllgs$" = "institutional killings",
  "^all other manslaughter by negligence except traffic deaths exc$" = "all other manslaughter by negligence except traffic deaths",
  "^gun-cleaning death - other than self-inf$" = "gun cleaning death - other than self-inflicted",
  "^brawl due to inflence of alcohol$" = "brawl due to influence of alcohol",
  "^gun-cleaning death - other than sell$" = "gun cleaning death - other than self-inflicted",
  ".*inap.*" = NA,
  "^juv gang kllngs$" = "juvenile gang killings",
  "^othr non-flny hom$" = "other",
  "^oth non-felony$" = "other",
  "^crcmstnces undtrmnd$" = "circumstances undetermined",
  "^othr flny-nt spcfd$" = "other felony type - not specified",
  "^fln klld by plice$" = "felon killed by police",
  "^fln klld/police$" = "felon killed by police",
  "^juv gang killings$" = "juvenile gang killings",
  # Also catches the plain "other" produced by earlier entries when the pairs
  # are applied sequentially.
  "^other$" = "other non-felony type - not specified",
  "^all other manslaughter except traffic de$" = "all other manslaughter by negligence except traffic deaths",
  "^all other manslaughter by negligence$" = "all other manslaughter by negligence except traffic deaths",
  "^oth felony type$" = "other felony type - not specified",
  "^other non-felony type homicide$" = "other non-felony type - not specified",
  "^justif hom-polic$" = "felon killed by police",
  "^victim shot in hunting accident$" = "victim shot in hunting accident",
  "^juv gng killings$" = "juvenile gang killings",
  "^otr nn-fl tpe hmcde$" = "other non-felony type - not specified",
  "^argmnt mny/prprty$" = "argument over money or property",
  "^fln klld by police$" = "felon killed by police",
  "^brwl-inflnce alchl$" = "brawl due to influence of alcohol",
  "^brwl-inflnce nrctcs$" = "brawl due to influence of narcotics",
  "^instttnl kllngs$" = "institutional killings",
  "^all sspctd fel type$" = "all suspected felony type",
  "^otr flny tp-nt spfd$" = "other felony type - not specified",
  "^vehicle theft$" = "motor vehicle theft",
  "^fln klld/prvte ctzn$" = "felon killed by private citizen",
  "^chld klld/bbysttr$" = "child killed by babysitter",
  "^prstttn/cmmrclzd vce$" = "prostitution and commercialized vice",
  "^othr non-flny hmcde$" = "other non-felony type - not specified",
  "^othr flny-not spcfd$" = "other felony type - not specified",
  "^brwl-inflnce narcs$" = "brawl due to influence of narcotics",
  "^fln klld police$" = "felon killed by police",
  "^argmnt mny prop$" = "argument over money or property",
  "^brwl-inflnce alc$" = "brawl due to influence of alcohol",
  # Unanchored variant of "^brwl-inflnce narcs$" kept as in the original.
  "brwl-inflnce narcs" = "brawl due to influence of narcotics",
  "^ganglnd killngs$" = "gangland killings",
  "^all sspctd flny$" = "all suspected felony type",
  "^argmnt mney propy$" = "argument over money or property",
  "^fln klld/priv ctzn$" = "felon killed by private citizen",
  "^brawl - alcohol$" = "brawl due to influence of alcohol",
  "^all suspec felon$" = "all suspected felony type",
  "^justi homi civil$" = "felon killed by private citizen",
  "^justi homi polic$" = "felon killed by police"
)

# Regex pattern (name) -> canonical subcircumstance label (value), written
# against lower-case input. NA marks codes that should become missing.
#
# NOTE(review): presumably passed as `pattern` to `str_replace_all_lower()` -
# confirm against the callers.
#
# Changes from the previous version:
#   * "^fln attd fllw plce off$" used to map to
#     "felon attempted flight from a crime"; it now maps to
#     "felon attacked fellow police officer", in line with the other
#     "fllw plce" / "fll pl off" spellings.
#   * Dropped five entries that repeated an earlier key with the same value
#     (".*inap.*", "^fln attckd pol off$", "^fln rsstd arrest$",
#     "^fln attkd fll pl off$", "^inp nt jstfble hmcde$").
subcircumstance <- c(
  "^fel kil com crim$" = "felon killed in commission of a crime",
  "^fel attack polic$" = "felon attacked police officer",
  "^not enough infor$" = "not enough information to determine",
  "^fel attack civil$" = "felon attacked a civilian",
  "^fel attem flight$" = "felon attempted flight from a crime",
  "^fel resisted arr$" = "felon resisted arrest",
  "^fel att fel offe$" = "felon attacked fellow police officer",
  ".*inap.*" = NA,
  "^killd during crm$" = "felon killed in commission of a crime",
  "^fel resist arres$" = "felon resisted arrest",
  # NOTE(review): "pers" mapped to the police-officer label in the original;
  # left unchanged - verify against the source codebook.
  "^fel attack pers$" = "felon attacked police officer",
  "^nt engh info-dtrmne$" = "not enough information to determine",
  "^fln attd plce offcr$" = "felon attacked police officer",
  "^fln klld cmmssn crme$" = "felon killed in commission of a crime",
  "^fln attckd cvln$" = "felon attacked a civilian",
  "^fln attd fllw plce off$" = "felon attacked fellow police officer",
  "^fln rsstd arrst$" = "felon resisted arrest",
  "^fln attmtd flght crme$" = "felon attempted flight from a crime",
  "^inp nt jstifble hmcde$" = NA,
  "^killed durng crm$" = "felon killed in commission of a crime",
  "^felon attacked a civilian$" = "felon attacked a civilian",
  "^fln attkd plce off$" = "felon attacked police officer",
  "^fln resisted arrest$" = "felon resisted arrest",
  "^fln klld cmmssn crime$" = "felon killed in commission of a crime",
  "^fln attkd fllw plce off$" = "felon attacked fellow police officer",
  "^not engh info-dtrmne$" = "not enough information to determine",
  "^fln attckd pol off$" = "felon attacked police officer",
  "^fln rsstd arrest$" = "felon resisted arrest",
  "^fln attkd fll pl off$" = "felon attacked fellow police officer",
  "^inp nt jstfble hmcde$" = NA,
  "^fln attckd plce off$" = "felon attacked police officer",
  "^fln attckd cvlian$" = "felon attacked a civilian",
  "^fln attckd fllw plce$" = "felon attacked fellow police officer"
)

# Regex pattern (name) -> cleaned age value (value), written against
# lower-case input. Text spellings collapse to a canonical label or a digit
# string; "out of universe", "age unknown" and anything containing "inap" or
# "inp" map to NA.
# NOTE(review): presumably passed as `pattern` to `str_replace_all_lower()` -
# confirm against the callers.
age <- c(
  "^7 dys old-364 dys$" = "7 days to 364 days",
  "^7 dys old-364 days$" = "7 days to 364 days",
  "^7 days old to 364 days old$" = "7 days to 364 days",
  "^brth-6 days old$" = "birth to 6 days, including abandoned infant",
  "^99 yrs old/more$" = "99 years or older",
  "^birth to 1 yr ol$" = "0",
  "^99 yrs old - mor$" = "99 years or older",
  "^99 years or more$" = "99 years or older",
  "^99 years old or more$" = "99 years or older",
  "^1 year old$" = "1",
  "^nb$" = "birth to 6 days, including abandoned infant",
  "^birth to 6 days old$" = "birth to 6 days, including abandoned infant",
  "^out of universe$" = NA_character_,
  "^age unknown$" = NA_character_,
  ".*inap.*" = NA_character_,
  ".*inp.*" = NA_character_
)

# Regex pattern (name) -> replacement (value) for the sex field: anything
# containing "inap" maps to NA; the empty-string pattern maps to "".
sex <- c(
  ".*inap.*" = NA_character_,
  "^$" = ""
)


# Unanchored regex pattern (name) -> full homicide-type label (value),
# written against lower-case input.
homicide_type <- c(
  "murder nonneglig" = "murder and nonnegligent manslaughter",
  "mansl by neglig" = "manslaughter by negligence",
  "mrdr/nn-nglgt mnsltr" = "murder and nonnegligent manslaughter"
)

# Unanchored regex pattern (name) -> full victim/offender situation label
# (value), written against lower-case input.
situation <- c(
  "singl vic-mul of" = "single victim/multiple offenders",
  "single vict-offe" = "single victim/single offender",
  "multiple vic-off" = "multiple victims/multiple offenders",
  "single vic un of" = "single victim/unknown offender(s)",
  "mult vic-sin off" = "multiple victims/single offender",
  "mult vic-unk off" = "multiple victims/unknown offender(s)",
  "sngl vict-1 offe" = "single victim/single offender",
  "sngl vic unk off" = "single victim/unknown offender(s)",
  "sngle vctm/sngle off" = "single victim/single offender",
  "snge vtm/unkwn off/s" = "single victim/unknown offender(s)",
  "mtple vtms/sngl off" = "multiple victims/single offender",
  "mlt vcts/unkwn off/s" = "multiple victims/unknown offender(s)",
  "snge vctm/mult offs" = "single victim/multiple offenders",
  "mple vtms/mult offs" = "multiple victims/multiple offenders"
)

# Regex pattern (name) -> replacement (value) for the population field: the
# empty-string pattern maps to "", and both "unknown / not reported"
# spellings map to NA.
population <- c(
  "^$" = "",
  "unknown or not reported" = NA_character_,
  "unknwn/not reprtd" = NA_character_
)
# jacobkap/crime_data documentation built on April 16, 2023, 11:58 p.m.