knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(magrittr)
library(stringr)
library(tidyr)
library(njoaguof)

Read the raw data

agency_2021_csv <- "2021_NJOAGLEOD_Agency_Information.csv"
officer_2021_csv <- "2021_NJOAGLEOD_Officer_Information.csv"

agency_2021 <- system.file("extdata", agency_2021_csv, package="njoagleod") %>%
  readr::read_csv()
officer_2021 <- system.file("extdata", officer_2021_csv, package="njoagleod") %>%
  readr::read_csv()

Clean up the agency table

Rename the columns and add a year column.

agency <- agency_2021 %>%
  rename(agency_county=County,
         agency_name=Agency,
         agency_type=`Agency Type`,
         hiring_governed_by_civil_service=`Hiring governed by Civil Service?`) %>%
  mutate(year=2021) %>% 
  relocate(year)

agency_county

We want to use the same counties as the US census (and package njoaguof). In the source data, non-county agencies use "Statewide Or Regional LEA". as the county. We will map this to NA. We put this in a function, because we will do this twice -- once for the agency table, and once for the officer table.

fix_agency_county <- function(tbl) {
  county_levels <- levels(njoaguof::incident$agency_county)
  stopifnot(
    "Statewide Or Regional Lea County" ==
      tbl %>% pull(agency_county) %>% str_to_title() %>%
      paste0(" County") %>% setdiff(county_levels)
  )
  tbl %>%
    mutate(agency_county=str_to_title(agency_county)) %>%
    mutate(agency_county=paste0(agency_county, " County")) %>%
    mutate(agency_county=factor(agency_county, county_levels))
}

agency <- agency %>% fix_agency_county()

agency_name

We want to use the same agency names as the njoaguof package, so we will do some cleanup here. Because we need to do this for both the agency and officer tables, we put it in a function.

fix_agency_name <- function(tbl) {
  tbl %>%
    mutate(
      agency_name = case_when(
        agency_name == "Sussex Co Sheriff'S Dept" ~ "Sussex Co Sheriffs Dept",
        agency_name == "Cumberland Co Sheriff'S Dept" ~ "Cumberland Co Sheriffs Dept",
        agency_name == "Hunterdon Co Sheriff'S Dept" ~ "Hunterdon Co Sheriffs Dept",
        agency_name == "Burlington County Sheriffs Off" ~ "Burlington Co Sheriffs Office",
        agency_name == "Mercer Co Prosecutor Offfice" ~ "Mercer Co Prosecutors Office",
        agency_name == "Hudson Co Prosecutor Office" ~ "Hudson Co Prosecutors Office",
        agency_name == "Somerset County Sheriff's Office" ~ "Somerset Co Sheriffs Office",
        agency_name == "Totowa Boro  PD" ~ "Totowa Boro PD",
        agency_name == "New Jersey Institute Of Technol" ~ "New Jersey Institute Of Technology",
        agency_name == "NJ Div Criminal Justice - Trenton" ~ "NJ Division of Criminal Justice",
        agency_name == "Toms River Township" ~ "Toms River Twp PD",
        agency_name == "Hardyston Twp Police Dept" ~ "Hardyston Twp PD",
        agency_name == "Linden Police Department" ~ "Linden PD",
        TRUE ~ agency_name
      )
    )
}

agency <- agency %>% fix_agency_name()

agency_type

There are four values we expect here. We encode this as a factor and check that there are no rows which don't match.

agency_type_levels <- c("State-wide", "County", "Municipal", 
                        "Multiple Municipalities", "Not Provided")
agency <- agency %>%
  mutate(agency_type = factor(agency_type, agency_type_levels))

stopifnot(0 == agency %>% filter(is.na(agency_type)) %>% nrow())

The Chester Police Department is incorrectly classified as municipal. It is a multi-municipality agency covering Chester Borough and Chester Township. We make that correction here.

agency <- agency %>%
  mutate(agency_type = if_else(
    agency_name == "Chester PD", 
    factor("Multiple Municipalities", agency_type_levels), agency_type))

hiring_governed_by_civil_service

This is a yes/no field (with some inconsistency in captialization) which we convert to a boolean. "Not provided" and other values are mapped to NA.

agency <- agency %>% 
  mutate(hiring_governed_by_civil_service = 
           str_to_lower(hiring_governed_by_civil_service)) %>%
  mutate(hiring_governed_by_civil_service=case_when(
    hiring_governed_by_civil_service == "yes" ~ TRUE,
    hiring_governed_by_civil_service == "no" ~ FALSE,
    TRUE ~ NA
  )) 

For municipal agencies, add agency_municipality, if possible

Most municipal agency names are of the form <town-name> PD, but the <town-name does not match exactly with the name in the US census. We want to add the US Census name if it can be determined from the agency name.

First, lets verify that the the rule that municipal agency names end in "PD" has only three exceptions:

stopifnot(
  setequal(
    agency %>% 
      filter(agency_type == "Municipal") %>%
      filter(!str_detect(agency_name, "PD$")) %>%
      pull(agency_name),
    c("Cherry Hill Campus Police", 
      "Stevens Institute Of Technology", 
      "Princeton University Police")
  )
)

Next, lets initialize a lookup table which we will fill up incrementally. We include only municipal agencies that end in "PD". We temporarily add the column agency_no_pd, which is just the agency name minus the "PD" extension.

municipality_lookup <- agency %>% 
  filter(agency_type == "Municipal") %>%
  filter(str_detect(agency_name, " PD$")) %>%
  mutate(agency_no_pd = str_remove(agency_name, " PD$")) %>%
  mutate(municipality = NA_character_) %>%
  select(agency_county, agency_name, agency_no_pd, municipality)
municipality_lookup

The following function will take a partially filled in municipality_lookup table with an added column -- test_municipality -- and compare it to the municipality table. Any matching row will be updated.

add_test_municipalities <- function(municipality_lookup) {
  municipality_lookup %>% 
    filter(is.na(municipality)) %>%
    select(agency_name, county=agency_county, municipality=test_municipality) %>%
    inner_join(njoagleod::municipality, by=c("county", "municipality")) %>%
    right_join(municipality_lookup, by=c("agency_name", "county"="agency_county")) %>%
    mutate(municipality = if_else(
      is.na(municipality.y), 
      municipality.x, 
      municipality.y)) %>%
    select(agency_county=county, agency_name, agency_no_pd, municipality)
}

Now we try variations on the agency_no_pd column to fill out the municipality_lookup table. We leave in the commented out filter(is.na(municipality)) lines to emphasize the iterative nature of this step.

municipality_lookup <- municipality_lookup %>% 
  #filter(is.na(municipality))
  mutate(test_municipality = paste0(agency_no_pd, " city")) %>%
  add_test_municipalities() %>% 
  #filter(is.na(municipality))
  mutate(test_municipality = paste0(agency_no_pd, " town")) %>%
  add_test_municipalities() %>% 
  #filter(is.na(municipality))
  mutate(test_municipality = paste0(agency_no_pd, " village")) %>%
  add_test_municipalities() %>% 
  #filter(is.na(municipality))
  mutate(test_municipality = paste0(agency_no_pd, " borough")) %>%
  add_test_municipalities() %>% 
  #filter(is.na(municipality))
  mutate(test_municipality = paste0(agency_no_pd, " township")) %>%
  add_test_municipalities() %>% 
  #filter(is.na(municipality))
  mutate(test_municipality = str_replace(agency_no_pd, "City", "city")) %>%
  add_test_municipalities() %>% 
  #filter(is.na(municipality))
  mutate(test_municipality = str_replace(agency_no_pd, "Twp", "township")) %>%
  add_test_municipalities() %>% 
  #filter(is.na(municipality))
  mutate(test_municipality = str_replace(agency_no_pd, "Boro", "borough")) %>%
  add_test_municipalities() %>% 
  #filter(is.na(municipality))
  mutate(test_municipality = str_replace(agency_no_pd, "Town", "town")) %>%
  add_test_municipalities() %>% 
  #filter(is.na(municipality))
  mutate(test_municipality = str_replace(agency_no_pd, "Village", "village")) %>%
  add_test_municipalities() %>% 
  #filter(is.na(municipality))
  mutate(test_municipality = paste0(agency_no_pd, " City city")) %>%
  add_test_municipalities()

For most of the remaining municipalities, we can set the municipality manually.

municipality_lookup <- municipality_lookup %>% 
  mutate(municipality = case_when(
    agency_name == "Mt. Ephraim PD" ~ "Mount Ephraim borough",
    agency_name == "Orange PD" ~ "City of Orange township",
    agency_name == "South Orange PD" ~ "South Orange Village township",
    agency_name == "Avon-By-The-Sea PD" ~ "Avon-by-the-Sea borough",
    agency_name == "Bayhead Boro PD" ~ "Bay Head borough",
    agency_name == "Oceangate Boro PD" ~ "Ocean Gate borough",
    agency_name == "Peapack-Gladstone PD" ~ "Peapack and Gladstone borough",
    TRUE ~ municipality
  ))

At this point, there are two known exceptional agencies left. Let us remove them and then verify that the table is complete, and that every municipality in the lookup table is found in the municipality table. We also don't need the agency_no_pd column anymore, so we will remove it.

municipality_lookup <- municipality_lookup %>% 
  filter(!(agency_name %in%
             c("Monmouth University PD",
               "Washington Park PD"))) %>%
  select(-agency_no_pd)

# Check there are no mor NAs
stopifnot(0 ==
            municipality_lookup %>% filter(is.na(municipality)) %>% nrow())

# Check that every municipality is in the lookup table
stopifnot(0 ==
            municipality_lookup %>%
            anti_join(njoagleod::municipality, by = c("agency_county" = "county", "municipality")) %>%
            nrow())

Finally, we can use the lookup table to add a municipality column to the agency table.

agency <- agency %>% 
  left_join(municipality_lookup, by=c("agency_county", "agency_name"))

Clean up the officer table

Rename the columns and add a year column.

officer <- officer_2021 %>%
  rename(agency_county=County,
         agency_name=Agency,
         officer_age=Age,
         officer_race=`Race/Ethnicity`,
         officer_gender=Gender
         ) %>%
  mutate(year=2021) %>% 
  relocate(year)

agency_county and agency_name

We reuse the functions created to the agency table.

officer <- officer %>% 
  fix_agency_county() %>% 
  fix_agency_name()

officer_age

No change.

officer_race

We use the same levels as the njoaguof package, mapping "Not provided" to NA. We will do a check to make sure that we are not missing any values when we do this mapping.

# Save this value for the check at the end
officer_race_not_provided <- officer %>% 
  filter(officer_race=="Not provided") %>%
  nrow()

# Now do the mapping
race_levels <- levels(incident$officer_race)
officer <- officer %>% 
  mutate(officer_race = str_to_title(officer_race)) %>%
  mutate(officer_race = case_when(
    officer_race == "American Indian Or Alaska Native" ~ "American Indian",
    officer_race == "Black Or African American" ~ "Black",
    officer_race == "Native Hawaiian Or Other Pacific Islander" ~ "Native Hawaiian or other Pacific Islander",
    officer_race == "Not Provided" ~ NA_character_,
    officer_race == "Two Or More Races" ~ "Two or more races",
    TRUE ~ officer_race
  )) %>% 
  mutate(officer_race=factor(officer_race, levels=race_levels))

# Check that we did not introduce any more `NA`s than we meant to.
stopifnot(
  officer_race_not_provided == 
    officer %>% filter(is.na(officer_race)) %>% nrow()
)

officer_gender

We will use this same levels as the njoaguof package, mapping "No Provided" to NA. We will do a check to make sure that we are not missing any values when we do this mapping.

# Save this value for the check at the end
officer_gender_not_provided <- officer %>% 
  filter(officer_gender=="Not Provided") %>%
  nrow()

# Now do the mapping
gender_levels <- levels(njoaguof::incident$officer_gender)
officer <- officer %>% 
  mutate(officer_gender=case_when(
    officer_gender == "Not Provided" ~ NA_character_,
    officer_gender == "X Or Non-Binary" ~ "Gender Non-Conforming/X",
    TRUE ~ officer_gender
  )) %>%
  mutate(officer_gender=factor(officer_gender, levels=gender_levels))

# Check that we did not introduce any more `NA`s than we meant to.
stopifnot(
  officer_gender_not_provided == 
    officer %>% filter(is.na(officer_gender)) %>% nrow()
)

Final cleanup and consistency checks

Duplicate entries for DRBA PD

The "Delaware River and Bay Authority Police Department" is listed twice, associated with two different counties, with with two entries for each officer.

drbapd <- "Delaware River and Bay Authority Police Department"
agency %>% filter(agency_name==drbapd)
officer %>% filter(agency_name==drbapd) %>% 
  group_by(agency_county) %>%
  count()

We will keep just the Salem County records, and change the agency_county to NA.

# In agency, filter out the non-Salem County DRBA agencies.
agency <- agency %>% filter(agency_name != drbapd |
                    (agency_name == drbapd &
                       agency_county == "Salem County"))
# Now replace Salem County with NA
agency$agency_county[agency$agency_name == drbapd] <- NA

# Now do the same for the officer table
officer <- officer %>% filter(agency_name != drbapd |
                              (agency_name == drbapd &
                                 agency_county == "Salem County"))
officer$agency_county[officer$agency_name == drbapd] <- NA

Integrity check: uniqueness of primary key in agency table

Each combination of year, agency_county and agency_name should appear at most one time.

stopifnot(
  0 ==
    agency %>%
    group_by(year, agency_county, agency_name) %>%
    count() %>%
    filter(n > 1) %>%
    nrow()
)

Integrity check: foreign key in officer table

Every year, agency_county and agency_name appearing in officer should also appear in agency.

stopifnot(
  0 ==
    officer %>%
    select(year, agency_county, agency_name) %>%
    unique() %>%
    anti_join(agency, by = names(.)) %>%
    nrow()
)

Integrity check: Every agency has at least one officer

For every year, agency_county and agency_name combination appearing in the agency table, check that there is at least one entry in the officer table.

stopifnot(
  0 ==
    agency %>%
    select(year, agency_county, agency_name) %>%
    anti_join(officer, by = names(.)) %>%
    nrow()
)

Consistency with njoaguof.

Ideally, every agency in the njoaguof package would be reflected in the the agency table. However, the NJ OAG LEOD dataset excludes corrections officers. And even after we filter out corrections officers from the njoaguof dataset, there are still some missing agencies.

Let's check that the only missing agencies are the various corrections departments, plus a short list of known exceptions.

stopifnot(
  njoaguof::incident %>%
    select(agency_county, agency_name) %>%
    unique() %>%
    anti_join(agency, by = names(.)) %>%
    filter(!str_detect(agency_name, "Corrections")) %>%
    pull(agency_name) %in%
    c(
      "Toms River Township",
      "Hardyston Twp Police Dept",
      "Linden Police Department",
      "Freehold Boro PD",
      "Washington Twp PD",
      "Boonton Twp PD",
      "College Of New Jersey",
      "State Parole Board"
    ) %>%
    all()
)


tor-gu/njoagleod documentation built on Jan. 9, 2023, 5:41 a.m.