oscn_compile_definfo <- function(courts, casetypes, file_years) {
courts <- courts[courts %in% oscn_counties]
qoscn <- function(table, fields) {
dbGetQuery(ojo_db, glue_sql(
"SELECT {`fields`*}
FROM {`table`}
WHERE court IN ({courts*})
AND casetype IN ({casetypes*})
AND file_year IN ({file_years*})",
.con = ojo_db)) %>%
mutate_at(vars(contains("_date")), list(ymd))
}
connect_ojo()
di <- qoscn("oscn_parties", c("court", "casenum", "party_id", "party_name", "party_type")) %>%
filter(party_type %in% c("Defendant", "DEFENDANT")) %>%
rename(def_id = party_id, defname = party_name) %>%
left_join(qoscn("oscn_caseinfo", c("court", "casenum", "file_date"))) %>%
left_join(qoscn("oscn_party_address", c("court", "casenum", "def_id", "def_address", "def_zip", "rec_date"))) %>%
mutate(datedif = abs(rec_date - file_date),
def_address = str_squish(def_address) %>%
str_remove_all("[^[[:alpha:]]|,| ]") %>%
str_remove(" U(S| )A$")) %>%
group_by(court, casenum, def_id) %>%
filter(datedif == min(datedif)) %>%
group_by(court, casenum, def_id, defname) %>%
summarize(def_address = first(def_address[which(!is.na(def_address) & def_address != "")]),
def_zip = first(def_zip[which(!is.na(def_zip))])) %>%
left_join(qoscn("oscn_party_profile", c("court", "casenum", "def_id", "def_mob")) %>%
filter(!is.na(def_mob)) %>%
group_by(court, casenum, def_id) %>%
summarize(def_mob = first(def_mob))) %>%
mutate(firstname = str_extract(defname, "(?<=, ).*?(?= |$)"),
middlename = str_extract(defname, "(?<= )[[:alpha:]]*?$"),
b_year = str_sub(def_mob, 1, 4) %>%
as.numeric)
print("Defendant data queried.")
gender_tbl <- dbReadTable(ojo_db, "ojo_gender")
dip <- di %>%
ungroup %>%
select(firstname, b_year) %>%
distinct() %>%
anti_join(gender_tbl %>%
mutate(b_year = ifelse(year_min == year_max, year_min, NA)) %>%
select(firstname = name, b_year))
sex_df <- tibble()
print("Getting gender data for first names . . .")
total_time <- 0
if (nrow(dip) > 0) {
for (i in 1:nrow(dip)) {
start_time <- Sys.time()
if (is.na(dip[i, "b_year"])) {
sex_df <- sex_df %>%
bind_rows(gender(dip[i, "firstname"] %>% as.character(),
years = c(1950, 2000),
method = "ssa") %>%
mutate(gender = as.character(gender)))
} else {
sex_df <- sex_df %>%
bind_rows(gender(dip[i, "firstname"] %>% as.character(),
years = dip[i, "b_year"] %>% as.numeric,
method = "ssa") %>%
mutate(gender = as.character(gender)))
}
total_time <- total_time + (Sys.time() - start_time)
print(paste(i, "of", nrow(dip), "done. About",
if_else(round((total_time/i)*(nrow(dip) - i), 1) < 60,
paste(round((total_time/i)*(nrow(dip) - i), 1), "seconds remaining."),
paste(round((total_time/i)*(nrow(dip) - i)/60, 1), "minutes remaining.")))
)
}
}
gender_tbl <- gender_tbl %>%
bind_rows(sex_df %>%
mutate(newrow = TRUE))
round2 <- di %>%
ungroup %>%
select(firstname, middlename, b_year) %>%
left_join(gender_tbl %>%
mutate(b_year = ifelse(year_min == year_max, year_min, NA)) %>%
select(firstname = name, b_year, proportion_male)) %>%
filter(proportion_male > .25 & proportion_male < .75) %>%
select(middlename, b_year) %>%
filter(nchar(middlename) > 1) %>%
distinct
middle_sex <- tibble()
total_time <- 0
print("Searching for middle names")
if (nrow(round2) > 0) {
for (i in 1:nrow(round2)) {
start_time <- Sys.time()
if (is.na(round2[i, "b_year"])) {
middle_sex <- middle_sex %>%
bind_rows(gender(round2[i, "middlename"] %>% as.character(),
years = c(1950, 2000),
method = "ssa") %>%
mutate(gender = as.character(gender)))
} else {
middle_sex <- middle_sex %>%
bind_rows(gender(round2[i, "middlename"] %>% as.character(),
years = round2[i, "b_year"] %>% as.numeric,
method = "ssa") %>%
mutate(gender = as.character(gender)))
}
total_time <- total_time + (Sys.time() - start_time)
print(paste(i, "of", nrow(round2), "middle names done. About",
if_else(round((total_time/i)*(nrow(round2) - i), 1) < 60,
paste(round((total_time/i)*(nrow(round2) - i), 1), "seconds remaining."),
paste(round((total_time/i)*(nrow(round2) - i)/60, 1), "minutes remaining.")))
)
}
}
gender_tbl <- gender_tbl %>%
bind_rows(middle_sex %>%
mutate(newrow = TRUE))
dbWriteTable(ojo_db, "ojo_gender",
gender_tbl %>%
filter(newrow == TRUE) %>%
select(-newrow),
row.names = FALSE, append = TRUE)
di <- di %>%
left_join(gender_tbl %>%
mutate(b_year = ifelse(year_min == year_max, year_min, NA)) %>%
select(firstname = name, b_year, proportion_male, gender) %>%
distinct)
di <- di %>%
left_join(gender_tbl %>%
mutate(b_year = ifelse(year_min == year_max, year_min, NA)) %>%
select(middlename = name, b_year, proportion_male_middle = proportion_male, gender_middle = gender) %>%
distinct)
di_all <<- di %>%
mutate(sex_imputed = case_when(proportion_male < .25 | proportion_male > .75 ~ gender,
proportion_male_middle < .25 | proportion_male_middle > .75 ~ gender_middle,
TRUE ~ "unknown"),
sex_impute_method = case_when(proportion_male < .25 | proportion_male > .75 ~ "first name",
proportion_male_middle < .25 | proportion_male_middle > .75 ~ "middle name",
TRUE ~ "none")) %>%
filter(!str_detect(defname, "OSBI|PROFILE")) %>%
mutate(casetype = str_sub(casenum, 1, 2),
file_year = str_sub(casenum, 4, 7))
dbxUpsert(ojo_db, "ojo_def_info", di_all, where_cols = c("court", "casenum", "defname"))
disconnect_ojo()
}
odcr_compile_definfo <- function(courts, casetypes, file_years) {
courts <- courts[courts %in% odcr_counties]
qodcr <- function(table, fields) {
dbGetQuery(ojo_db, glue_sql(
"SELECT {`fields`*}
FROM {`table`}
WHERE court IN ({courts*})
AND casetype IN ({casetypes*})
AND file_year IN ({file_years*})",
.con = ojo_db)) %>%
mutate_at(vars(contains("_date")), list(ymd))
}
connect_ojo()
di <- qodcr("odcr_party", c("court", "casenum", "party", "party_type")) %>%
filter(party_type == "Defendant") %>%
select(-party_type) %>%
rename(defname = party) %>%
left_join(qodcr("odcr_party_profile", c("court", "casenum", "def_mob", "def_address", "def_zip")) %>%
mutate(def_address = str_remove(def_address, "\\s{5}.*|\\d{5}") %>%
str_squish())) %>%
mutate(firstname = str_extract(defname, "(?<=, ).*?(?= |$)"),
middlename = str_extract(defname, "(?<= )[[:alpha:]]*?$"),
b_year = str_sub(def_mob, 1, 4) %>%
as.numeric)
gender_tbl <- dbReadTable(ojo_db, "ojo_gender")
dip <- di %>%
ungroup %>%
select(firstname, b_year) %>%
distinct() %>%
anti_join(gender_tbl %>%
mutate(b_year = ifelse(year_min == year_max, year_min, NA)) %>%
select(firstname = name, b_year))
sex_df <- tibble()
print("Getting gender data for first names . . .")
total_time <- 0
if (nrow(dip) > 0) {
for (i in 1:nrow(dip)) {
start_time <- Sys.time()
if (is.na(dip[i, "b_year"])) {
sex_df <- sex_df %>%
bind_rows(gender(dip[i, "firstname"] %>% as.character(),
years = c(1950, 2000),
method = "ssa") %>%
mutate(gender = as.character(gender)))
} else {
sex_df <- sex_df %>%
bind_rows(gender(dip[i, "firstname"] %>% as.character(),
years = dip[i, "b_year"] %>% as.numeric,
method = "ssa") %>%
mutate(gender = as.character(gender)))
}
total_time <- total_time + (Sys.time() - start_time)
print(paste(i, "of", nrow(dip), "done. About",
if_else(round((total_time/i)*(nrow(dip) - i), 1) < 60,
paste(round((total_time/i)*(nrow(dip) - i), 1), "seconds remaining."),
paste(round((total_time/i)*(nrow(dip) - i)/60, 1), "minutes remaining.")))
)
}
}
gender_tbl <- gender_tbl %>%
bind_rows(sex_df %>%
mutate(newrow = TRUE,
gender = as.character(gender)))
round2 <- di %>%
ungroup %>%
select(firstname, middlename, b_year) %>%
left_join(gender_tbl %>%
mutate(b_year = ifelse(year_min == year_max, year_min, NA)) %>%
select(firstname = name, b_year, proportion_male)) %>%
filter(proportion_male > .25 & proportion_male < .75) %>%
select(middlename, b_year) %>%
filter(nchar(middlename) > 1) %>%
distinct
middle_sex <- tibble()
total_time <- 0
print("Searching for middle names")
if (nrow(round2) > 0) {
for (i in 1:nrow(round2)) {
start_time <- Sys.time()
if (is.na(round2[i, "b_year"])) {
middle_sex <- middle_sex %>%
bind_rows(gender(round2[i, "middlename"] %>% as.character(),
years = c(1950, 2000),
method = "ssa") %>%
mutate(gender = as.character(gender)))
} else {
middle_sex <- middle_sex %>%
bind_rows(gender(round2[i, "middlename"] %>% as.character(),
years = round2[i, "b_year"] %>% as.numeric,
method = "ssa") %>%
mutate(gender = as.character(gender)))
}
total_time <- total_time + (Sys.time() - start_time)
print(paste(i, "of", nrow(round2), "middle names done. About",
if_else(round((total_time/i)*(nrow(round2) - i), 1) < 60,
paste(round((total_time/i)*(nrow(round2) - i), 1), "seconds remaining."),
paste(round((total_time/i)*(nrow(round2) - i)/60, 1), "minutes remaining.")))
)
}
}
gender_tbl <- gender_tbl %>%
bind_rows(middle_sex %>%
mutate(newrow = TRUE,
gender = as.character(gender)))
connect_ojo()
dbWriteTable(ojo_db, "ojo_gender",
gender_tbl %>%
filter(newrow == TRUE) %>%
select(-newrow),
row.names = FALSE, append = TRUE)
di <- di %>%
left_join(gender_tbl %>%
mutate(b_year = ifelse(year_min == year_max, year_min, NA)) %>%
select(firstname = name, b_year, proportion_male, gender) %>%
distinct)
di <- di %>%
left_join(gender_tbl %>%
mutate(b_year = ifelse(year_min == year_max, year_min, NA)) %>%
select(middlename = name, b_year, proportion_male_middle = proportion_male, gender_middle = gender) %>%
distinct)
di_all <<- di %>%
mutate(sex_imputed = case_when(proportion_male < .25 | proportion_male > .75 ~ gender,
proportion_male_middle < .25 | proportion_male_middle > .75 ~ gender_middle,
TRUE ~ "unknown"),
sex_impute_method = case_when(proportion_male < .25 | proportion_male > .75 ~ "first name",
proportion_male_middle < .25 | proportion_male_middle > .75 ~ "middle name",
TRUE ~ "none")) %>%
filter(!str_detect(defname, "OSBI|PROFILE")) %>%
mutate(casetype = str_sub(casenum, 1, 2),
file_year = str_sub(casenum, 4, 7))
dbxUpsert(ojo_db, "ojo_def_info", di_all, where_cols = c("court", "casenum", "defname"))
disconnect_ojo()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.