#' Hispanic identity imputation from last name.
#'
#' @description Hispanic origin imputation from the individual's last name
#'   using the US 2000 Census data. Names are upper-cased and looked up in
#'   the \code{census00} data set; names with no match are kept in the output
#'   with missing values.
#' @param last.name A character vector of last names. Non-character input
#'   raises a warning and is coerced with \code{as.character()} before the
#'   lookup.
#' @return A data frame with one row per element of \code{last.name}, sorted
#'   by \code{name.last}, with the columns:
#'   \describe{
#'     \item{name.last}{The last name as stored in \code{census00} for matched
#'       names, or the upper-cased input for names with no match.}
#'     \item{percent.hispanic}{The percent of individuals with that last name
#'       who identified themselves as Hispanic in the US Census 2000
#'       (\code{NA} when the name is not matched).}
#'     \item{hispanic.impute}{1 when \code{percent.hispanic} is above 50,
#'       0 otherwise (\code{NA} when the name is not matched).}
#'     \item{count}{The \code{count} column of \code{census00}
#'       (\code{NA} when the name is not matched).}
#'     \item{last.name.original}{The name exactly as it was passed in (after
#'       coercion to character), on the same row as its lookup result.}
#'   }
#' @examples
#' hispCensus2000("ShemTov")
#' hispCensus2000("Shem Tov")
#' hispCensus2000(c("ShemTov","li","Londono","smith"))
#' @import dplyr
#' @export
hispCensus2000 <- function(last.name) {
  # NOTE(review): the message says an NA is returned, but the input is coerced
  # below and still looked up (e.g. a factor of names can match) -- confirm
  # the intended wording.
  if (!is.character(last.name)) {
    warning("The last name is not a character variable and the function will return an NA")
  }

  # Coerce to character and keep the names exactly as supplied.
  last.name <- as.character(last.name)
  last.name.original <- last.name

  # Census records are matched on upper-case names.
  last.name <- toupper(last.name)

  # NOTE(review): the lookup uses `census00$name` while the output selects
  # `name.last`; unless census00 has a separate `name` column this presumably
  # works only through `$` partial matching -- confirm against the data set.
  index.match <- match(last.name, census00$name)
  is.matched <- !is.na(index.match)

  # Census rows for the matched names, in input order. Only non-missing
  # indices are passed to slice() so the result does not depend on how
  # slice() treats NA.
  results <- census00 %>%
    slice(index.match[is.matched]) %>%
    mutate(
      hispanic.impute = as.numeric(percent.hispanic > 50)
    ) %>%
    select(name.last, percent.hispanic, hispanic.impute, count)

  # Attach the original names BEFORE sorting. Sorting the original names
  # separately (as was done previously) can misalign them with their rows,
  # because the upper-cased and original spellings need not sort identically.
  results$last.name.original <- last.name.original[is.matched]

  # Placeholder rows for the names that were not found in the census data.
  if (any(!is.matched)) {
    results.na <- data.frame(
      name.last = last.name[!is.matched],
      percent.hispanic = NA,
      hispanic.impute = NA,
      count = NA,
      last.name.original = last.name.original[!is.matched],
      stringsAsFactors = FALSE
    )
    results <- rbind(results, results.na)
  }

  # A single permutation reorders every column, keeping rows intact.
  results <- results[order(results$name.last), ]
  results
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.