# R/gender.R

#' Determines the gender of names based on findings on the website nameberry.com
#'
#' Each name is looked up (via \code{check_name}) in the boy and girl name
#' files shipped in the \code{extdata} directory of the WilliamsStaff package.
#' @param names Names to determine gender
#' @return A character vector of the same length as \code{names} (named by
#'   \code{names} when that is a character vector).  Each entry is one of
#'   "male", "female", "both", or "not listed", depending on which of the
#'   name files the name is found in
#' @export
determine_genders <- function(names){
  # mustWork = TRUE makes a missing data file fail right here with a clear
  # message instead of handing an empty path to readLines()
  boy.names <- readLines(
    system.file("extdata", "boys.txt", package = "WilliamsStaff", mustWork = TRUE)
  )
  girl.names <- readLines(
    system.file("extdata", "girls.txt", package = "WilliamsStaff", mustWork = TRUE)
  )

  # vapply() guarantees a character vector even for empty input, where
  # sapply() would silently return an empty list
  vapply(names, function(name){
    male   <- check_name(name, boy.names)
    female <- check_name(name, girl.names)
    if (male && female) {
      "both"
    } else if (male) {
      "male"
    } else if (female) {
      "female"
    } else {
      "not listed"
    }
  }, character(1))
}

#' Checks if the first name of a given name is present in a list of names
#'
#' Everything after the first comma in \code{n} is dropped, every character
#' other than ASCII letters and spaces is removed, and the first
#' space-separated word that remains is used as the first name.  An entry of
#' \code{list} counts as a hit when it contains that first name immediately
#' followed by a comma and not immediately preceded by another letter.  The
#' comparison is case-sensitive.  Names with special characters might not be
#' detected.
#' @param n The target name (only the first element is used)
#' @param list The universal list of names (a character vector)
#' @return present A boolean indicating whether n is found in the list; FALSE
#'   when no first name can be extracted from n
check_name <- function(n, list){
  if (length(n) == 0L) {
    return(FALSE)
  }
  # Keep only the part in front of the first comma
  full_name <- strsplit(as.character(n), ",", fixed = TRUE)[[1]][1]
  # Strip everything except ASCII letters and spaces.  The range must be A-Z:
  # "A-z" would also keep [ \ ] ^ _ and the backtick, which could then end up
  # inside the regular expression built below
  full_name <- trimws(gsub("[^a-zA-Z ]", "", full_name))
  first_name <- strsplit(full_name, " ", fixed = TRUE)[[1]][1]
  # Without a first name the pattern would degenerate to "," and match nearly
  # every entry, so report "not found" instead
  if (is.na(first_name) || !nzchar(first_name)) {
    return(FALSE)
  }
  # The trailing comma stops the name from matching as a prefix of a longer
  # name; the leading group stops it from matching as a suffix of one
  pattern <- paste0("(^|[^A-Za-z])", first_name, ",")
  any(grepl(pattern, list))
}

#' Determines whether an input is integer(0)
#' @param x Any input
#' @return boolean TRUE iff the input is an integer vector of length zero
is.integer0 <- function(x){
  # Anything that is not of integer type can never be integer(0)
  if (!is.integer(x)) {
    return(FALSE)
  }
  length(x) == 0L
}
# PhilBrockman/WilliamsStaff documentation built on May 8, 2019, 1:33 a.m.