# Accepted degree abbreviations, grouped by level. All entries are upper case
# with no punctuation, which is the form produced by sanitize().
phd_degrees <- c("PHD", "DMA")
ma_degrees <- c(
  "MA", "MFA", "MED", "MS", "MM", "MMA", "PHM"
)
ba_degrees <- c(
  "BA", "BS", "AB", "BM", "BE", "BFA", "BPHIL"
)
#' Standard error of the mean
#'
#' Divides the sample standard deviation of `l` by the square root of the
#' number of elements in `l`.
#'
#' @param l A numeric vector.
#' @return A single number: `sd(l) / sqrt(length(l))`.
#' @export
std_error <- function(l) {
  n_obs <- length(l)
  spread <- stats::sd(l)
  spread / sqrt(n_obs)
}
#' Accepted abbreviations for an undergraduate degree
#'
#' Accessor for the file-level `ba_degrees` constant.
#'
#' @return A character vector of undergraduate degree abbreviations.
get_ba_degrees <- function() {
  ba_degrees
}
#' Extract the year a professor was registered as faculty
#'
#' Strips every non-digit character from `person` and reads the last four
#' remaining digits as the year. This relies on the year having been appended
#' to the end of the string (the original note says this needs
#' `append_year=TRUE`, an option set outside this function).
#'
#' @param person A string describing a faculty member, ending with a year.
#' @return The last four digits found in `person`, as an integer. `NA` when
#'   `person` contains no digits.
get_prof_year <- function(person) {
  # Keep only the digits so the trailing year becomes the string's tail.
  digits_only <- gsub("[^0-9]", "", person)
  n_digits <- nchar(digits_only)
  last_four <- substr(digits_only, n_digits - 3, n_digits)
  as.integer(last_four)
}
#' Does a string mention both a degree and a year?
#'
#' The string is stripped of characters that are neither printable nor
#' whitespace, hyphens are turned into spaces, and the result is split on
#' single spaces. Only the first element of `str` is examined.
#'
#' @param str String in question.
#' @return `TRUE` when at least one token contains four consecutive digits
#'   and at least one token, once passed through `sanitize()`, equals one of
#'   the known degree abbreviations; `FALSE` otherwise.
contains_school_info <- function(str) {
  cleaned <- gsub("[-]", " ", gsub("[^[:graph:]^[:space:]]", "", str))
  tokens <- stringr::str_split(cleaned, " ")[[1]]

  known_degrees <- sanitize(c(phd_degrees, ma_degrees, ba_degrees))

  # A year is any token holding a run of four digits.
  has_year <- any(grepl("[0-9]{4}", tokens))
  # Both sides are sanitized, so punctuation and case do not block a match.
  has_degree <- length(intersect(known_degrees, sanitize(tokens))) > 0

  has_year & has_degree
}
#' Does a string contain a page number?
#'
#' A page number is taken to be a standalone run of exactly three digits,
#' because all page numbers in this section of the data are three digits
#' long.
#'
#' @param str String (or character vector) in question.
#' @return Logical vector, `TRUE` where a three-digit word is present.
is_page_number <- function(str) {
  # Word boundaries on both sides reject longer digit runs such as years.
  page_number_pattern <- "\\b[0-9]{3}\\b"
  grepl(page_number_pattern, str)
}
#' Does a string describe a professor?
#'
#' Looks for any of a fixed set of faculty title keywords, each matched as a
#' whole word (or phrase) and case-sensitively.
#'
#' @param person String in question. A character vector is also accepted, in
#'   which case a match in any element counts.
#' @return A single logical: `TRUE` when at least one keyword is found,
#'   `FALSE` otherwise (including for zero-length input).
is_this_a_professor <- function(person) {
  # "rofessor" is listed on its own so that a title whose leading "P" is
  # missing still matches as a whole word.
  professor_keywords <- c(
    "Professor", "rofessor", "Professorship", "Fellow", "Lecturer",
    "Librarian", "Theatre Production Manager", "Director", "Instructor",
    "Artist-in-Residence", "Artist in Residence"
  )
  # vapply() with an inner any() always yields one logical per keyword, so
  # zero-length input gives FALSE instead of an error from sum() on a list.
  keyword_found <- vapply(
    professor_keywords,
    function(keyword) any(grepl(sprintf("\\b%s\\b", keyword), person)),
    logical(1)
  )
  any(keyword_found)
}
#' Normalize text for searching
#'
#' Upper-cases the input and then drops every character that is not a
#' letter.
#'
#' @param str String (or character vector) to normalize.
#' @return `str` in upper case with all non-letter characters removed.
sanitize <- function(str) {
  gsub("[^[:alpha:]]", "", toupper(str))
}
#' Is a string a complete faculty entry?
#'
#' An entry is complete when it carries both a faculty title (checked with
#' `is_this_a_professor()`) and schooling information, i.e. a degree and a
#' year (checked with `contains_school_info()`).
#'
#' @param person A string; `NULL` is accepted and yields `FALSE`.
#' @return A single logical: whether `person` meets both requirements.
is_suitable_faculty_candidate <- function(person) {
  if (is.null(person)) {
    return(FALSE)
  }
  # Both checks are evaluated before being combined.
  has_title <- sum(is_this_a_professor(person)) > 0
  has_schooling <- sum(contains_school_info(person)) > 0
  has_title && has_schooling
}
#' Trim whitespace from every element
#'
#' Applies `stringr::str_trim()` to each element of `a` in turn.
#'
#' @param a A vector or list of strings.
#' @return A list with the same length as `a` holding the trimmed strings.
str_trim_arr <- function(a) {
  lapply(a, stringr::str_trim)
}
#' Split a string on commas and trim each piece
#'
#' Only the first element of `str` is split; the pieces are then trimmed
#' with `str_trim_arr()`.
#'
#' @param str String to split.
#' @return A list of the trimmed, comma-separated pieces.
split_trim <- function(str) {
  pieces <- stringr::str_split(str, ",")[[1]]
  str_trim_arr(pieces)
}
#' Regular expression matching any undergraduate degree
#'
#' Each entry of `ba_degrees` is anchored at both ends, and the anchored
#' entries are joined with `|`.
#'
#' @return A single pattern string.
ba_pattern <- function() {
  anchored <- paste0("^", ba_degrees, "$")
  paste(anchored, collapse = "|")
}
#' Regular expression matching any master's degree
#'
#' Each entry of `ma_degrees` is anchored at both ends, and the anchored
#' entries are joined with `|`.
#'
#' @return A single pattern string.
ma_pattern <- function() {
  anchored <- paste0("^", ma_degrees, "$")
  paste(anchored, collapse = "|")
}
#' Regular expression matching any doctorate
#'
#' Each entry of `phd_degrees` is anchored at both ends, and the anchored
#' entries are joined with `|`.
#'
#' @return A single pattern string.
phd_pattern <- function() {
  anchored <- paste0("^", phd_degrees, "$")
  paste(anchored, collapse = "|")
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.