# Faculty-related info fetches and gets
#
#
#' Returns a list of faculty from the college with one professor per element
#'
#' Walks the raw lines returned by `find_data_by_year()`, stitches partial
#' lines together (joined with ", ") until they form a complete faculty entry,
#' and then normalises every entry with `general_format()`.
#'
#' @param year Desired year of interest
#' @param append_year If TRUE, appends '|||####' (where #### is `year`) to each
#'   faculty entry before it is formatted. Otherwise nothing is appended.
#' @return The professors working at Williams during the year in question:
#'   the result of `sapply()`-ing `general_format()` over the collected
#'   entries (an empty list when no entry was collected).
#' @export
collect_faculty <- function(year, append_year = FALSE) {
  raw_data <- find_data_by_year(year)
  fragment_bucket <- c() # used for picking up partial lines

  # At most one entry can be finished per raw line (plus the trailing one), so
  # preallocate instead of growing a vector with c() inside the loop.
  finished <- vector("list", length(raw_data) + 1L)
  n_finished <- 0L

  # Adds the optional '|||year' suffix to a finished entry.
  tag_entry <- function(entry) {
    if (append_year) paste0(entry, "|||", year) else entry
  }

  for (current in raw_data) {
    # The bucket is only closed when it already holds a full candidate AND the
    # current line starts a different professor; in every other case the
    # current line is a continuation and is glued onto the bucket.
    if (is_suitable_faculty_candidate(fragment_bucket) &&
        is_this_a_professor(current)) {
      n_finished <- n_finished + 1L
      finished[[n_finished]] <- tag_entry(fragment_bucket)
      fragment_bucket <- current
    } else {
      fragment_bucket <- paste(fragment_bucket, current, sep = ", ")
    }
  }

  # The loop only closes an entry when the *next* professor shows up, so the
  # last entry is still sitting in the bucket here. Flush it, otherwise the
  # final faculty member of every year is silently dropped.
  if (length(fragment_bucket) > 0 &&
      is_suitable_faculty_candidate(fragment_bucket)) {
    n_finished <- n_finished + 1L
    finished[[n_finished]] <- tag_entry(fragment_bucket)
  }

  faculty <- unlist(finished[seq_len(n_finished)], use.names = FALSE)

  # sapply() is kept deliberately so callers keep getting the same return shape
  # (names taken from the raw entries; a list when general_format() yields NULL).
  sapply(faculty, function(x) general_format(x, year))
}
#' Converts the formatting of the different PDFs to a common form
#'
#' That form is Name, Title, degree, year, degree, year, etc., i.e. every year
#' comes right after the degree it belongs to.
#'
#' @param person A line of content to describe a person
#' @param year The year the person information was found
#' @return general_form A comma separated string in which each year follows its
#'   degree. For a `year` outside 2000-2013 nothing is formatted and `NULL` is
#'   returned invisibly.
#' @export
general_format <- function(person, year) {
  # 2013: William G. Wagner, Brown Professor of History, 1974, BPhil, Oxford University, 1981, PHD, Oxford University
  # 2012: " Magnus T. Bernhardsson,Professor of History--B.A. (1990) University of Iceland, Ph.D. (1999) Yale"

  # First step is to clean off any non-letters from the beginning of the string
  person <- gsub("^[^[:alpha:]]+", "", person)
  separator <- ", "

  if (year == 2013) {
    clean <- stringr::str_split(person, ",")[[1]]
    clean <- stringr::str_trim(iconv(clean, "latin1", "ASCII", sub = ""))
    years <- grepl("^[0-9]{4}$", clean) # find the years
    # In 2013 the degree is the field right after its year, so the degree mask
    # is the year mask moved one position to the right.
    degrees <- taRifx::shift(years, -1)
    # Swap each year with the degree that follows it.
    temp <- clean
    temp[years] <- clean[degrees]
    temp[degrees] <- clean[years]
    return(stringr::str_c(temp, collapse = separator))
  } else if (year %in% 2000:2012) {
    # Remove the '(' and the ')' around a year
    person <- gsub("\\(([0-9]{4})\\)", " \\1 ", person)
    # Add commas around years so each year becomes a field of its own
    person <- gsub("\\b([0-9]{4})\\b", ", \\1,", person)
    # Turn the dash/semicolon into a regular separator
    person <- gsub("--", separator, person)
    person <- gsub(";", separator, person)
    # Remove any trailing/leading spaces of every field. The trimmed fields
    # (not the untrimmed input string) must be what gets joined and returned.
    clean <- stringr::str_split(person, separator)[[1]]
    clean <- stringr::str_trim(clean)
    return(stringr::str_c(clean, collapse = separator))
  }

  # Years outside 2000-2013 have no known layout; nothing is formatted.
  invisible(NULL)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.