R/dept_of_education.R

Defines functions .munge_doe_names .dictionary_doe_names

.dictionary_doe_names <-
  function() {
    tibble(name_doe = c("ID", "OPEID", "Institution Name", "City", "State", "Foreign Gift Received Date",
                        "Foreign Gift Amount", "Gift Type", "Country of Giftor", "Giftor Name"
    ),
    name_actual = c("row", "idOPE", "nameInstitution", "cityInstitution", "stateInstitution", "dateGift",
                    "amountGift", "typeGift", "countryGiftor", "nameGiftor"
    )

    )
  }

.munge_doe_names <-
  function(data) {
    dict_names <- .dictionary_doe_names()
    doe_names <-
      names(data)

    actual_names <-
      doe_names %>%
      map_chr(function(x) {
        df_row <-
          dict_names %>%
          filter(name_doe == x) %>%
          distinct() %>%
          slice(1)
        if (nrow(df_row) == 0) {
          glue::glue("Missing {x}") %>% message()
          return(x)
        }

        df_row$name_actual
      })

    data %>%
      set_names(actual_names)
  }

# foreign_aid -------------------------------------------------------------

#' Foreign Donations to U.S. Universities
#'
#' Returns all foreign donations to United States
#' unversities from foreign entities.
#'
#' @param snake_names if `TRUE` returns snake names
#' @param clean_entities if `TRUE` cleans entities
#' @param return_message if `TRUE` retuns a message
#'
#' @return `tibble`
#' @export
#'
#' @examples
#' us_university_foreign_donations()
us_university_foreign_donations <-
  memoise::memoise(function(snake_names = T, clean_entities = T, return_message = T) {
    data <-
      download_excel_file(
        "https://studentaid.gov/sites/default/files/fsawg/datacenter/library/ForeignGifts.xls"
      )
    ed_names <- data %>% slice(2) %>% as.character()
    data <- data %>% slice(3:nrow(data)) %>%
      setNames(ed_names)

    data <-
      .munge_doe_names(data = data) %>%
      select(-one_of("row"))

    data <- data %>%
      mutate(
        dateGift = as.numeric(dateGift) %>% janitor::excel_numeric_to_date(),
        idOPE = as.numeric(idOPE),
        yearGift = year(dateGift),
        nameGiftor = nameGiftor %>% coalesce("ANONYMOUS")
      ) %>%
      munge_data(clean_address = T,
                 unformat = T,
                 snake_names = F)

    data <- data %>%
      mutate(isAnonymousGift = nameGiftor %>% str_detect("ANONYMOUS|ANON DONOR|ANONYOUS|ANONYNMOUS"))


    if (clean_entities) {
      data <-
        data %>%
        entities::refine_columns(entity_columns = "nameGiftor")

      data <- data %>%
        mutate(
          nameGiftorClean = case_when(
            isAnonymousGift ~ "ANONYMOUS",
            nameGiftorClean %>% str_detect("CRRC INDUSTRIAL|CRRD") ~ "CRRC INDUSTRIAL",
            nameGiftorClean %>% str_detect("EMIRATES INSTITUTE FOR ADVANCED") ~ "EMIRATES INSTITUTE FOR ADVANCED SCIENCES",
            nameGiftorClean %>% str_detect("ANT FINANCIAL") ~ "ANT FINANCIAL",

            nameGiftorClean %>% str_detect("MOHAMMED A JAMEEL") ~ "MOHAMMED A JAMEEL",
            nameGiftorClean %>% str_detect("ANON DONOR|ANONYNMOUS") ~ "ANONYMOUS",
            nameGiftorClean %>% str_detect("QATAR FOUNDATION") ~ "QATAR FOUNDATION",
            nameGiftorClean %>% str_detect("ARAMCO") ~ "ARAMCO",
            nameGiftorClean %>% str_detect("ASTELLAS PHARMA") ~ "ASTELLAS PHARMA",
            nameGiftorClean %>% str_detect("ASTRA ZENECA|ASTRAZENECA") ~ "ASTRAZENECA",
            nameGiftorClean %>% str_detect("BASF") ~ "BASF",
            nameGiftorClean %>% str_detect("BAYER") ~ "BAYER",
            nameGiftorClean %>% str_detect("BEIJING GUANGHUI JIN") ~ "BEIJING GUANGHUI JINTONG",
            nameGiftorClean %>% str_detect("BIOCON") ~ "BIOCON ACADEMY",
            nameGiftorClean %>% str_detect("BMW") ~ "BMW",
            nameGiftorClean %>% str_detect("BOEHRING|BOEHRINGER") ~ "BOEHRINGER INGELHEIM",
            nameGiftorClean %>% str_detect("^BP |^BP-") ~ "BP",
            nameGiftorClean %>% str_detect("BRITISH AMERICAN TOBACCO") ~ "BRITISH AMERICAN TOBACCO",
            nameGiftorClean %>% str_detect("BURROUGHS WELLCOME") ~ "BURROUGHS WELLCOME",
            nameGiftorClean %>% str_detect("CAF AMERICAN DONOR") ~ "CAF AMERICAN DONOR",
            nameGiftorClean %>% str_detect("CAMBRIDGE UNIV") ~ "CAMBRIDGE UNIVERSITY",
            nameGiftorClean %>% str_detect("CELGENE") ~ "CELGENE INTERNATIONAL",
            nameGiftorClean %>% str_detect("CENTRE FOR THE AIDS PROGRAMME") ~ "CENTRE FOR THE AIDS PROGRAMME",
            nameGiftorClean %>% str_detect("CHEUNG KONG GRADUATE SCHOOL") ~ "CHEUNG KONG GRADUATE SCHOOL OF BUSINESS",
            nameGiftorClean %>% str_detect("CHIANG CHEN INDUSTRIAL") ~ "CHIANG CHEN INDUSTRIAL FOUNDATION",
            nameGiftorClean %>% str_detect("CHILDREN'S INVESTMENT FUND") ~ "CHILDREN'S INVESTMENT FUND",
            nameGiftorClean %>% str_detect("CHINA MEDICAL UNIVERSITY") ~ "CHINA MEDICAL UNIVERSITY",
            nameGiftorClean %>% str_detect("CHINA FAW GROUP") ~ "CHINA FAW GROUP",
            nameGiftorClean %>% str_detect("CHINA UNIVERSITY OF MINING") ~ "CHINA UNIVERSITY OF MINING",
            nameGiftorClean %>% str_detect(
              "CHINESE LANG COUNCIL|CHINESE LANGUAGE COUNCIL|HANBAN|CONFUCIOUS|CONFUCIUS|CONFUCIUS"
            ) ~ "CONFUCIOUS INSTITUTE - HANBAN",
            nameGiftorClean %>% str_detect("CHINESE CENTER FOR DISEASE CONTROL|CHINESE CDC") ~ "CHINESE CENTER FOR DISEASE CONTROL",
            nameGiftorClean %>% str_detect("CHRIST UNIVERSITY") ~ "CHRIST UNIVERSITY",
            nameGiftorClean %>% str_detect("CHUGAI PHARMA") ~ "CHUGAI PHARMACEUTICAL",
            nameGiftorClean %>% str_detect("CIMMYT") ~ "CIMMYT",
            nameGiftorClean %>% str_detect("CITY UNIVERSITY") ~ "CITY UNIVERSITY OF HONG KONG",
            nameGiftorClean %>% str_detect("CONSEJO NACIONAL") ~ "CONSEJO NACIONAL DE CIENCIA",
            nameGiftorClean %>% str_detect("COUNCIL OF AMERICA EDUCATION|COUNCIL OF AMERICAN EDUCATION") ~ "COUNCIL OF AMERICAN EDUCATION",
            nameGiftorClean %>% str_detect("NOVO NORDISK|NOVO-NORDISK") ~ "NOVO NORDISK",
            nameGiftorClean %>% str_detect(
              "ROYAL EMBASSY SAUDI|ROYAL EMB OF SAUDI|CUL MISSION OF THE ROYAL EMBASSY SA"
            ) ~ "ROYAL EMBASSY OF SAUDI ARABIA",
            nameGiftorClean %>% str_detect("HUAWEI") ~ "HUAWEI",
            nameGiftorClean %>% str_detect("SANOFI") ~ "SANOFI",
            nameGiftorClean %>% str_detect("MASTERCARD") ~ "MASTERCARD FOUNDATION",
            nameGiftorClean %>% str_detect("UNIVERSITY OF QUEENSLAND") ~ "UNIVERSITY OF QUEENSLAND",
            nameGiftorClean %in% c("GOVERNMENT OF SAUDI ARABIA", "SAUDI ARABIA") ~ "GOVERNMENT OF SAUDI ARABIA",
            nameGiftorClean %>% str_detect("GLAXO SMITH|GLAXO GROUP|GLAXOSMITHKLINE|GSK") ~ "GLAXOSMITHKLINE",
            nameGiftorClean %>% str_detect("ARAMCO") ~ "ARAMCO",
            nameGiftorClean %>% str_detect("BOMBARDIER") ~ "BOMBARDIER",
            nameGiftorClean %>% str_detect("ELSEVIER") ~ "ELSEVIER",
            nameGiftorClean %>% str_detect("^SHIRE") ~ "SHIRE",
            nameGiftorClean %>% str_detect(
              "SINO-AMERICAN CORP ON HIGHER EDUC|SINO-AMERICAN COOPERATION ON HE"
            ) ~ "SINO-AMERICAN CORP ON HIGHER EDUCATION",
            nameGiftorClean %>% str_detect("TENCENT") ~ "TENCENT",
            TRUE ~ nameGiftorClean
          )
        )
    }

    if (return_message) {
      amt <- data$amountGift %>% sum(na.rm = T) %>% currency(digits = 0)
      from <- data$dateGift %>% min()
      to <- data$dateGift %>% max()
      count_inst <-
        data %>% distinct(nameInstitution) %>% nrow() %>% comma(digits = 0)
      count_countries <- data %>% distinct(countryGiftor) %>% nrow()

      glue(
        "\n\n{green({amt})} gifted between {red({from})} and {red({to})} across {yellow(count_inst)} institutions amongst {magenta(count_countries)} countries\n"
      ) %>% message()
    }

    if (snake_names) {
      data <- clean_names(data)

    }

    data
  })
abresler/govtrackR documentation built on July 11, 2020, 12:30 a.m.