R/china.R

Defines functions .uw_universities us_espionage_cases ccp_thousand_talents .additional_data .parse_history .munge_ccp_names .dictionary_ccp_names

Documented in ccp_thousand_talents us_espionage_cases

# Lookup table used to rename the Macro Polo committee-member columns.
#
# Returns a two-column tibble: `name_macro` holds the incoming (cleaned)
# column name and `name_actual` the name it is renamed to.  Entries whose
# two names are equal are passed through unchanged by the rename.
# The spellings of the target names (e.g. "isAlternateComitteeMember",
# "isPolitboroMember", "proviceMemberAncestry") are kept exactly as they
# are, because they become output column names.
.dictionary_ccp_names <-
  function() {
    # names = incoming column name, values = column name to use instead
    lookup <- c(
      member_id = "idMember",
      name = "nameMember",
      xing_ming = "xing_ming",
      age = "ageMember",
      nian_ling = "nian_ling",
      organizations = "namesOrganizations",
      suo_shu_ji_gou = "suo_shu_ji_gou",
      full_committee_member_zhong_guo_gong_chan_dang_zhong_yang_wei_yuan_hui_wei_yuan = "isFullCommitteeMember",
      alternate_committee_member_zhong_guo_gong_chan_dang_zhong_yang_wei_yuan_hui_hou_bu_wei_yuan = "isAlternateComitteeMember",
      politburo_standing_committee_zhong_guo_gong_chan_dang_zhong_yang_zheng_zhi_ju_chang_wei_hui = "isPolitburoStandingCommitteeMember",
      politburo_zhong_guo_gong_chan_dang_zhong_yang_zheng_zhi_ju = "isPolitboroMember",
      central_military_commission_zhong_guo_gong_chan_dang_zhong_yang_jun_shi_wei_yuan_hui = "isCentralCommissionMember",
      position = "namePosition",
      zhi_wu = "zhi_wu",
      previous_position = "namePositionPrevious",
      qian_zhi_wu = "qian_zhi_wu",
      gender = "genderMember",
      xing_bie = "xing_bie",
      ethnicity = "ethnicityMember",
      min_zu = "min_zu",
      place_of_ancestry = "locationMemberAncestry",
      ji_guan = "ji_guan",
      province_of_ancestry = "proviceMemberAncestry",
      ji_guan_sheng_fen = "ji_guan_sheng_fen",
      rank = "memberRank",
      ling_dao_zhi_wu_ceng_ci = "ling_dao_zhi_wu_ceng_ci",
      province_of_employment = "provinceMemberEmployment",
      gong_zuo_de_dian_sheng_fen = "gong_zuo_de_dian_sheng_fen",
      sector_of_employment = "sectorEmployment",
      gong_zuo_dan_wei = "gong_zuo_dan_wei",
      headshot = "urlHeadshot"
    )

    tibble(
      name_macro = names(lookup),
      name_actual = unname(lookup)
    )
  }

# Renames the columns of `data` using the `.dictionary_ccp_names()` lookup.
#
# Each column name is looked up in `name_macro`; when found it is replaced
# by the matching `name_actual`.  A name with no dictionary entry is kept
# as-is and reported with a "Missing <name>" message.
# Returns `data` with the new column names applied.
.munge_ccp_names <-
  function(data) {
    lookup <- .dictionary_ccp_names()

    # Translate a single column name, falling back to the name itself.
    translate_name <- function(name) {
      hits <- lookup$name_actual[which(lookup$name_macro == name)]
      if (length(hits) == 0) {
        glue::glue("Missing {name}") %>% message()
        return(name)
      }

      hits
    }

    new_names <- map_chr(names(data), translate_name)

    set_names(data, new_names)
  }


# macro_polo --------------------------------------------------------------

# Placeholder with no implementation yet: takes no arguments and
# returns NULL.
.parse_history <-
  function() {
    NULL
  }

# Fetches the ancillary committee-member JSON feed and returns only its
# `id` and `positions` columns.
.additional_data <- function() {
  ancillary <-
    fromJSON("https://paulson.gistapp.com/committee-members/ancillary-data/")

  select(ancillary, id, positions)
}

#' Chinese Communist Party Committee Members
#'
#' Returns information about purported CCP committee members as reported by
#' \href{https://macropolo.org/digital-projects/the-committee/}{Macro Polo}.
#'
#' The result is memoised, so repeated calls with the same arguments in one
#' session reuse the first download.
#'
#' @param snake_names if `TRUE` returns snake case column names
#' @param include_mandarin if `TRUE` keeps the Mandarin columns
#'   (e.g. `xing_ming`, `zhi_wu`)
#'
#' @return `tibble`
#' @export
#'
#' @examples
#' ccp_committee_members()
ccp_committee_members <-
  memoise::memoise(function(snake_names = TRUE, include_mandarin = FALSE) {
    json <- fromJSON(
      "https://paulson.gistapp.com/committee-members/gallery/bind-data?limit=9999",
      flatten = FALSE,
      simplifyDataFrame = TRUE
    )

    data <- as_tibble(json$data$rowParams)

    # Drop the feed's ordering/field helper columns before cleaning names.
    data <-
      data %>%
      select(-matches("Order|Field")) %>%
      clean_names()


    # Translate the cleaned feed names into the package's column names.
    data <- .munge_ccp_names(data)
    if (!include_mandarin) {
      data <- data %>%
        select(-one_of(
          c(
            "xing_ming",
            "nian_ling",
            "suo_shu_ji_gou",
            "zhi_wu",
            "qian_zhi_wu",
            "xing_bie",
            "min_zu",
            "ji_guan",
            "ji_guan_sheng_fen",
            "ling_dao_zhi_wu_ceng_ci",
            "gong_zuo_de_dian_sheng_fen",
            "gong_zuo_dan_wei"
          )
        ))
    }

    data <- data %>%
      munge_data() %>%
      mutate_at(c("idMember", "ageMember"),
                readr::parse_number)

    urls <- json$data$nestedImagePath
    urlHeadshotFile <-
      json$data$scrapedImageUrl %>% map_chr(URLencode)

    # NOTE(review): `namesOrganization` (singular) is added next to the
    # existing `namesOrganizations` column rather than replacing it --
    # possibly a typo; left unchanged so the output columns stay the same.
    data <-
      data %>%
      mutate(
        urlHeadshot = urls,
        urlHeadshotFile,
        # number of separators + 1 = number of listed items
        countOrganizations = namesOrganizations %>% str_count("\\,") + 1,
        namesOrganization = namesOrganizations %>% str_replace_all("\\,", "\\ | "),
        countPositions = namePosition %>% str_count(" / ") + 1,
        countPositionsPrior = namePositionPrevious %>% str_count(" / ") + 1
      ) %>%
      mutate_at(c("namePosition", "namePositionPrevious"),
                list(function(x) {
                  x %>% str_replace_all(" / ", " | ")
                }))

    if (snake_names) {
      data <- data %>%
        clean_names()
    }

    data
  })

# thousand talents ---------------------------------------------------------

#' Chinese Communist Party 1000 Talents Members
#'
#' Downloads the crowd-sourced list of Thousand Talents participants.
#'
#' @param snake_names if `TRUE` the column names are converted to snake case
#'
#' @return \code{tibble()}
#' @family china
#' @export
#'
#' @examples
#' \dontrun{
#' ccp_thousand_talents()
#' }
ccp_thousand_talents <-
  function(snake_names = F) {
    # NOTE(review): the file extension is .tsv.gz but it is read with
    # read_csv() -- confirm the file really is comma-delimited.
    talents <-
      read_csv("https://asbcllc.com/r_packages/govtrackR/data/thousand_talents.tsv.gz")

    if (!snake_names) {
      return(talents)
    }

    clean_names(talents)
  }


# AEI ---------------------------------------------------------------------

#' AEI Chinese foreign investments
#'
#' Acquires all Chinese foreign investments and construction contracts
#' from the American Enterprise Institute.
#'
#' The workbook linked from the AEI tracker page is downloaded to a
#' temporary file, its first three sheets are stacked, and entity names are
#' normalised.  The result is memoised.
#'
#' @param include_incomplete_transactions if `TRUE` include incomplete
#'   transactions; if `FALSE` only completed transactions are returned
#' @param snake_names if `TRUE` return snake case columns
#' @param unformat if `TRUE` unformat currency columns
#' @param return_message if `TRUE` print a summary message
#' @family china
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' aei_chinese_investments()
aei_chinese_investments <-
  memoise::memoise(function(include_incomplete_transactions = TRUE,
                            snake_names = FALSE,
                            unformat = FALSE,
                            return_message = TRUE) {
    page <-
      read_html("https://www.aei.org/china-global-investment-tracker/")
    url <- page %>% html_nodes("p strong a") %>% html_attr("href")

    tmp <-
      tempfile()
    # Remove the downloaded workbook however the function exits.
    on.exit(unlink(tmp), add = TRUE)
    curl::curl_download(url, tmp)

    # Transaction type label for each of the three sheets that are read.
    sheet_types <- c(
      "Foreign Investment",
      "Construction Contract",
      "Uncompleted Transaction"
    )

    # Column names applied to each sheet, in sheet order.  The sheets do not
    # share a layout: sheet 2 has no greenfield column and sheet 3 has no
    # share column.
    sheet_columns <- list(
      c(
        "yearTransaction",
        "monthTransaction",
        "namesChineseEntities",
        "amountInvestment",
        "pctShare",
        "namesEntityInvestment",
        "sectorInvestment",
        "subSectorInvestment",
        "countryInvestment",
        "regionInvestment",
        "isBRI",
        "isGreenfield"
      ),
      c(
        "yearTransaction",
        "monthTransaction",
        "namesChineseEntities",
        "amountInvestment",
        "pctShare",
        "namesEntityInvestment",
        "sectorInvestment",
        "subSectorInvestment",
        "countryInvestment",
        "regionInvestment",
        "isBRI"
      ),
      c(
        "yearTransaction",
        "monthTransaction",
        "namesChineseEntities",
        "amountInvestment",
        "namesEntityInvestment",
        "sectorInvestment",
        "subSectorInvestment",
        "countryInvestment",
        "regionInvestment",
        "isGreenfield",
        "isBRI"
      )
    )

    all_data <-
      seq_along(sheet_types) %>%
      map_dfr(function(x) {
        data <- tmp %>% read_excel(sheet = x)
        type <- sheet_types[[x]]

        # Rows 1-5 of each sheet are dropped before naming the columns
        # (presumably title/header rows -- TODO confirm against workbook).
        # All three sheets are renamed with setNames(); the third sheet
        # previously called `setnames`, unlike the other two.
        data <-
          data %>%
          dplyr::slice(6:nrow(data)) %>%
          setNames(sheet_columns[[x]])

        data <-
          data %>%
          mutate(
            typeTransaction = type,
            # first of the month + 1 month - 1 day = last day of the month
            dateTransaction = glue("{yearTransaction}-{monthTransaction}-01") %>% ymd()  %m+% months(1) - 1
          ) %>%
          select(
            typeTransaction,
            yearTransaction,
            monthTransaction,
            dateTransaction,
            namesChineseEntities,
            namesEntityInvestment,
            everything()
          )

        data
      })

    all_data <-
      all_data %>%
      mutate(
        isCompletedTransaction = typeTransaction != "Uncompleted Transaction",
        # amounts are multiplied by one million
        amountInvestment = as.numeric(amountInvestment) * 1000000,
        yearTransaction = as.numeric(yearTransaction),
        isGreenfield = case_when(isGreenfield == "0" ~ FALSE,
                                 isGreenfield %in% c("1", "G") ~ TRUE,
                                 TRUE ~ FALSE),
        isBRI = case_when(isBRI == "0" ~ FALSE,
                          isBRI %in% c("1", "G") ~ TRUE,
                          TRUE ~ FALSE)
      )

    # Parse every distinct share string once, then join the result back.
    df_pct <-
      all_data %>%
      filter(!is.na(pctShare)) %>%
      distinct(pctShare)

    df_pct <-
      df_pct$pctShare %>%
      map_dfr(function(pct_share) {
        # Only the part before the first comma is inspected for a "%" sign.
        pct <-
          pct_share %>% str_split("\\,") %>% flatten_chr() %>% .[[1]]
        if (pct %>% str_detect("%")) {
          pct <- pct %>% readr::parse_number() / 100
          data <- tibble(pctShare = pct_share,
                         pctShareActual = pct)
          return(data)
        }
        pct <- readr::parse_number(pct_share)
        # Values above 1 are treated as whole percentages.  An unparseable
        # share stays NA instead of failing the `if`.
        if (!is.na(pct) && pct > 1) {
          pct <- pct / 100
        }

        data <- tibble(pctShare = pct_share,
                       pctShareActual = pct)
        data
      })

    # NOTE(review): the final select() keeps only the columns that existed
    # before this pipeline (plus isCompletedTransaction first), so the
    # isMajorityPurchase / isCompletePurchase columns created here are
    # dropped again.  Left as-is to keep the output schema unchanged;
    # confirm whether they were meant to be returned.
    all_data <-
      all_data %>%
      left_join(df_pct, by = "pctShare") %>%
      select(-one_of("pctShare")) %>%
      rename(pctShare = pctShareActual) %>%
      mutate(isMajorityPurchase  = pctShare >= .5,
             isCompletePurchase = pctShare == 1) %>%
      select(isCompletedTransaction , one_of(names(all_data)))

    # Implied total value: amount invested divided by the share acquired.
    all_data <-
      all_data %>%
      mutate(
        amountEnterpriseValueTransaction = case_when(!is.na(pctShare) ~ amountInvestment / pctShare,
                                                     TRUE ~ NA_real_)
      )

    # Flag consortium deals, strip the consortium suffix, and split any
    # parenthesised detail off the entity name.
    all_data <-
      all_data %>%
      mutate(
        isChineseConsortium = str_to_upper(namesChineseEntities) %>% str_detect("CONSORT|GROUP OF"),
        namesChineseEntities = namesChineseEntities %>% str_remove_all("-led consortium$| led consortium$| consortium$")
      ) %>%
      separate(
        namesChineseEntities,
        into = c("namesChineseEntities", "detailsChineseEntites"),
        sep = "\\(",
        extra = "merge",
        fill = 'right'
      ) %>%
      mutate_at(c("namesChineseEntities", "detailsChineseEntites"),
                list(function(x) {
                  x %>% str_remove_all("\\)") %>% str_squish()
                }))


    # " AND " and "/" become list separators; names that legitimately
    # contain "AND" are then restored with "&".
    # NOTE(review): the "TIDFORE HEAVY" replacement below contains a literal
    # tab character between HEAVY and EQUIPMENT; kept as in the original.
    all_data <-
      all_data %>%
      mutate(
        namesChineseEntities = namesChineseEntities %>% str_replace_all("\\ AND |/", "\\, ") %>%
          str_replace_all(
            "STATE DEVELOPMENT, INVESTMENT CORP",
            "STATE DEVELOPMENT & INVESTMENT CORP"
          ) %>%
          str_replace_all("WUHAN IRON, STEEL", "WUHAN IRON & STEEL")
      ) %>%
      mutate(
        namesChineseEntities = case_when(
          namesChineseEntities %>% str_detect("ANBANG") ~ "ANBANG",
          namesChineseEntities %>% str_detect("QINGDAO HENGSHUN") ~ "QINGDAO HENGSHUN ZHONGSHENG",
          namesChineseEntities %>% str_detect("SAILUN") ~ "SAILUN TIRE",
          namesChineseEntities %>% str_detect("SHANDA") ~ "SHANDA GROUP",
          namesChineseEntities %>% str_detect("WUXI PHARMA") ~ "WUXI PHARMATECH",
          namesChineseEntities %>% str_detect("ZHEJIANG HUAYOU") ~ "ZHEJIANG HUAYOU COBALT",
          namesChineseEntities %>% str_detect("SHANDONG ELECTRIC POWER CONSTRUCTION") ~ "SHANDONG ELECTRIC POWER",
          namesChineseEntities %>% str_detect("SHANGHAI MUNICIPAL") ~ "SHANGHAI MUNICIPAL INVESTMENT",
          namesChineseEntities %>% str_detect("SHANDONG HEAVY") ~ "SHANDONG HEAVY",
          namesChineseEntities %>% str_detect("SHENHUA'S WATERMARK") ~ "SHENHUA",
          namesChineseEntities %>% str_detect("SHOUGANG") ~ "SHOUGANG GROUP",
          namesChineseEntities %>% str_detect("SUNING") ~ "SUNING APPLIANCE",
          namesChineseEntities %>% str_detect("SHANDONG LINGLONG") ~ "SHANDONG LINGLONG TIRE",
          namesChineseEntities %>% str_detect("SHANGHAI CONSTRUCTION") ~ "SHANGHAI CONSTRUCTION GROUP",
          namesChineseEntities %>% str_detect("GREENLAND") ~ "SHANGHAI GREENLAND",
          namesChineseEntities %>% str_detect("GROUP OF SHANGHAI STATE") ~ "SHANGHAI STATE INVESTMENT GROUP",
          namesChineseEntities %>% str_detect("GUANGDONG RISING") ~ "GUANGDONG RISING ASSET MANAGEMENT",
          namesChineseEntities %>% str_detect("BOSAI") ~ "BOSAI MINERALS",
          namesChineseEntities %>% str_detect("HANGZHOU GREAT STAR") ~ "HANGZHOU GREAT STAR INDUSTRIAL",
          namesChineseEntities %>% str_detect("CHINA MINGSHEN INVESTMENT") ~ "CHINA MINSHENG INVESTMENT",
          namesChineseEntities %>% str_detect("ENVIVSION ENERGY") ~ "ENVISION ENERGY",
          namesChineseEntities %>% str_detect("GREAT WALL MOTOR") ~ "GREAT WALL MOTORS",
          namesChineseEntities %>% str_detect("LUXSHARE") ~ "LUXSHARE PRECISION",
          namesChineseEntities %>% str_detect("TIDFORE HEAVY") ~ "TIDFORE HEAVY	EQUIPMENT",
          namesChineseEntities == "CHEM CHINA" ~ "CHEMCHINA",
          namesChineseEntities == "DONGFANG" ~ "DONGFANG ELECTRIC",
          namesChineseEntities == "EXIM BANK" ~ "EX-IM BANK",
          namesChineseEntities == "HUANENG" ~ "HUANENG POWER",
          namesChineseEntities == "TEBIAN ELECTRIC" ~ "TEBIAN ELECTRIC APPARATUS",
          namesChineseEntities == "HUMANWELL" ~ "HUMANWELL HEALTHCARE",
          namesChineseEntities == "JINCHUAN GROUP" ~ "JINCHUAN",
          namesChineseEntities == "LUYE" ~ "LUYE GROUP",
          namesChineseEntities == "JIQUAN IRON, STEEL" ~ "JIQUAN IRON & STEEL",
          namesChineseEntities == "TSINGHUA" ~ "TSINGHUA UNIGROUP",
          namesChineseEntities == "WISON" ~ "WISON ENERGY",
          namesChineseEntities %in% c("HEBEI IRON", "HEBEI IRON, STEEL", "HEBEI STEEL") ~ "HEBEI IRON & STEEL",
          TRUE ~ namesChineseEntities
        )
      )

    # Row id used to re-attach the per-entity results to each transaction.
    all_data <-
      all_data %>%
      mutate(idTransaction = seq_len(n())) %>%
      select(idTransaction, everything())

    # One row per (transaction, Chinese entity) for entity-level cleaning.
    df_entities <- all_data %>%
      select(idTransaction, namesChineseEntities) %>%
      separate_rows(namesChineseEntities, sep = "\\, ") %>%
      rename(nameChineseEntities = namesChineseEntities)

    df_entities <-
      df_entities %>%
      entities::refine_columns(entity_columns = "nameChineseEntities")

    df_entities <- df_entities %>%
      mutate(
        nameChineseEntitiesClean = case_when(
          nameChineseEntitiesClean %>% str_detect("ANBANG") ~ "ANBANG",
          nameChineseEntitiesClean %>% str_detect("CITIC") ~ "CITIC",
          nameChineseEntitiesClean %>% str_detect("COUNTRY GARDEN") ~ "COUNTRY GARDEN HOLDINGS",
          nameChineseEntitiesClean %>% str_detect("DONGFANG|DONGFENG") ~ "DONGFANG ELECTRIC",
          nameChineseEntitiesClean %>% str_detect("FUJIAN CONSTRUCTION") ~ "FUJIAN CONSTRUCTION ENGINEERING",
          nameChineseEntitiesClean %>% str_detect("GUANGDONG RISING ASSET") ~ "GUANGDONG RISING ASSET MANAGEMENT",
          nameChineseEntitiesClean %>% str_detect("HEBEI CONSTRUCTION") ~ "HEBEI CONSTRUCTION",
          nameChineseEntitiesClean %>% str_detect("HOPU") ~ "HOPU INVESTMENT",
          nameChineseEntitiesClean %>% str_detect("JIN JIANG") ~ "JIN JIANG HOTELS",
          nameChineseEntitiesClean %>% str_detect("LUXSHARE") ~ "LUXSHARE PRECISION",
          nameChineseEntitiesClean %>% str_detect("POWER CONSTRUCTION") ~ "POWER CONSTRUCTION CORP",
          nameChineseEntitiesClean %>% str_detect("SHANGHAI MUNICIPAL") ~ "SHANGHAI MUNICIPAL GOVERNMENT",
          nameChineseEntitiesClean %>% str_detect("SHOUGANG") ~ "SHOUGANG GROUP",
          nameChineseEntitiesClean %>% str_detect("SUNING") ~ "SUNING APPLIANCE",
          nameChineseEntitiesClean %>% str_detect("TSINGSHAN") ~ "TSINGSHAN STEEL",
          nameChineseEntitiesClean == "GREENLAND" ~ "SHANGHAI GREENLAND",
          TRUE ~ nameChineseEntitiesClean
        )
      )  %>%
      select(idTransaction, nameChineseEntities = nameChineseEntitiesClean)

    # Collapse back to one row per transaction: entity count plus a sorted,
    # de-duplicated, " | "-separated list of names.
    df_entities <-
      df_entities %>% group_by(idTransaction) %>% summarise(
        countChineseEntities = n(),
        namesChineseEntities = unique(nameChineseEntities) %>% sort() %>% str_c(collapse = " | ")
      ) %>%
      ungroup()

    all_data <- all_data %>%
      select(-namesChineseEntities) %>%
      left_join(df_entities, by = "idTransaction") %>%
      select(one_of(names(all_data)), everything())

    # Any region mentioning "Africa" is folded into a single "Africa" value.
    all_data <- all_data %>%
      mutate(
        regionInvestment = case_when(
          regionInvestment %>% str_detect("Africa") ~ "Africa",
          TRUE ~ regionInvestment
        )
      )

    all_data <- all_data %>%
      mutate(
        isUnknownInvestmentEntity = is.na(namesEntityInvestment),
        namesEntityInvestment = str_to_upper(namesEntityInvestment)
      )

    # Protect names that contain a genuine "AND" before the remaining
    # " AND " occurrences are turned into list separators.
    all_data <-
      all_data %>%
      mutate(
        namesEntityInvestment = case_when(
          namesEntityInvestment %>% str_detect("PYONGYANG INVESTMENT AND DEVELOPMENT ") ~ "PYONGYANG INVESTMENT & DEVELOPMENT",
          namesEntityInvestment %>% str_detect("IRAN WATER AND POWER") ~ "IRAN WATER & POWER",
          namesEntityInvestment %>% str_detect("INTERSTATE HOTEL AND RESORTS") ~ "INTERSTATE HOTEL & RESORTS",
          namesEntityInvestment %>% str_detect("GEORGIAN OIL AND GAS") ~ "GEORGIAN OIL & GAS",
          namesEntityInvestment %>% str_detect("FISHER AND PAYKEL") ~ "FISHER & PAYKEL",
          namesEntityInvestment %>% str_detect("SOKHNA REFINERY AND PETROCHEMICALS") ~ "SOKHNA REFINERY & PETROCHEMICALS",
          namesEntityInvestment %>% str_detect("CHINA LIGHT AND POWER") ~ "CHINA LIGHT & POWER",
          namesEntityInvestment %>% str_detect("BANKO INDUSTRIAL AND COMERCIAL") ~ "BANKO INDUSTRIAL & COMERCIAL",
          namesEntityInvestment %>% str_detect("AUSTRALIA AND NEW ZEALAND BANKING") ~ "AUSTRALIA & NEW ZEALAND BANKING",
          TRUE ~ namesEntityInvestment
        ),
        namesEntityInvestment = namesEntityInvestment %>% str_replace_all(" AND ", ", ")
      )

    # One row per (transaction, investment entity); unknown entities are
    # excluded from entity-level cleaning.
    df_entities <-
      all_data %>%
      select(idTransaction, namesEntityInvestment) %>%
      separate_rows(namesEntityInvestment, sep = "\\, ") %>%
      rename(nameEntityInvestment = namesEntityInvestment) %>%
      filter(!is.na(nameEntityInvestment))

    df_entities <-
      df_entities %>%
      entities::refine_columns(entity_columns = "nameEntityInvestment")


    df_entities <-
      df_entities %>%
      mutate(
        nameEntityInvestmentClean = case_when(
          nameEntityInvestmentClean %>% str_detect("ACCOR") ~ "ACCOR HOTELS",
          nameEntityInvestmentClean %>% str_detect("ACWA") ~ "ACWA POWER",
          nameEntityInvestmentClean %>% str_detect("ARAMCO") ~ "ARAMCO",
          nameEntityInvestmentClean %>% str_detect("ATHABASCA") ~ "ATHABASCA OIL CORP",
          nameEntityInvestmentClean %>% str_detect("BACCARAT") ~ "BACCARAT HOTELS",
          nameEntityInvestmentClean %>% str_detect("BINTANG") ~ "BINTANG DELAPAN",
          nameEntityInvestmentClean %>% str_detect("BROOKEFIELD|BROOKFIELD") ~ "BROOKFIELD",
          nameEntityInvestmentClean %>% str_detect("CAPE LAMBERT") ~ "CAPE LAMBERT IRON",
          nameEntityInvestmentClean %>% str_detect("CARLYLE") ~ "CARLYLE GROUP",
          nameEntityInvestmentClean %>% str_detect("CBRE") ~ "CBRE",
          nameEntityInvestmentClean %>% str_detect("CVRD|VALE") ~ "VALE",
          nameEntityInvestmentClean %>% str_detect("DAMAC") ~ "DAMAC",
          nameEntityInvestmentClean %>% str_detect("EXXON") ~ "EXXONMOBIL",
          nameEntityInvestmentClean == "GE" ~ "GENERAL ELECTRIC",
          nameEntityInvestmentClean == "GE SEACO" ~ "SEACO",
          nameEntityInvestmentClean %>% str_detect("GENTING") ~ "GENTING",
          nameEntityInvestmentClean %>% str_detect("GENERALI") ~ "GENERALI",
          nameEntityInvestmentClean %>% str_detect("GINDALBIE") ~ "GINDALBIE METALS",
          nameEntityInvestmentClean %>% str_detect("GLOBAL LOGISTICS|GLP") ~ "GLOBAL LOGISTICS PROPERTIES",
          nameEntityInvestmentClean == "GM" ~ "GENERAL MOTORS",
          nameEntityInvestmentClean %>% str_detect("GREENLAND") ~ "SHANGHAI GREENLAND",
          nameEntityInvestmentClean %>% str_detect("HATTAT") ~ "HATTAT HOLDING",
          nameEntityInvestmentClean %>% str_detect("INOVA") ~ "INOVA GEOPHYSICAL",
          nameEntityInvestmentClean %>% str_detect("IVANHOE") ~ "IVANHOE MINES",
          nameEntityInvestmentClean %>% str_detect("J&T") ~ "J&T FINANCE",
          nameEntityInvestmentClean %>% str_detect("KAZAKHMYS") ~ "KAZAKHMYS AKTOGA",
          nameEntityInvestmentClean %>% str_detect("KAZMUNAIGAS|KAZMUNAIGAS|KAZMUNAIGAZ|KAZMUNAYGAS") ~ "KAZMUNAYGAS",
          nameEntityInvestmentClean %>% str_detect("MITSUBISHI") ~ "MITSUBISHI",
          nameEntityInvestmentClean %>% str_detect("MOTOROLA") ~ "MOTOROLA",
          nameEntityInvestmentClean %>% str_detect("OAKTREE CAPITAL") ~ "OAKTREE CAPITAL MANAGEMENT",
          nameEntityInvestmentClean %>% str_detect("OPERA") ~ "OPERA SOFTWARE",
          nameEntityInvestmentClean %>% str_detect("PIONEER") ~ "PIONEER NATURAL RESOUCES",
          nameEntityInvestmentClean %>% str_detect("PT SEMEN") ~ "SEMEN INDONESIA",
          nameEntityInvestmentClean %>% str_detect("REPSOL") ~ "REPSOL",
          nameEntityInvestmentClean %>% str_detect("RFR") ~ "RFR HOLDING",
          nameEntityInvestmentClean %>% str_detect("SAAB") ~ "SAAB",
          nameEntityInvestmentClean %>% str_detect("SAXO") ~ "SAXO BANK",
          nameEntityInvestmentClean %>% str_detect("SONGBIRD") ~ "SONGBIRD ESTATES",
          nameEntityInvestmentClean %>% str_detect("STARWOOD") ~ "STARWOOD CAPITAL",
          nameEntityInvestmentClean %>% str_detect("SUNDANCE") ~ "SUNDANCE RESOURCES",
          nameEntityInvestmentClean %>% str_detect("TEHRAN RAIL|TEHRAN URBAN & SUBURBAN RAIL") ~ "TEHRAN URBAN & SUBURBAN RAIL",
          nameEntityInvestmentClean %>% str_detect("THOMAS COOK") ~ "THOMAS COOK GROUP",
          nameEntityInvestmentClean %>% str_detect("TRILITY") ~ "TRILITY WENTWORTH",
          nameEntityInvestmentClean %>% str_detect("UBER") ~ "UBER",
          nameEntityInvestmentClean %>% str_detect("UZBEKNEFTEGAS|UZBEKNEFTEGAZ") ~ "UZBEKNEFTEGAZ",
          nameEntityInvestmentClean %>% str_detect("VENEZUELA NATIONAL ELECTRIC") ~ "VENEZUELA NATIONAL ELECTRIC",
          nameEntityInvestmentClean %>% str_detect("VIETNAM NATIONAL COAL") ~ "VIETNAM NATIONAL COAL",
          nameEntityInvestmentClean %>% str_detect("VTB") ~ "VTB GROUP",
          nameEntityInvestmentClean %>% str_detect("BURG") ~ "BURG INDUSTRIES",
          TRUE ~ nameEntityInvestmentClean
        )
      ) %>%
      select(idTransaction, nameEntityInvestment = nameEntityInvestmentClean)

    df_entities <-
      df_entities %>%
      group_by(idTransaction) %>% summarise(
        countInvestmentEntities = n(),
        namesEntityInvestment = unique(nameEntityInvestment) %>% sort() %>% str_c(collapse = " | ")
      ) %>%
      ungroup()

    all_data <-
      all_data %>%
      select(-namesEntityInvestment) %>%
      left_join(df_entities, by = "idTransaction") %>%
      select(one_of(names(all_data)), everything())


    # Excluding incomplete transactions means keeping the completed ones.
    # (The condition was previously negated, which returned only the
    # uncompleted rows.)
    if (!include_incomplete_transactions) {
      all_data <- all_data %>% filter(isCompletedTransaction)
    }

    if (return_message) {
      # The summary is computed over completed transactions only.
      completed <- all_data %>% filter(isCompletedTransaction)

      actions <-
        completed %>%
        nrow() %>% comma(digits = 0)

      amt <-
        completed %>% pull(amountInvestment) %>% sum(na.rm = TRUE) %>% currency(digits = 0)
      from_date <-
        completed %>% pull(dateTransaction) %>% min(na.rm = TRUE)
      to_date <-
        completed %>% pull(dateTransaction) %>% max(na.rm = TRUE)
      countries <-
        completed %>% distinct(countryInvestment) %>% nrow()
      glue(
        "\n\n{green({amt})} in completed Communist Chinese investment {red({from_date})} and {red({to_date})} across {yellow(actions)} actions in {magenta(countries)} countries\n\n"
      ) %>% cat(fill = TRUE)
    }


    all_data <-
      all_data %>%
      munge_data(snake_names = snake_names, unformat = unformat)


    all_data
  })



# jeremy_wu ---------------------------------------------------------------

#' Espionage Cases
#'
#' Data about U.S. espionage cases collected via Jeremy Wu
#'
#' @param snake_names if `TRUE` returns snake case names
#'
#' @return \code{tibble()}
#' @family china
#' @export
#'
#' @examples
#' \dontrun{
#' us_espionage_cases()
#' }
us_espionage_cases <- function(snake_names = FALSE) {
  page <- "https://jeremy-wu.info/fed-cases/" %>%
    read_html()
  data <- page %>% html_table(fill = FALSE) %>% .[[1]] %>% as_tibble()

  # Cells of the 11th column; the first one is dropped so that the remaining
  # cells line up with the data rows (presumably it is the header cell --
  # TODO confirm).  A negative index is used because `2:length(x)` counts
  # backwards when fewer than two cells are found.
  link_nodes <- page %>% html_nodes("#tablepress-3 .column-11")
  link_nodes <- link_nodes[-1]

  # One row per table row, holding the list of links found in that cell.
  df_links <-
    seq_along(link_nodes) %>%
    map_dfr(function(x) {
      urls <- link_nodes[[x]] %>% html_nodes("a") %>% html_attr("href")
      tibble(idRow = x, urlsCase = list(urls))
    })

  data <-
    data %>%
    setNames(
      c(
        "dateReported",
        "idCase",
        "yearEvent",
        "countryAttacker",
        "descriptionCase",
        "slugCourt",
        "typeCharges",
        "statusCases",
        "dataDefendants",
        "dataFirms",
        "removeU"
      )
    ) %>%
    .munge_data(clean_address = FALSE) %>%
    mutate(idRow = seq_len(n())) %>%
    select(-matches("remove"))

  data <-
    data %>%
    left_join(df_links, by = "idRow") %>%
    select(-idRow)


  # Text of every link inside the description column, joined into one
  # alternation pattern that is stripped from the descriptions.
  # NOTE(review): the link texts are used as a regular expression without
  # escaping, so regex metacharacters in a link text are not matched
  # literally.
  link_text_pattern <-
    page %>%
    html_nodes("#tablepress-3 .column-5") %>%
    html_nodes("a") %>%
    html_text() %>%
    unique() %>%
    str_c(collapse = "|")

  # With no link text there is nothing to remove; skip rather than pass an
  # empty pattern to str_remove_all().
  if (length(link_text_pattern) > 0 && nchar(link_text_pattern) > 0) {
    data <-
      data %>%
      mutate(
        descriptionCase = descriptionCase %>% str_remove_all(link_text_pattern)
      )
  }

  data <- munge_data(data = data, snake_names = snake_names)

  data
}



# uni_watch ---------------------------------------------------------------


# Scrapes the institution table from the ASPI university tracker.
#
# @param url page to request; defaults to the tracker home page.  The
#   argument is now actually used for the request (it was previously
#   ignored in favour of a hard-coded copy of the default).
# @return tibble with one row per institution: name, risk level, detail-page
#   URL, entity type, security credentials, and two logical flags.
.uw_universities <-
  function(url = "https://unitracker.aspi.org.au/") {
    # Browser-like request headers.
    # NOTE(review): the cookie value is hard-coded; presumably it expires
    # and the request may need a fresh one -- confirm.
    headers <- c(
      `authority` = 'unitracker.aspi.org.au',
      `pragma` = 'no-cache',
      `cache-control` = 'no-cache',
      `upgrade-insecure-requests` = '1',
      `user-agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
      `accept` = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
      `sec-fetch-site` = 'none',
      `sec-fetch-mode` = 'navigate',
      `sec-fetch-user` = '?1',
      `sec-fetch-dest` = 'document',
      `accept-language` = 'en-US,en;q=0.9',
      `cookie` = '__cfduid=d60816f93c7ed35cfdfaf57a31ce88b731592596455; cf_clearance=21ac30fb332ece6967fa83df38d1793f08b532ab-1592597657-0-9e1ce14-150'
    )

    res <-
      httr::GET(url = url, httr::add_headers(.headers = headers))
    page <-
      res %>% content("text") %>% read_html()

    # Upper-cased text of the n-th table column with digits and periods
    # removed and whitespace squished.
    column_text <- function(position) {
      page %>%
        html_nodes(paste0("tbody td:nth-child(", position, ")")) %>%
        html_text() %>%
        str_squish() %>%
        str_remove_all("[0-9]|\\.") %>%
        str_squish() %>%
        str_to_upper()
    }

    title_nodes <-
      page %>% html_nodes(".data-table__university-title")

    nameInstitution <-
      title_nodes %>% html_children() %>% html_text()
    # hrefs are site-relative, so the site root is prepended
    urlSchool <-
      title_nodes %>% html_attr("href") %>%
      str_c("https://unitracker.aspi.org.au", .)

    typeEntity <- column_text(2)
    levelRisk <- column_text(3)
    typeSecurityCredentials <- column_text(4)

    # Columns 5 and 6 are flags: TRUE when the cell contains a check mark.
    isBISRestricted <- column_text(5) %>% str_detect("✓")
    hasSpying <- column_text(6) %>% str_detect("✓")

    data <-
      tibble(
        nameInstitution,
        levelRisk,
        urlSchool,
        typeEntity,
        typeSecurityCredentials,
        isBISRestricted,
        hasSpying
      ) %>%
      .munge_data(clean_address = FALSE)

    data

  }

# Scrapes one institution detail page of the ASPI university tracker.
#
# @param url detail-page URL of the institution.
# @param return_message if TRUE, emits a "Parsing <url>" message first.
# @return one-row tibble with a column per sidebar heading found on the page
#   (aliases, categories, location, supervising agencies, topics), an
#   optional logo URL, the page description, `urlSchool` (the input URL) and
#   the " | "-joined links found on the page.
.parse_uw_url <-
  function(url = "https://unitracker.aspi.org.au/universities/air-force-command-college",
           return_message = TRUE) {
    if (return_message) {
      glue("Parsing {url}") %>% message()
    }
    # Browser-like request headers.
    # NOTE(review): `referer` and `cookie` are hard-coded regardless of the
    # page being requested.
    headers <- c(
      `authority` = 'unitracker.aspi.org.au',
      `pragma` = 'no-cache',
      `cache-control` = 'no-cache',
      `upgrade-insecure-requests` = '1',
      `user-agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
      `accept` = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
      `sec-fetch-site` = 'same-origin',
      `sec-fetch-mode` = 'navigate',
      `sec-fetch-user` = '?1',
      `sec-fetch-dest` = 'document',
      `referer` = 'https://unitracker.aspi.org.au/universities/air-force-command-college',
      `accept-language` = 'en-US,en;q=0.9',
      `cookie` = '__cfduid=d60816f93c7ed35cfdfaf57a31ce88b731592596455; cf_clearance=21ac30fb332ece6967fa83df38d1793f08b532ab-1592597657-0-9e1ce14-150'
    )

    res <-
      httr::GET(url = url, httr::add_headers(.headers =
                                               headers))

    page <-
      res %>% content("text") %>% read_html()

    links <-
      page %>% html_nodes("span a") %>% html_attr("href")

    descriptionInstitution <-
      page %>% html_nodes(".copy h2 , p") %>% html_text() %>% str_c(collapse = "")

    # The text is split on ";", ".html" and "http", and every fragment that
    # contains "://" is discarded before re-joining -- apparently to strip
    # embedded URLs from the description (TODO confirm intent).
    descriptionInstitution <-
      descriptionInstitution %>%
      str_split("\\;") %>%
      flatten_chr() %>%
      str_squish() %>%
      str_c(collapse = "") %>%
      str_split(".html") %>%
      flatten_chr() %>%
      str_split("http") %>%
      flatten_chr() %>%
      discard(function(x) {
        x %>% str_detect("\\://")
      }) %>%
      str_c(collapse = "")

    # Sidebar headings and items arrive as one flat sequence of text nodes.
    details <-
      page %>% html_nodes(".aside__heading , .aside__item") %>% html_text() %>% str_squish()

    # Mark the known headings, carry each heading down over the items that
    # follow it, then collapse the items of each heading into one string.
    data <-
      tibble(item = details) %>%
      mutate(
        isBase = item %in% c(
          "Aliases",
          "Location",
          "Supervising agencies",
          "Categories",
          "Topics"
        )
      ) %>%
      mutate(parent = case_when(isBase ~ item,
                                TRUE ~ NA_character_)) %>%
      fill(parent) %>%
      filter(!isBase) %>%
      select(parent, item) %>%
      group_by(parent) %>%
      summarise(item = item %>% str_c(collapse =  " | ")) %>%
      ungroup()


    # Rename headings to column names and pivot to a single wide row.
    data <-
      data %>%
      mutate(
        parent = case_when(
          parent == "Aliases" ~ "namesAliases",
          parent == "Categories" ~ "typeCategories",
          parent == "Location" ~ "locationInstitution",
          parent == "Supervising agencies" ~ "namesAgencySupervising",
          parent == "Topics" ~ "topicsExpertise"
        )
      ) %>%
      mutate(item = item %>% str_to_upper()) %>%
      spread(parent, item)

    logo <-
      page %>% html_nodes(".aside__logo-image") %>% html_attr("src")

    if (length(logo) > 0) {
      # Only the first match is used: assigning several values to a
      # one-row tibble would make mutate() fail.
      data <-
        data %>%
        mutate(urlLogoInstitution = logo[[1]])
    }

    data %>%
      mutate(
        descriptionInstitution,
        urlSchool = url,
        urlsInstitution = str_c(links, collapse = " | ")
      )


  }

#' Chinese University Tracker
#'
#' Data tracking key topic areas from
#' the major Chinese research institutions maintained by
#' the Australian Strategic Policy Institute
#'
#' @param parse_details if \code{TRUE} parses the detail page of every
#'   institution and joins the result to the institution table
#' @param snake_names if `TRUE` returns snake case column names
#' @param use_cached_data if `TRUE` returns a previously saved copy of the
#'   data instead of scraping the site; the other arguments are then ignored
#' @param return_message if `TRUE` prints a message for each parsed page
#'
#' @return `tibble` with one row per institution
#' @family china
#' @export
#'
#' @examples
#' china_unitracker()
china_unitracker <-
  function(parse_details = TRUE,
           snake_names = FALSE,
           use_cached_data = FALSE,
           return_message = TRUE) {

    if (use_cached_data) {
      # Returned exactly as stored; `snake_names` is not applied here.
      data <- read_rda("https://asbcllc.com/r_packages/govtrackR/data/aspi_data.rda")
      return(data)
    }
    data <-
      .uw_universities()

    if (!parse_details) {
      if (snake_names) {
        data <- data %>% clean_names()
      }
      return(data)
    }

    df_details <-
      data$urlSchool %>%
      future_map_dfr(function(url) {
        .parse_uw_url(url = url, return_message = return_message)
      })

    # Items are joined with " | ", so the number of items is the number of
    # separators plus one (previously the bare separator count was stored,
    # which under-counted every non-missing value by one).
    df_details <- df_details %>%
      mutate(
        countAgenciesSupervising = namesAgencySupervising %>% str_count("\\|") + 1,
        countCategories = typeCategories %>% str_count("\\|") + 1,
        countTopics = topicsExpertise %>% str_count("\\|") + 1,
        countAliases = namesAliases %>% str_count("\\|") + 1,
        countLocations = locationInstitution %>% str_count("\\|") + 1
      )

    data <-
      data %>%
      left_join(df_details, by = "urlSchool")

    if (snake_names) {
      data <- data %>% clean_names()
    }

    data
  }
abresler/govtrackR documentation built on July 11, 2020, 12:30 a.m.