# Lookup table used to rename committee-member columns: `name_macro` holds the
# incoming column name and `name_actual` the name it is replaced with.
# Pinyin-named columns map to themselves. The replacement spellings are
# reproduced exactly as-is because they become the output column names.
.dictionary_ccp_names <-
  function() {
    # One `incoming name = replacement name` pair per column.
    name_lookup <- c(
      member_id = "idMember",
      name = "nameMember",
      xing_ming = "xing_ming",
      age = "ageMember",
      nian_ling = "nian_ling",
      organizations = "namesOrganizations",
      suo_shu_ji_gou = "suo_shu_ji_gou",
      full_committee_member_zhong_guo_gong_chan_dang_zhong_yang_wei_yuan_hui_wei_yuan = "isFullCommitteeMember",
      alternate_committee_member_zhong_guo_gong_chan_dang_zhong_yang_wei_yuan_hui_hou_bu_wei_yuan = "isAlternateComitteeMember",
      politburo_standing_committee_zhong_guo_gong_chan_dang_zhong_yang_zheng_zhi_ju_chang_wei_hui = "isPolitburoStandingCommitteeMember",
      politburo_zhong_guo_gong_chan_dang_zhong_yang_zheng_zhi_ju = "isPolitboroMember",
      central_military_commission_zhong_guo_gong_chan_dang_zhong_yang_jun_shi_wei_yuan_hui = "isCentralCommissionMember",
      position = "namePosition",
      zhi_wu = "zhi_wu",
      previous_position = "namePositionPrevious",
      qian_zhi_wu = "qian_zhi_wu",
      gender = "genderMember",
      xing_bie = "xing_bie",
      ethnicity = "ethnicityMember",
      min_zu = "min_zu",
      place_of_ancestry = "locationMemberAncestry",
      ji_guan = "ji_guan",
      province_of_ancestry = "proviceMemberAncestry",
      ji_guan_sheng_fen = "ji_guan_sheng_fen",
      rank = "memberRank",
      ling_dao_zhi_wu_ceng_ci = "ling_dao_zhi_wu_ceng_ci",
      province_of_employment = "provinceMemberEmployment",
      gong_zuo_de_dian_sheng_fen = "gong_zuo_de_dian_sheng_fen",
      sector_of_employment = "sectorEmployment",
      gong_zuo_dan_wei = "gong_zuo_dan_wei",
      headshot = "urlHeadshot"
    )
    tibble(
      name_macro = names(name_lookup),
      name_actual = unname(name_lookup)
    )
  }
# Rename the columns of `data` using the `.dictionary_ccp_names()` lookup.
# Columns without a dictionary entry keep their current name and a
# "Missing <name>" message is emitted for each of them.
.munge_ccp_names <-
  function(data) {
    lookup <- .dictionary_ccp_names()
    current <- names(data)
    # Position of each current name in the dictionary (NA when absent).
    hit <- match(current, lookup$name_macro)
    unknown <- is.na(hit)
    for (name in current[unknown]) {
      glue::glue("Missing {name}") %>% message()
    }
    renamed <- lookup$name_actual[hit]
    # Unmatched columns fall back to their existing name.
    renamed[unknown] <- current[unknown]
    data %>%
      set_names(renamed)
  }
# macro_polo --------------------------------------------------------------
# Unimplemented stub: takes no arguments and returns `NULL`.
.parse_history <-
  function() {
    NULL
  }
# Download the committee-members ancillary feed and keep only its `id` and
# `positions` columns.
.additional_data <- function() {
  ancillary <-
    fromJSON("https://paulson.gistapp.com/committee-members/ancillary-data/")
  select(ancillary, id, positions)
}
#' Chinese Communist Party Committee Members
#'
#' Returns information about purported CCP committee members as reported by
#' \href{https://macropolo.org/digital-projects/the-committee/}{Macro Polo}.
#' The function is memoised, so repeated calls with the same arguments reuse
#' the first result instead of downloading again.
#'
#' @param snake_names if `TRUE` returns snake case column names
#' @param include_mandarin if `TRUE` keeps the Mandarin (pinyin-named) columns
#'
#' @return `tibble`
#' @export
#'
#' @examples
#' ccp_committee_members()
ccp_committee_members <-
  memoise::memoise(function(snake_names = TRUE, include_mandarin = FALSE) {
    json <- fromJSON(
      "https://paulson.gistapp.com/committee-members/gallery/bind-data?limit=9999",
      flatten = FALSE,
      simplifyDataFrame = TRUE
    )
    data <- as_tibble(json$data$rowParams)
    data <-
      data %>%
      select(-matches("Order|Field")) %>%
      clean_names()
    # Map the cleaned feed names onto the package's column names.
    data <- .munge_ccp_names(data)
    if (!include_mandarin) {
      # `one_of()` only warns about names that are absent, so a missing
      # pinyin column does not abort the call.
      data <- data %>%
        select(-one_of(
          c(
            "xing_ming",
            "nian_ling",
            "suo_shu_ji_gou",
            "zhi_wu",
            "qian_zhi_wu",
            "xing_bie",
            "min_zu",
            "ji_guan",
            "ji_guan_sheng_fen",
            "ling_dao_zhi_wu_ceng_ci",
            "gong_zuo_de_dian_sheng_fen",
            "gong_zuo_dan_wei"
          )
        ))
    }
    data <- data %>%
      munge_data() %>%
      mutate_at(c("idMember", "ageMember"),
                readr::parse_number)
    urls <- json$data$nestedImagePath
    urlHeadshotFile <-
      json$data$scrapedImageUrl %>% map_chr(URLencode)
    data <-
      data %>%
      mutate(
        urlHeadshot = urls,
        urlHeadshotFile,
        # Items are comma (organizations) or " / " (positions) separated,
        # so the item count is the separator count plus one.
        countOrganizations = namesOrganizations %>% str_count("\\,") + 1,
        # NOTE(review): this adds a separate `namesOrganization` column and
        # leaves `namesOrganizations` comma-separated; confirm whether an
        # in-place replacement was intended.
        namesOrganization = namesOrganizations %>% str_replace_all("\\,", "\\ | "),
        countPositions = namePosition %>% str_count(" / ") + 1,
        countPositionsPrior = namePositionPrevious %>% str_count(" / ") + 1
      ) %>%
      mutate_at(c("namePosition", "namePositionPrevious"),
                list(function(x) {
                  x %>% str_replace_all(" / ", " | ")
                }))
    if (snake_names) {
      data <- data %>%
        clean_names()
    }
    data
  })
# thousand talents ---------------------------------------------------------
#' Chinese Communist Party 1000 Talents Members
#'
#' Crowd-sourced thousand talents participants
#'
#' @param snake_names if `TRUE` returns snake case names
#'
#' @return \code{tibble()}
#' @family china
#' @export
#'
#' @examples
#' \dontrun{
#' ccp_thousand_talents()
#' }
ccp_thousand_talents <-
  function(snake_names = FALSE) {
    # NOTE(review): the remote file is named `.tsv.gz` but is parsed with
    # `read_csv()`; confirm the delimiter of the hosted file.
    data <-
      read_csv("https://asbcllc.com/r_packages/govtrackR/data/thousand_talents.tsv.gz")
    if (snake_names) {
      data <- clean_names(data)
    }
    data
  }
# AEI ---------------------------------------------------------------------
#' AEI Chinese foreign investments
#'
#' Acquires all Chinese foreign investments and construction contracts
#' from the American Enterprise Institute. The function is memoised, so
#' repeated calls with the same arguments reuse the first result.
#'
#' @param include_incomplete_transactions if `TRUE` include incomplete
#'   transactions; if `FALSE` only completed transactions are returned
#' @param snake_names if `TRUE` return snake case columns
#' @param unformat if `TRUE` unformat currency columns
#' @param return_message if `TRUE` print a summary message
#' @family china
#'
#' @return \code{tibble}
#' @export
#'
#' @examples
#' aei_chinese_investments()
aei_chinese_investments <-
  memoise::memoise(function(include_incomplete_transactions = TRUE,
                            snake_names = FALSE,
                            unformat = FALSE,
                            return_message = TRUE) {
    page <-
      read_html("https://www.aei.org/china-global-investment-tracker/")
    # NOTE(review): assumes the selector matches exactly one workbook link;
    # confirm against the live page.
    url <- page %>% html_nodes("p strong a") %>% html_attr("href")
    tmp <-
      tempfile()
    # Remove the downloaded workbook even when a later step fails.
    on.exit(unlink(tmp), add = TRUE)
    curl::curl_download(url, tmp)
    df_types <- tibble(
      sheet = 1:3,
      type = c(
        "Foreign Investment",
        "Construction Contract",
        "Uncompleted Transaction"
      )
    )
    # Column names to apply to each of the three sheets, indexed by sheet.
    sheet_columns <- list(
      c(
        "yearTransaction",
        "monthTransaction",
        "namesChineseEntities",
        "amountInvestment",
        "pctShare",
        "namesEntityInvestment",
        "sectorInvestment",
        "subSectorInvestment",
        "countryInvestment",
        "regionInvestment",
        "isBRI",
        "isGreenfield"
      ),
      c(
        "yearTransaction",
        "monthTransaction",
        "namesChineseEntities",
        "amountInvestment",
        "pctShare",
        "namesEntityInvestment",
        "sectorInvestment",
        "subSectorInvestment",
        "countryInvestment",
        "regionInvestment",
        "isBRI"
      ),
      c(
        "yearTransaction",
        "monthTransaction",
        "namesChineseEntities",
        "amountInvestment",
        "namesEntityInvestment",
        "sectorInvestment",
        "subSectorInvestment",
        "countryInvestment",
        "regionInvestment",
        "isGreenfield",
        "isBRI"
      )
    )
    all_data <-
      1:3 %>%
      map_dfr(function(x) {
        data <- tmp %>% read_excel(sheet = x)
        type <- df_types %>% dplyr::slice(x) %>% pull(type)
        # Keep rows 6 onward (presumably the rows above are banner/header
        # rows in the workbook), then apply this sheet's column names.
        # `setNames` is used for every sheet; the third sheet previously
        # called `setnames`, which is not a base R function.
        data <-
          data %>%
          dplyr::slice(6:nrow(data)) %>%
          setNames(sheet_columns[[x]])
        data <-
          data %>%
          mutate(
            typeTransaction = type,
            # First of the month, plus one month, minus one day: month end.
            dateTransaction = glue("{yearTransaction}-{monthTransaction}-01") %>% ymd() %m+% months(1) - 1
          ) %>%
          select(
            typeTransaction,
            yearTransaction,
            monthTransaction,
            dateTransaction,
            namesChineseEntities,
            namesEntityInvestment,
            everything()
          )
        data
      })
    all_data <-
      all_data %>%
      mutate(
        isCompletedTransaction = typeTransaction != "Uncompleted Transaction",
        # The amount is multiplied by one million (presumably the workbook
        # reports millions).
        amountInvestment = as.numeric(amountInvestment) * 1000000,
        yearTransaction = as.numeric(yearTransaction),
        isGreenfield = case_when(isGreenfield == "0" ~ FALSE,
                                 isGreenfield %in% c("1", "G") ~ TRUE,
                                 TRUE ~ FALSE),
        isBRI = case_when(isBRI == "0" ~ FALSE,
                          isBRI %in% c("1", "G") ~ TRUE,
                          TRUE ~ FALSE)
      )
    df_pct <-
      all_data %>%
      filter(!is.na(pctShare)) %>%
      distinct(pctShare)
    # Translate each distinct raw share string into a fraction.
    df_pct <-
      df_pct$pctShare %>%
      map_dfr(function(pct_share) {
        pct <-
          pct_share %>% str_split("\\,") %>% flatten_chr() %>% .[[1]]
        if (pct %>% str_detect("%")) {
          pct <- pct %>% readr::parse_number() / 100
          data <- tibble(pctShare = pct_share,
                         pctShareActual = pct)
          return(data)
        }
        pct <- readr::parse_number(pct_share)
        # Values above 1 are treated as percentages; an unparseable value
        # stays NA instead of aborting the whole download.
        if (!is.na(pct) && pct > 1) {
          pct <- pct / 100
        }
        data <- tibble(pctShare = pct_share,
                       pctShareActual = pct)
        data
      })
    # NOTE(review): the final `select()` keeps only the columns that existed
    # before this step, so `isMajorityPurchase` and `isCompletePurchase` are
    # computed and then dropped. Behaviour is preserved here; confirm whether
    # they were meant to be returned.
    all_data <-
      all_data %>%
      left_join(df_pct, by = "pctShare") %>%
      select(-one_of("pctShare")) %>%
      rename(pctShare = pctShareActual) %>%
      mutate(isMajorityPurchase = pctShare >= .5,
             isCompletePurchase = pctShare == 1) %>%
      select(isCompletedTransaction , one_of(names(all_data)))
    all_data <-
      all_data %>%
      mutate(
        amountEnterpriseValueTransaction = case_when(!is.na(pctShare) ~ amountInvestment / pctShare,
                                                     TRUE ~ NA_real_)
      )
    all_data <-
      all_data %>%
      mutate(
        isChineseConsortium = str_to_upper(namesChineseEntities) %>% str_detect("CONSORT|GROUP OF"),
        namesChineseEntities = namesChineseEntities %>% str_remove_all("-led consortium$| led consortium$| consortium$")
      ) %>%
      separate(
        namesChineseEntities,
        into = c("namesChineseEntities", "detailsChineseEntites"),
        sep = "\\(",
        extra = "merge",
        fill = 'right'
      ) %>%
      mutate_at(c("namesChineseEntities", "detailsChineseEntites"),
                list(function(x) {
                  x %>% str_remove_all("\\)") %>% str_squish()
                }))
    all_data <-
      all_data %>%
      mutate(
        # " AND " and "/" become list separators; the two names that
        # legitimately contain "AND" are then restored with "&".
        namesChineseEntities = namesChineseEntities %>% str_replace_all("\\ AND |/", "\\, ") %>%
          str_replace_all(
            "STATE DEVELOPMENT, INVESTMENT CORP",
            "STATE DEVELOPMENT & INVESTMENT CORP"
          ) %>%
          str_replace_all("WUHAN IRON, STEEL", "WUHAN IRON & STEEL")
      ) %>%
      mutate(
        namesChineseEntities = case_when(
          namesChineseEntities %>% str_detect("ANBANG") ~ "ANBANG",
          namesChineseEntities %>% str_detect("QINGDAO HENGSHUN") ~ "QINGDAO HENGSHUN ZHONGSHENG",
          namesChineseEntities %>% str_detect("SAILUN") ~ "SAILUN TIRE",
          namesChineseEntities %>% str_detect("SHANDA") ~ "SHANDA GROUP",
          namesChineseEntities %>% str_detect("WUXI PHARMA") ~ "WUXI PHARMATECH",
          namesChineseEntities %>% str_detect("ZHEJIANG HUAYOU") ~ "ZHEJIANG HUAYOU COBALT",
          namesChineseEntities %>% str_detect("SHANDONG ELECTRIC POWER CONSTRUCTION") ~ "SHANDONG ELECTRIC POWER",
          namesChineseEntities %>% str_detect("SHANGHAI MUNICIPAL") ~ "SHANGHAI MUNICIPAL INVESTMENT",
          namesChineseEntities %>% str_detect("SHANDONG HEAVY") ~ "SHANDONG HEAVY",
          namesChineseEntities %>% str_detect("SHENHUA'S WATERMARK") ~ "SHENHUA",
          namesChineseEntities %>% str_detect("SHOUGANG") ~ "SHOUGANG GROUP",
          namesChineseEntities %>% str_detect("SUNING") ~ "SUNING APPLIANCE",
          namesChineseEntities %>% str_detect("SHANDONG LINGLONG") ~ "SHANDONG LINGLONG TIRE",
          namesChineseEntities %>% str_detect("SHANGHAI CONSTRUCTION") ~ "SHANGHAI CONSTRUCTION GROUP",
          namesChineseEntities %>% str_detect("GREENLAND") ~ "SHANGHAI GREENLAND",
          namesChineseEntities %>% str_detect("GROUP OF SHANGHAI STATE") ~ "SHANGHAI STATE INVESTMENT GROUP",
          namesChineseEntities %>% str_detect("GUANGDONG RISING") ~ "GUANGDONG RISING ASSET MANAGEMENT",
          namesChineseEntities %>% str_detect("BOSAI") ~ "BOSAI MINERALS",
          namesChineseEntities %>% str_detect("HANGZHOU GREAT STAR") ~ "HANGZHOU GREAT STAR INDUSTRIAL",
          namesChineseEntities %>% str_detect("CHINA MINGSHEN INVESTMENT") ~ "CHINA MINSHENG INVESTMENT",
          namesChineseEntities %>% str_detect("ENVIVSION ENERGY") ~ "ENVISION ENERGY",
          namesChineseEntities %>% str_detect("GREAT WALL MOTOR") ~ "GREAT WALL MOTORS",
          namesChineseEntities %>% str_detect("LUXSHARE") ~ "LUXSHARE PRECISION",
          namesChineseEntities %>% str_detect("TIDFORE HEAVY") ~ "TIDFORE HEAVY EQUIPMENT",
          namesChineseEntities == "CHEM CHINA" ~ "CHEMCHINA",
          namesChineseEntities == "DONGFANG" ~ "DONGFANG ELECTRIC",
          namesChineseEntities == "EXIM BANK" ~ "EX-IM BANK",
          namesChineseEntities == "HUANENG" ~ "HUANENG POWER",
          namesChineseEntities == "TEBIAN ELECTRIC" ~ "TEBIAN ELECTRIC APPARATUS",
          namesChineseEntities == "HUMANWELL" ~ "HUMANWELL HEALTHCARE",
          namesChineseEntities == "JINCHUAN GROUP" ~ "JINCHUAN",
          namesChineseEntities == "LUYE" ~ "LUYE GROUP",
          namesChineseEntities == "JIQUAN IRON, STEEL" ~ "JIQUAN IRON & STEEL",
          namesChineseEntities == "TSINGHUA" ~ "TSINGHUA UNIGROUP",
          namesChineseEntities == "WISON" ~ "WISON ENERGY",
          namesChineseEntities %in% c("HEBEI IRON", "HEBEI IRON, STEEL", "HEBEI STEEL") ~ "HEBEI IRON & STEEL",
          TRUE ~ namesChineseEntities
        )
      )
    all_data <-
      all_data %>%
      # `seq_len()` stays correct when the table has no rows.
      mutate(idTransaction = seq_len(n())) %>%
      select(idTransaction, everything())
    # One row per (transaction, Chinese entity) so each entity name can be
    # normalised on its own before being collapsed back.
    df_entities <- all_data %>%
      select(idTransaction, namesChineseEntities) %>%
      separate_rows(namesChineseEntities, sep = "\\, ") %>%
      rename(nameChineseEntities = namesChineseEntities)
    df_entities <-
      df_entities %>%
      entities::refine_columns(entity_columns = "nameChineseEntities")
    df_entities <- df_entities %>%
      mutate(
        nameChineseEntitiesClean = case_when(
          nameChineseEntitiesClean %>% str_detect("ANBANG") ~ "ANBANG",
          nameChineseEntitiesClean %>% str_detect("CITIC") ~ "CITIC",
          nameChineseEntitiesClean %>% str_detect("COUNTRY GARDEN") ~ "COUNTRY GARDEN HOLDINGS",
          nameChineseEntitiesClean %>% str_detect("DONGFANG|DONGFENG") ~ "DONGFANG ELECTRIC",
          nameChineseEntitiesClean %>% str_detect("FUJIAN CONSTRUCTION") ~ "FUJIAN CONSTRUCTION ENGINEERING",
          nameChineseEntitiesClean %>% str_detect("GUANGDONG RISING ASSET") ~ "GUANGDONG RISING ASSET MANAGEMENT",
          nameChineseEntitiesClean %>% str_detect("HEBEI CONSTRUCTION") ~ "HEBEI CONSTRUCTION",
          nameChineseEntitiesClean %>% str_detect("HOPU") ~ "HOPU INVESTMENT",
          nameChineseEntitiesClean %>% str_detect("JIN JIANG") ~ "JIN JIANG HOTELS",
          nameChineseEntitiesClean %>% str_detect("LUXSHARE") ~ "LUXSHARE PRECISION",
          nameChineseEntitiesClean %>% str_detect("POWER CONSTRUCTION") ~ "POWER CONSTRUCTION CORP",
          nameChineseEntitiesClean %>% str_detect("SHANGHAI MUNICIPAL") ~ "SHANGHAI MUNICIPAL GOVERNMENT",
          nameChineseEntitiesClean %>% str_detect("SHOUGANG") ~ "SHOUGANG GROUP",
          nameChineseEntitiesClean %>% str_detect("SUNING") ~ "SUNING APPLIANCE",
          nameChineseEntitiesClean %>% str_detect("TSINGSHAN") ~ "TSINGSHAN STEEL",
          nameChineseEntitiesClean == "GREENLAND" ~ "SHANGHAI GREENLAND",
          TRUE ~ nameChineseEntitiesClean
        )
      ) %>%
      select(idTransaction, nameChineseEntities = nameChineseEntitiesClean)
    df_entities <-
      df_entities %>% group_by(idTransaction) %>% summarise(
        countChineseEntities = n(),
        namesChineseEntities = unique(nameChineseEntities) %>% sort() %>% str_c(collapse = " | ")
      ) %>%
      ungroup()
    # Swap in the normalised names, keeping the original column order and
    # appending the new count column.
    all_data <- all_data %>%
      select(-namesChineseEntities) %>%
      left_join(df_entities, by = "idTransaction") %>%
      select(one_of(names(all_data)), everything())
    all_data <- all_data %>%
      mutate(
        regionInvestment = case_when(
          regionInvestment %>% str_detect("Africa") ~ "Africa",
          TRUE ~ regionInvestment
        )
      )
    all_data <- all_data %>%
      mutate(
        isUnknownInvestmentEntity = is.na(namesEntityInvestment),
        namesEntityInvestment = str_to_upper(namesEntityInvestment)
      )
    all_data <-
      all_data %>%
      mutate(
        # Protect names that legitimately contain "AND" before " AND " is
        # turned into a list separator.
        namesEntityInvestment = case_when(
          namesEntityInvestment %>% str_detect("PYONGYANG INVESTMENT AND DEVELOPMENT ") ~ "PYONGYANG INVESTMENT & DEVELOPMENT",
          namesEntityInvestment %>% str_detect("IRAN WATER AND POWER") ~ "IRAN WATER & POWER",
          namesEntityInvestment %>% str_detect("INTERSTATE HOTEL AND RESORTS") ~ "INTERSTATE HOTEL & RESORTS",
          namesEntityInvestment %>% str_detect("GEORGIAN OIL AND GAS") ~ "GEORGIAN OIL & GAS",
          namesEntityInvestment %>% str_detect("FISHER AND PAYKEL") ~ "FISHER & PAYKEL",
          namesEntityInvestment %>% str_detect("SOKHNA REFINERY AND PETROCHEMICALS") ~ "SOKHNA REFINERY & PETROCHEMICALS",
          namesEntityInvestment %>% str_detect("CHINA LIGHT AND POWER") ~ "CHINA LIGHT & POWER",
          namesEntityInvestment %>% str_detect("BANKO INDUSTRIAL AND COMERCIAL") ~ "BANKO INDUSTRIAL & COMERCIAL",
          namesEntityInvestment %>% str_detect("AUSTRALIA AND NEW ZEALAND BANKING") ~ "AUSTRALIA & NEW ZEALAND BANKING",
          TRUE ~ namesEntityInvestment
        ),
        namesEntityInvestment = namesEntityInvestment %>% str_replace_all(" AND ", ", ")
      )
    df_entities <-
      all_data %>%
      select(idTransaction, namesEntityInvestment) %>%
      separate_rows(namesEntityInvestment, sep = "\\, ") %>%
      rename(nameEntityInvestment = namesEntityInvestment) %>%
      filter(!is.na(nameEntityInvestment))
    df_entities <-
      df_entities %>%
      entities::refine_columns(entity_columns = "nameEntityInvestment")
    df_entities <-
      df_entities %>%
      mutate(
        nameEntityInvestmentClean = case_when(
          nameEntityInvestmentClean %>% str_detect("ACCOR") ~ "ACCOR HOTELS",
          nameEntityInvestmentClean %>% str_detect("ACWA") ~ "ACWA POWER",
          nameEntityInvestmentClean %>% str_detect("ARAMCO") ~ "ARAMCO",
          nameEntityInvestmentClean %>% str_detect("ATHABASCA") ~ "ATHABASCA OIL CORP",
          nameEntityInvestmentClean %>% str_detect("BACCARAT") ~ "BACCARAT HOTELS",
          nameEntityInvestmentClean %>% str_detect("BINTANG") ~ "BINTANG DELAPAN",
          nameEntityInvestmentClean %>% str_detect("BROOKEFIELD|BROOKFIELD") ~ "BROOKFIELD",
          nameEntityInvestmentClean %>% str_detect("CAPE LAMBERT") ~ "CAPE LAMBERT IRON",
          nameEntityInvestmentClean %>% str_detect("CARLYLE") ~ "CARLYLE GROUP",
          nameEntityInvestmentClean %>% str_detect("CBRE") ~ "CBRE",
          nameEntityInvestmentClean %>% str_detect("CVRD|VALE") ~ "VALE",
          nameEntityInvestmentClean %>% str_detect("DAMAC") ~ "DAMAC",
          nameEntityInvestmentClean %>% str_detect("EXXON") ~ "EXXONMOBIL",
          nameEntityInvestmentClean == "GE" ~ "GENERAL ELECTRIC",
          nameEntityInvestmentClean == "GE SEACO" ~ "SEACO",
          nameEntityInvestmentClean %>% str_detect("GENTING") ~ "GENTING",
          nameEntityInvestmentClean %>% str_detect("GENERALI") ~ "GENERALI",
          nameEntityInvestmentClean %>% str_detect("GINDALBIE") ~ "GINDALBIE METALS",
          nameEntityInvestmentClean %>% str_detect("GLOBAL LOGISTICS|GLP") ~ "GLOBAL LOGISTICS PROPERTIES",
          nameEntityInvestmentClean == "GM" ~ "GENERAL MOTORS",
          nameEntityInvestmentClean %>% str_detect("GREENLAND") ~ "SHANGHAI GREENLAND",
          nameEntityInvestmentClean %>% str_detect("HATTAT") ~ "HATTAT HOLDING",
          nameEntityInvestmentClean %>% str_detect("INOVA") ~ "INOVA GEOPHYSICAL",
          nameEntityInvestmentClean %>% str_detect("IVANHOE") ~ "IVANHOE MINES",
          nameEntityInvestmentClean %>% str_detect("J&T") ~ "J&T FINANCE",
          nameEntityInvestmentClean %>% str_detect("KAZAKHMYS") ~ "KAZAKHMYS AKTOGA",
          nameEntityInvestmentClean %>% str_detect("KAZMUNAIGAS|KAZMUNAIGAS|KAZMUNAIGAZ|KAZMUNAYGAS") ~ "KAZMUNAYGAS",
          nameEntityInvestmentClean %>% str_detect("MITSUBISHI") ~ "MITSUBISHI",
          nameEntityInvestmentClean %>% str_detect("MOTOROLA") ~ "MOTOROLA",
          nameEntityInvestmentClean %>% str_detect("OAKTREE CAPITAL") ~ "OAKTREE CAPITAL MANAGEMENT",
          nameEntityInvestmentClean %>% str_detect("OPERA") ~ "OPERA SOFTWARE",
          nameEntityInvestmentClean %>% str_detect("PIONEER") ~ "PIONEER NATURAL RESOUCES",
          nameEntityInvestmentClean %>% str_detect("PT SEMEN") ~ "SEMEN INDONESIA",
          nameEntityInvestmentClean %>% str_detect("REPSOL") ~ "REPSOL",
          nameEntityInvestmentClean %>% str_detect("RFR") ~ "RFR HOLDING",
          nameEntityInvestmentClean %>% str_detect("SAAB") ~ "SAAB",
          nameEntityInvestmentClean %>% str_detect("SAXO") ~ "SAXO BANK",
          nameEntityInvestmentClean %>% str_detect("SONGBIRD") ~ "SONGBIRD ESTATES",
          nameEntityInvestmentClean %>% str_detect("STARWOOD") ~ "STARWOOD CAPITAL",
          nameEntityInvestmentClean %>% str_detect("SUNDANCE") ~ "SUNDANCE RESOURCES",
          nameEntityInvestmentClean %>% str_detect("TEHRAN RAIL|TEHRAN URBAN & SUBURBAN RAIL") ~ "TEHRAN URBAN & SUBURBAN RAIL",
          nameEntityInvestmentClean %>% str_detect("THOMAS COOK") ~ "THOMAS COOK GROUP",
          nameEntityInvestmentClean %>% str_detect("TRILITY") ~ "TRILITY WENTWORTH",
          nameEntityInvestmentClean %>% str_detect("UBER") ~ "UBER",
          nameEntityInvestmentClean %>% str_detect("UZBEKNEFTEGAS|UZBEKNEFTEGAZ") ~ "UZBEKNEFTEGAZ",
          nameEntityInvestmentClean %>% str_detect("VENEZUELA NATIONAL ELECTRIC") ~ "VENEZUELA NATIONAL ELECTRIC",
          nameEntityInvestmentClean %>% str_detect("VIETNAM NATIONAL COAL") ~ "VIETNAM NATIONAL COAL",
          nameEntityInvestmentClean %>% str_detect("VTB") ~ "VTB GROUP",
          nameEntityInvestmentClean %>% str_detect("BURG") ~ "BURG INDUSTRIES",
          TRUE ~ nameEntityInvestmentClean
        )
      ) %>%
      select(idTransaction, nameEntityInvestment = nameEntityInvestmentClean)
    df_entities <-
      df_entities %>%
      group_by(idTransaction) %>% summarise(
        countInvestmentEntities = n(),
        namesEntityInvestment = unique(nameEntityInvestment) %>% sort() %>% str_c(collapse = " | ")
      ) %>%
      ungroup()
    all_data <-
      all_data %>%
      select(-namesEntityInvestment) %>%
      left_join(df_entities, by = "idTransaction") %>%
      select(one_of(names(all_data)), everything())
    if (!include_incomplete_transactions) {
      # Excluding incomplete transactions means keeping the completed ones
      # (the condition was previously negated and kept only incomplete rows).
      all_data <- all_data %>% filter(isCompletedTransaction)
    }
    if (return_message) {
      completed <- all_data %>% filter(isCompletedTransaction)
      actions <-
        completed %>%
        nrow() %>% comma(digits = 0)
      amt <-
        completed %>% pull(amountInvestment) %>% sum(na.rm = TRUE) %>% currency(digits = 0)
      from_date <-
        completed %>% pull(dateTransaction) %>% min(na.rm = TRUE)
      to_date <-
        completed %>% pull(dateTransaction) %>% max(na.rm = TRUE)
      countries <-
        completed %>% distinct(countryInvestment) %>% nrow()
      glue(
        "\n\n{green({amt})} in completed Communist Chinese investment {red({from_date})} and {red({to_date})} across {yellow(actions)} actions in {magenta(countries)} countries\n\n"
      ) %>% cat(fill = TRUE)
    }
    all_data <-
      all_data %>%
      munge_data(snake_names = snake_names, unformat = unformat)
    all_data
  })
# jeremy_wu ---------------------------------------------------------------
#' Espionage Cases
#'
#' Data about U.S. espionage cases collected via Jeremy Wu
#'
#' @param snake_names if `TRUE` returns snake case names
#'
#' @return \code{tibble()}
#' @family china
#' @export
#'
#' @examples
#' \dontrun{
#' us_espionage_cases()
#' }
us_espionage_cases <- function(snake_names = FALSE) {
  page <- "https://jeremy-wu.info/fed-cases/" %>%
    read_html()
  data <- page %>% html_table(fill = FALSE) %>% .[[1]] %>% as_tibble()
  link_nodes <- page %>% html_nodes("#tablepress-3 .column-11")
  # Drop the first match (presumably the header cell). Negative indexing
  # stays correct when fewer than two nodes matched, unlike `2:length(x)`.
  link_nodes <- link_nodes[-1]
  # One row per table row, holding every link found in its last column.
  df_links <-
    seq_along(link_nodes) %>%
    map_dfr(function(x) {
      urls <- link_nodes[[x]] %>% html_nodes("a") %>% html_attr("href")
      tibble(idRow = x, urlsCase = list(urls))
    })
  data <-
    data %>%
    setNames(
      c(
        "dateReported",
        "idCase",
        "yearEvent",
        "countryAttacker",
        "descriptionCase",
        "slugCourt",
        "typeCharges",
        "statusCases",
        "dataDefendants",
        "dataFirms",
        "removeU"
      )
    ) %>%
    .munge_data(clean_address = FALSE) %>%
    # `seq_len()` stays correct when the table has no rows.
    mutate(idRow = seq_len(n())) %>%
    select(-matches("remove"))
  data <-
    data %>%
    left_join(df_links, by = "idRow") %>%
    select(-idRow)
  # Link captions inside the description column are stripped from the text.
  link_text <-
    page %>% html_nodes("#tablepress-3 .column-5") %>% html_nodes("a") %>% html_text() %>% unique()
  # An empty caption would add an empty alternative to the pattern, which
  # matches first and stops the alternatives after it from being removed.
  link_text <- link_text[nzchar(link_text)]
  if (length(link_text) > 0) {
    # NOTE(review): captions are used as a regular expression without
    # escaping; confirm none contain regex metacharacters.
    link_pattern <- link_text %>% str_c(collapse = "|")
    data <-
      data %>%
      mutate(descriptionCase = descriptionCase %>% str_remove_all(link_pattern))
  }
  data <- munge_data(data = data, snake_names = snake_names)
  data
}
# uni_watch ---------------------------------------------------------------
# Scrape the institution listing table from the ASPI university tracker.
#
# `url` is the listing page to request. It was previously ignored in favour
# of a hard-coded address equal to the default, so default calls behave the
# same. Returns one row per institution with its name, detail-page URL,
# entity type, risk level, security credentials and two check-mark flags.
.uw_universities <-
  function(url = "https://unitracker.aspi.org.au/") {
    # Browser-like request headers, including a fixed cookie.
    # NOTE(review): the cookie looks like a captured clearance token and is
    # presumably expired; confirm the request still succeeds.
    headers <- c(
      `authority` = 'unitracker.aspi.org.au',
      `pragma` = 'no-cache',
      `cache-control` = 'no-cache',
      `upgrade-insecure-requests` = '1',
      `user-agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
      `accept` = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
      `sec-fetch-site` = 'none',
      `sec-fetch-mode` = 'navigate',
      `sec-fetch-user` = '?1',
      `sec-fetch-dest` = 'document',
      `accept-language` = 'en-US,en;q=0.9',
      `cookie` = '__cfduid=d60816f93c7ed35cfdfaf57a31ce88b731592596455; cf_clearance=21ac30fb332ece6967fa83df38d1793f08b532ab-1592597657-0-9e1ce14-150'
    )
    res <-
      httr::GET(url = url, httr::add_headers(.headers = headers))
    page <-
      res %>% content("text") %>% read_html()
    # Text of the n-th table column with digits and periods stripped,
    # whitespace squished and letters upper-cased.
    column_text <- function(position) {
      page %>%
        html_nodes(str_c("tbody td:nth-child(", position, ")")) %>%
        html_text() %>%
        str_squish() %>%
        str_remove_all("[0-9]|\\.") %>%
        str_squish() %>%
        str_to_upper()
    }
    nameInstitution <-
      page %>% html_nodes(".data-table__university-title") %>% html_children() %>% html_text()
    # The title links are prefixed with the site root to form full URLs.
    urlSchool <-
      page %>% html_nodes(".data-table__university-title") %>% html_attr("href") %>%
      str_c("https://unitracker.aspi.org.au", .)
    typeEntity <- column_text(2)
    levelRisk <- column_text(3)
    typeSecurityCredentials <- column_text(4)
    # Columns 5 and 6 are flags: TRUE when the cell contains a check mark.
    isBISRestricted <- column_text(5) %>% str_detect("✓")
    hasSpying <- column_text(6) %>% str_detect("✓")
    data <-
      tibble(
        nameInstitution,
        levelRisk,
        urlSchool,
        typeEntity,
        typeSecurityCredentials,
        isBISRestricted,
        hasSpying
      ) %>%
      .munge_data(clean_address = FALSE)
    data
  }
# Scrape one institution detail page from the ASPI university tracker.
#
# `url` is the detail page to request; `return_message` controls whether a
# "Parsing <url>" message is emitted. Returns a one-row tibble with the
# sidebar sections (aliases, location, supervising agencies, categories,
# topics) as columns where present, plus the page description, the page URL
# (`urlSchool`), the collected links and, when found, the logo URL.
.parse_uw_url <-
  function(url = "https://unitracker.aspi.org.au/universities/air-force-command-college",
           return_message = TRUE) {
    if (return_message) {
      glue("Parsing {url}") %>% message()
    }
    # Browser-like request headers, including a fixed referer and cookie.
    # NOTE(review): the cookie looks like a captured clearance token and is
    # presumably expired; confirm the request still succeeds.
    headers <- c(
      `authority` = 'unitracker.aspi.org.au',
      `pragma` = 'no-cache',
      `cache-control` = 'no-cache',
      `upgrade-insecure-requests` = '1',
      `user-agent` = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
      `accept` = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
      `sec-fetch-site` = 'same-origin',
      `sec-fetch-mode` = 'navigate',
      `sec-fetch-user` = '?1',
      `sec-fetch-dest` = 'document',
      `referer` = 'https://unitracker.aspi.org.au/universities/air-force-command-college',
      `accept-language` = 'en-US,en;q=0.9',
      `cookie` = '__cfduid=d60816f93c7ed35cfdfaf57a31ce88b731592596455; cf_clearance=21ac30fb332ece6967fa83df38d1793f08b532ab-1592597657-0-9e1ce14-150'
    )
    res <-
      httr::GET(url = url, httr::add_headers(.headers = headers))
    page <-
      res %>% content("text") %>% read_html()
    links <-
      page %>% html_nodes("span a") %>% html_attr("href")
    descriptionInstitution <-
      page %>% html_nodes(".copy h2 , p") %>% html_text() %>% str_c(collapse = "")
    # Split the text on ";", ".html" and "http", and discard any fragment
    # that still contains "://" (the remains of inline URLs).
    descriptionInstitution <-
      descriptionInstitution %>%
      str_split("\\;") %>%
      flatten_chr() %>%
      str_squish() %>%
      str_c(collapse = "") %>%
      str_split(".html") %>%
      flatten_chr() %>%
      str_split("http") %>%
      flatten_chr() %>%
      discard(function(x) {
        x %>% str_detect("\\://")
      }) %>%
      str_c(collapse = "")
    details <-
      page %>% html_nodes(".aside__heading , .aside__item") %>% html_text() %>% str_squish()
    # Headings and items arrive interleaved: tag each item with the heading
    # above it, then collapse the items of each heading into one string.
    details_long <-
      tibble(item = details) %>%
      mutate(
        isBase = item %in% c(
          "Aliases",
          "Location",
          "Supervising agencies",
          "Categories",
          "Topics"
        )
      ) %>%
      mutate(parent = case_when(isBase ~ item,
                                TRUE ~ NA_character_)) %>%
      fill(parent) %>%
      filter(!isBase) %>%
      select(parent, item) %>%
      group_by(parent) %>%
      summarise(item = item %>% str_c(collapse = " | ")) %>%
      ungroup()
    if (nrow(details_long) == 0) {
      # No sidebar details: start from a single row so the page-level
      # columns added below are not lost to a zero-row table.
      data <- tibble(urlSchool = url)
    } else {
      data <-
        details_long %>%
        mutate(
          parent = case_when(
            parent == "Aliases" ~ "namesAliases",
            parent == "Categories" ~ "typeCategories",
            parent == "Location" ~ "locationInstitution",
            parent == "Supervising agencies" ~ "namesAgencySupervising",
            parent == "Topics" ~ "topicsExpertise"
          )
        ) %>%
        mutate(item = item %>% str_to_upper()) %>%
        spread(parent, item)
    }
    logo <-
      page %>% html_nodes(".aside__logo-image") %>% html_attr("src")
    if (length(logo) > 0) {
      # Use the first match so several logo nodes cannot trigger a
      # length-mismatch error on the one-row table.
      data <-
        data %>%
        mutate(urlLogoInstitution = logo[[1]])
    }
    data %>%
      mutate(
        descriptionInstitution,
        urlSchool = url,
        urlsInstitution = str_c(links, collapse = " | ")
      )
  }
#' Chinese University Tracker
#'
#' Data tracking key topic areas from
#' the major Chinese research institutions maintained by
#' the Australian Strategic Policy Institute
#'
#' @param parse_details if \code{TRUE} parses each institution's detail page
#' @param snake_names if `TRUE` returns snake case names
#' @param use_cached_data if `TRUE` loads a previously saved copy of the data
#'   instead of scraping the site
#' @param return_message if `TRUE` prints a message for each parsed page
#'
#' @return \code{tibble}
#' @family china
#' @export
#'
#' @examples
#' china_unitracker()
china_unitracker <-
  function(parse_details = TRUE,
           snake_names = FALSE,
           use_cached_data = FALSE,
           return_message = TRUE) {
    if (use_cached_data) {
      data <- read_rda("https://asbcllc.com/r_packages/govtrackR/data/aspi_data.rda")
      # Honour `snake_names` on the cached path too.
      # NOTE(review): assumes the cached object is a data frame; confirm.
      if (snake_names) {
        data <- data %>% clean_names()
      }
      return(data)
    }
    data <-
      .uw_universities()
    if (!parse_details) {
      if (snake_names) {
        data <- data %>% clean_names()
      }
      return(data)
    }
    df_details <-
      data$urlSchool %>%
      future_map_dfr(function(url) {
        .parse_uw_url(url = url, return_message = return_message)
      })
    # Values are " | " separated, so the item count is the separator count
    # plus one (previously the bare separator count, one short). Missing
    # values stay NA.
    df_details <- df_details %>%
      mutate(
        countAgenciesSupervising = namesAgencySupervising %>% str_count("\\|") + 1,
        countCategories = typeCategories %>% str_count("\\|") + 1,
        countTopics = topicsExpertise %>% str_count("\\|") + 1,
        countAliases = namesAliases %>% str_count("\\|") + 1,
        countLocations = locationInstitution %>% str_count("\\|") + 1
      )
    data <-
      data %>%
      left_join(df_details, by = "urlSchool")
    if (snake_names) {
      data <- data %>% clean_names()
    }
    data
  }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.