# devtools::load_all()
# ridiculous hack to avoid janitor::clean_names() throwing numerous warnings when running in our
# docker image: https://github.com/Tazinho/snakecase/issues/191
# explanation: I believe snakecase:::replace_special_characters_internal is only a promise until
# the first time it is called, so simply printing the function to stdout causes the function to
# be loaded for the first time, triggering the warnings, thereby relieving any further calls
# to it from additional warnings
# because janitor::clean_names() is used in multiple functions here, this is the cleanest way
# to deal with them all at once up front
suppressWarnings(invisible(snakecase:::replace_special_characters_internal))
### Portfolio cleaning functions
read_raw_portfolio_file <- function(project_name) {
portfolio <- NA
input_path <- paste0(project_location, "/20_Raw_Inputs/")
csv_to_read <- list.files(path = input_path, pattern = "_Input.csv")
txt_to_read <- list.files(path = input_path, pattern = "_Input.txt")
if (length(csv_to_read) == 1) {
portfolio <- read_csv(paste0(input_path, csv_to_read))
}
if (length(txt_to_read) == 1) {
enc <- guess_encoding(paste0(input_path, txt_to_read))$encoding[1]
portfolio <- read.table(paste0(input_path, txt_to_read), sep = ",", header = T, fileEncoding = enc)
}
# Reads in Files saved with a ; not a ,
if (ncol(portfolio) == 1 & length(csv_to_read) == 1) {
portfolio <- read.csv(paste0(input_path, csv_to_read), strip.white = T, stringsAsFactors = F, sep = ";")
}
if (ncol(portfolio) == 1 & length(txt_to_read) == 1) {
portfolio <- read.table(paste0(input_path, txt_to_read), sep = "\t", header = T, fileEncoding = enc)
}
if (ncol(portfolio) == 1 & length(txt_to_read) == 1) {
portfolio <- read.table(paste0(input_path, txt_to_read), sep = ";", header = T, fileEncoding = enc)
}
if (!data_check(portfolio)) {
stop("No portfolio Input File")
}
portfolio
}
clean_colnames_portfolio_input_file <- function(portfolio) {
if (is.data.frame(portfolio)) {
# Removes additional columns added by Excel on saving
portfolio <- portfolio[, !grepl("X", colnames(portfolio))]
} else {
stop("No portfolio Data readable")
}
portfolio <- janitor::clean_names(portfolio)
if ("numberof_shares" %in% colnames(portfolio)) {
portfolio <- portfolio %>% rename(number_of_shares = numberof_shares)
}
portfolio
}
clean_portfolio_col_types <- function(portfolio, grouping_variables) {
# portfolio[,grouping_variables] <- lapply(portfolio[,grouping_variables], clean_punctuation)
if (is.character(portfolio$investor_name) == FALSE) {
write_log(msg = paste0(
"Wrong variable class for investor_name. Should be character, but is ",
class(portfolio$investor_name),
". This can introduce errors in further calculations!"
),
file_path = log_path)
}
if (is.character(portfolio$portfolio_name) == FALSE) {
write_log(msg = paste0(
"Wrong variable class for portfolio_name Should be character, but is ",
class(portfolio$portfolio_name),
". This can introduce errors in further calculations!"
),
file_path = log_path)
}
if (is.numeric(portfolio$market_value) == FALSE) {
write_log(msg = paste0(
"Wrong variable class for market_value Should be numeric, but is ",
class(portfolio$market_value),
". This can introduce errors in further calculations!"
),
file_path = log_path)
}
if("number_of_shares" %in% colnames(portfolio)){
if (is.numeric(portfolio$number_of_shares) == FALSE) {
write_log(msg = paste0(
"Wrong variable class for number_of_shares Should be numeric, but is ",
class(portfolio$number_of_shares),
". This can introduce errors in further calculations!"
),
file_path = log_path)
}
portfolio$number_of_shares <- suppressWarnings(as.numeric(portfolio$number_of_shares))
}
if (is.character(portfolio$currency) == FALSE) {
write_log(msg = paste0(
"Wrong variable class for currency Should be character, but is ",
class(portfolio$currency),
". This can introduce errors in further calculations!"
),
file_path = log_path)
}
if (is.character(portfolio$isin) == FALSE) {
write_log(msg = paste0(
"Wrong variable class for isin Should be character, but is ",
class(portfolio$isin),
". This can introduce errors in further calculations!"
),
file_path = log_path)
}
portfolio$market_value <- as.numeric(portfolio$market_value)
portfolio$currency <- as.character(portfolio$currency)
portfolio$currency <- if_else(portfolio$currency == "Euro", "EUR", portfolio$currency)
portfolio
}
clear_portfolio_input_blanks <- function(portfolio) {
if (any(portfolio[, grouping_variables] == "" | is.na(portfolio[, grouping_variables]))) {
print("Warning: missing grouping variables, corresponding rows removed")
write_log(msg = paste(
"Warning: some entries of the uploaded portfolio file were removed
because of missing values in at least one of the variables", str_c(grouping_variables, collapse = ", "),
"\n To ensure complete analysis, please upload a file without
missing values in these columns."
),
file_path = log_path)
portfolio <- portfolio %>% filter_at(
grouping_variables, all_vars(!is.na(.))
)
}
portfolio
}
add_meta_portfolio <- function(portfolio, inc_meta_portfolio) {
portfolio_meta <- portfolio
# Group at a level
# lvl <- 1
#
#
# gv <- grouping_variables[1:lvl]
# ngv <- grouping_variables[seq(lvl+1,length(grouping_variables))]
#
# portfolio_1 <- portfolio_meta %>% mutate(!!!rlang::syms(ngv) = "Meta")
#
#
# # loop through grouping variables
# # the order of gv defines this
#
# for (g in length(grouping_variables)){
#
# gv <- grouping_variables[g]
#
# # portfolio_meta_sub <- [,gv]
#
# portfolio_0 <- portfolio %>%
# mutate(country_group = "meta",
# investor_name = "meta",
# portfolio_name = "meta")
#
# portfolio_1 <- portfolio %>%
# group_by(country_group) %>%
# mutate(investor_name = paste(country_group, "meta"),
# portfolio_name = paste(country_group, "meta"))
#
# portfolio_2 <- portfolio %>%
# group_by(country_group, investor_name) %>%
# mutate(portfolio_name = paste(investor_name, "meta"))
#
# portfolio_all <- bind_rows(portfolio,portfolio_0, portfolio_1, portfolio_2)
#
# }
if (inc_meta_portfolio) {
portfolio_meta$portfolio_name <- meta_portfolio_name
portfolio_meta$investor_name <- meta_investor_name
portfolio <- rbind(portfolio, portfolio_meta)
}
portfolio
}
add_holding_id <- function(portfolio) {
if (length(setdiff("holding_id", names(portfolio))) != 0) {
portfolio$holding_id <- row.names(portfolio)
}
portfolio
}
check_missing_cols <- function(portfolio, grouping_variables) {
required_input_cols <- c("holding_id", "market_value", "currency", "isin", grouping_variables, "number_of_shares")
if (!"number_of_shares" %in% colnames(portfolio)) {
portfolio$number_of_shares <- NA_real_
}
missing_columns <- setdiff(required_input_cols, colnames(portfolio))
if (length(missing_columns) > 0) {
write_log(
msg = paste0("The input file is missing the following data columns: ", missing_columns),
file_path = log_path
)
stop(paste0("The input file is missing the following data columns: ", missing_columns))
}
portfolio <- as_tibble(portfolio)
portfolio
}
set_currency_timestamp <- function(currencies) {
currencies <- currencies %>% mutate(ExchangeRate_2019Q2 = 1)
currencies <- currencies %>%
select(Currency_abbr, paste0("ExchangeRate_", financial_timestamp)) %>%
filter(!is.na(Currency_abbr), Currency_abbr != "") %>%
distinct()
names(currencies) <- c("currency", "exchange_rate")
currencies$exchange_rate <- as.numeric(currencies$exchange_rate)
currencies
}
### Fin data cleaning functions
map_security_sectors <- function(fin_data, sector_bridge) {
initial_no_rows <- nrow(fin_data)
fin_data <- fin_data %>%
left_join(sector_bridge %>% filter(source == "BICS") %>% select(-source),
by = c("security_bics_subgroup" = "industry_classification")
) %>%
mutate(security_icb_subsector = as.character(security_icb_subsector))
fin_data_na <- fin_data %>%
filter(is.na(sector)) %>%
select(-c(sector,sector_boe,subsector_boe,sector_dnb,sector_ipr,subsector_ipr))
fin_data <- fin_data %>% filter(!is.na(sector))
fin_data_na <- fin_data_na %>% left_join(sector_bridge %>% filter(source == "ICB") %>% select(-source),
by = c("security_icb_subsector" = "industry_classification")
)
fin_data <- fin_data %>% bind_rows(fin_data_na)
fin_data <- fin_data %>%
select(-security_mapped_sector) %>%
rename(security_mapped_sector = sector)
fin_data %>%
group_by(security_mapped_sector) %>%
filter(is.na(security_mapped_sector)) %>%
summarise(count = n())
fin_data_na <- fin_data %>% filter(is.na(security_mapped_sector))
if (nrow(fin_data) != initial_no_rows) {
stop("Rows being dropped in mapping sectors")
}
return(fin_data)
}
map_comp_sectors <- function(comp_fin_data, sector_bridge) {
initial_no_rows <- nrow(comp_fin_data)
comp_fin_data <- comp_fin_data %>% left_join(sector_bridge %>% filter(source == "BICS") %>% select(-source),
by = c("bics_subgroup" = "industry_classification")
)
comp_fin_data_na <- comp_fin_data %>%
filter(is.na(sector)) %>%
select(-sector)
comp_fin_data <- comp_fin_data %>% filter(!is.na(sector))
comp_fin_data_na <- comp_fin_data_na %>% left_join(sector_bridge %>% filter(source == "ICB") %>% select(-source),
by = c("icb_subgroup" = "industry_classification")
)
comp_fin_data <- comp_fin_data %>% bind_rows(comp_fin_data_na)
comp_fin_data <- comp_fin_data %>%
select(-financial_sector) %>%
rename(financial_sector = sector)
comp_fin_data %>%
group_by(financial_sector) %>%
filter(is.na(financial_sector)) %>%
summarise(count = n())
comp_fin_data_na <- comp_fin_data %>% filter(is.na(financial_sector))
if (nrow(comp_fin_data) != initial_no_rows) {
stop("Rows being dropped in mapping sectors")
}
return(comp_fin_data)
}
override_sector_classification <- function(fin_data, overrides) {
start_rows <- nrow(fin_data)
overrides <- overrides %>%
mutate_at(vars(company_name, corporate_bond_ticker, fin_sector_override), list(as.character))
overrides$sector_override <- TRUE
# Merge in by company corp ticker
overrides_cbt <- overrides %>%
filter(corporate_bond_ticker != "", !is.na(corporate_bond_ticker)) %>%
select(corporate_bond_ticker, fin_sector_override, sector_override) %>%
distinct()
fin_data <- left_join(fin_data, overrides_cbt, by = "corporate_bond_ticker")
# Merge in by bloomberg_id
overrides_bbg <- overrides %>%
filter(is.na(corporate_bond_ticker) | corporate_bond_ticker == "") %>%
select(bloomberg_id, fin_sector_override, sector_override) %>%
distinct()
fin_data <- left_join(fin_data, overrides_bbg, by = "bloomberg_id")
# Clean resulting financial data
fin_data <- fin_data %>%
mutate(
sector_override = sector_override.x,
sector_override = if_else(sector_override.y != "" & !is.na(sector_override.y), sector_override.y, sector_override),
fin_sector_override = fin_sector_override.x,
fin_sector_override = if_else(!is.na(fin_sector_override.y) & fin_sector_override.y != "", fin_sector_override.y, fin_sector_override),
sector_override = if_else(is.na(sector_override), FALSE, TRUE)
) %>%
select(-sector_override.x, -sector_override.y, -fin_sector_override.x, -fin_sector_override.y)
fin_data <- fin_data %>%
mutate(security_mapped_sector = if_else(sector_override, fin_sector_override, security_mapped_sector)) %>%
select(-fin_sector_override)
if (nrow(fin_data) != start_rows) {
stop("Additional rows being added by fin sector override")
}
fin_data
}
check_asset_types <- function(fin_data) {
fin_data <- fin_data %>%
mutate(
asset_type = if_else(asset_type == "Other", "Others", asset_type),
asset_type = if_else(is.na(asset_type), "Others", asset_type),
)
fin_data$asset_type <- first_char_up(fin_data$asset_type)
### TEST
# if (!any(unique(fin_data$asset_type) %in% allowable_asset_list())) {
# stop("Check Financial Data Asset Types")
# }
fin_data
}
check_mapped_assets_flag <- function(fin_data) {
# convert old naming of "mapped to assets" column to be mapped_to_assets
if ("EQ.mapped_to_assets" %in% colnames(fin_data) | "CB.mapped_to_assets" %in% colnames(fin_data) | "has_prod_after_2018" %in% colnames(fin_data)) {
if ("EQ.mapped_to_assets" %in% colnames(fin_data) | "CB.mapped_to_assets" %in% colnames(fin_data)) {
fin_data <- fin_data %>%
mutate(
mapped_to_assets = case_when(
Asset.Type == "Equity" ~ EQ.mapped_to_assets,
Asset.Type == "Bonds" ~ CB.mapped_to_assets,
TRUE ~ 0
)
) %>%
select(-CB.mapped_to_assets, -EQ.mapped_to_assets)
} else if ("has_prod_after_2018" %in% colnames(fin_data)) {
fin_data <- fin_data %>%
mutate(
mapped_to_assets = has_prod_after_2018
) %>%
select(-has_prod_after_2018)
}
}
unique(fin_data$mapped_to_assets)
# Ensure that flag is a logical
fin_data <- fin_data %>%
mutate(mapped_to_assets = case_when(
mapped_to_assets %in% c("t", 1) ~ TRUE,
mapped_to_assets %in% c("f", 0) ~ FALSE
))
### TEST
any(!fin_data$mapped_to_assets %in% c(TRUE, FALSE))
###
fin_data
}
check_fin_mapped_sectors <- function(fin_data) {
fin_data <- fin_data %>%
mutate(security_mapped_sector = case_when(
security_mapped_sector == "Others" ~ "Other",
security_mapped_sector == "OIl&Gas" ~ "Oil&Gas",
is.na(security_mapped_sector) ~ "Other",
TRUE ~ security_mapped_sector
))
actual_sectors <- unique(fin_data$security_mapped_sector)
if (any(!actual_sectors %in% c(sector_list, pacta_sectors_not_analysed, "Other"))) {
stop("Additional Sectors in fin_data")
}
fin_data
}
convert_corporate_bonds <- function(fin_data) {
cb_groups <- c("Convertible Bonds", "Corporate Bonds", "Corporate inflation linked Bonds", "Convertible Preferreds")
fin_data <- fin_data %>%
mutate(
asset_type = if_else(security_type %in% cb_groups, "Bonds", asset_type),
asset_type = if_else(!security_type %in% cb_groups & asset_type == "Bonds", "Others", asset_type),
)
fin_data
}
identify_sb <- function(fin_data) {
sb_groups <- c("Sovereign Debt", "Sovereign Agency Debt", "Government inflation linked Bonds", "Sovereign", "Sovereign Agency", "Sovereigns")
fin_data <- fin_data %>%
mutate(is_sb = case_when(
security_type %in% sb_groups ~ TRUE,
security_bics_subgroup %in% sb_groups ~ TRUE,
TRUE ~ FALSE
))
fin_data
}
classify_all_funds <- function(fin_data) {
nrow(fin_data[fin_data$asset_type == "Funds", ])
fin_data <- fin_data %>%
mutate(asset_type = case_when(
grepl("Fund", security_type) ~ "Funds",
grepl("ETF", security_type) ~ "Funds",
grepl("Fund", security_bclass4) ~ "Funds",
grepl("ETF", security_bclass4) ~ "Funds",
grepl("Fund", security_icb_subsector) ~ "Funds",
grepl("ETF", security_icb_subsector) ~ "Funds",
TRUE ~ asset_type
))
### TEST?
fin_data
}
normalise_fund_data <- function(fund_data) {
if (data_check(fund_data)) {
fund_data <- fund_data %>%
group_by(fund_isin) %>%
mutate(total_weight = sum(isin_weight, na.rm = T))
fund_data_large <- fund_data %>%
group_by(fund_isin) %>%
filter(total_weight > 1) %>%
mutate(isin_weight = isin_weight / total_weight) %>%
select(-total_weight)
fund_data_small <- fund_data %>%
group_by(fund_isin) %>%
filter(total_weight <= 1) %>%
select(-total_weight)
fund_data_missing <- fund_data_small %>%
summarise(isin_weight = 1 - sum(isin_weight, na.rm = T)) %>%
mutate(holding_isin = "MissingValue")
fund_data <- bind_rows(fund_data_large, fund_data_small, fund_data_missing)
fund_data
} else {
stop("No fund data")
}
fund_data
}
### Portfolio Check Functions
convert_currencies <- function(portfolio, currencies) {
portfolio <- left_join(portfolio, currencies, by = "currency")
portfolio$value_usd <- portfolio$market_value * portfolio$exchange_rate
portfolio
}
add_fin_data <- function(portfolio, fin_data) {
left_join(portfolio, fin_data, by = "isin")
}
calculate_value_usd_with_fin_data <- function(portfolio) {
# check correct inputs
necessary_columns <- c("currency", "unit_share_price")
### TEST
if (!any(necessary_columns %in% colnames(portfolio))) {
stop("Portfolio not structured correctly")
}
# add missing currency for number of shares
portfolio <- portfolio %>%
mutate(currency = if_else(!is.na(number_of_shares), "USD", currency))
# calculates the value_usd where number of shares are given
portfolio <- portfolio %>%
mutate(value_usd = if_else(
asset_type %in% c("Equity", "Funds") & is.na(value_usd),
number_of_shares * unit_share_price,
value_usd
))
portfolio
}
identify_fund_portfolio <- function(portfolio) {
fund_portfolio <- portfolio %>% filter(asset_type == "Funds", !is.na(isin))
fund_portfolio
}
calculate_fund_portfolio <- function(fund_portfolio, fund_data, cols_portfolio_no_bbg = cols_portfolio, cols_funds = cols_of_funds) {
if (data_check(fund_portfolio)) {
fund_portfolio <- left_join(fund_portfolio, fund_data, by = c("isin" = "fund_isin"), all.x = T)
fund_portfolio$direct_holding <- FALSE
fund_portfolio$original_value_usd <- fund_portfolio$value_usd
fund_portfolio$value_usd <- fund_portfolio$isin_weight * fund_portfolio$value_usd
fund_portfolio$fund_isin <- fund_portfolio$isin
fund_portfolio$isin <- fund_portfolio$holding_isin
# If there is no fund breakdown available, return the "original isin data" to the original locations
fund_portfolio <- fund_portfolio %>%
mutate(
value_usd = if_else(!fund_isin %in% fund_data$fund_isin, original_value_usd, value_usd),
isin = if_else(!fund_isin %in% fund_data$fund_isin, fund_isin, isin),
direct_holding = if_else(!fund_isin %in% fund_data$fund_isin, TRUE, direct_holding),
)
} else {
fund_portfolio <- fund_portfolio %>% bind_cols(data.frame(direct_holding = integer(0), fund_isin = character(0), original_value_usd = numeric(0)))
}
fund_portfolio <- fund_portfolio %>% select(all_of(cols_portfolio_no_bbg), all_of(cols_funds))
fund_portfolio
}
add_fund_portfolio <- function(portfolio, fund_portfolio, cols_of_funds) {
# Remove the fund lines from the portfolio
portfolio_no_funds <- portfolio %>% filter(!isin %in% fund_portfolio$fund_isin)
# Check that there are the correct number of isins in both portfolios
if (nrow(portfolio_no_funds) + length(unique(fund_portfolio$holding_id)) != nrow(portfolio)) {
stop("Something unexpected with fund portfolio merge")
}
# Add additional fund relevant lines to original portfolio
portfolio_no_funds <- portfolio_no_funds %>%
mutate(
direct_holding = TRUE,
fund_isin = NA,
original_value_usd = value_usd
)
# select same columns for both portfolios
portfolio_no_funds <- portfolio_no_funds %>% select(colnames(portfolio), all_of(cols_of_funds))
fund_portfolio <- fund_portfolio %>% select(colnames(portfolio), all_of(cols_of_funds))
if (!identical(colnames(portfolio_no_funds), colnames(fund_portfolio))) {
stop("Colnames not equal, funds vs no funds")
}
# Merge in the results
portfolio_total <- rbind(portfolio_no_funds, fund_portfolio)
portfolio_total <- as_tibble(portfolio_total)
portfolio_total
}
# Add Columns for missing or incorrect information
check_isin_format <- function(portfolio_total) {
portfolio_total <- portfolio_total %>%
mutate(has_valid_isin = case_when(
nchar(isin) != 12 ~ FALSE,
isin == "" ~ FALSE,
is.na(isin) ~ FALSE,
grepl("[^[:alnum:]]", isin) ~ FALSE,
TRUE ~ TRUE
))
portfolio_total
}
check_missing_currency <- function(portfolio_total) {
# Currency blank or not in our currency data frame
portfolio_total <- portfolio_total %>%
mutate(has_currency = case_when(
is.na(currency) ~ FALSE,
currency == "" ~ FALSE,
!currency %in% currencies$currency ~ FALSE,
TRUE ~ TRUE
))
portfolio_total
}
check_valid_input_value <- function(portfolio_total) {
# Currency negative or missing market value/number of shares
portfolio_total <- portfolio_total %>%
mutate(has_valid_input = case_when(
is.na(market_value) & is.na(number_of_shares) ~ FALSE,
market_value < 0 ~ FALSE,
number_of_shares < 0 ~ FALSE,
# !currency %in% currencies$currency ~ FALSE,
TRUE ~ TRUE
))
portfolio_total
}
check_valid_value_usd <- function(portfolio_total) {
portfolio_total <- portfolio_total %>%
mutate(
has_valid_value_usd = case_when(
is.na(value_usd) & is.na(number_of_shares) ~ FALSE,
value_usd < 0 ~ FALSE,
number_of_shares < 0 ~ FALSE,
TRUE ~ TRUE
)
)
portfolio_total
}
check_bloomberg_data <- function(portfolio_total) {
portfolio_total <- portfolio_total %>%
mutate(has_bbg_data = case_when(
(asset_type == "Equity" | asset_type == "Unclassifiable") & (is.na(bloomberg_id) | bloomberg_id == "") ~ FALSE,
(asset_type == "Bonds" | asset_type == "Unclassifiable") & (is.na(corporate_bond_ticker) | corporate_bond_ticker == "") ~ FALSE,
(asset_type == "" | asset_type == "Unclassifiable") ~ FALSE,
is.na(asset_type) ~ FALSE,
TRUE ~ TRUE
))
portfolio_total
}
add_flags <- function(portfolio) {
portfolio <- portfolio %>%
mutate(flag = case_when(
!has_currency ~ "Missing currency information",
!has_valid_input ~ "Negative or missing input value",
!has_valid_isin ~ "Invalid or missing ISIN",
!has_bbg_data ~ "Holding not in Bloomberg database",
TRUE ~ "Included in analysis"
))
portfolio
}
overall_validity_flag <- function(portfolio_total) {
portfolio_total <- portfolio_total %>%
mutate(valid_input = case_when(
!has_currency ~ FALSE,
!has_bbg_data ~ FALSE,
!has_valid_input ~ FALSE,
!has_valid_isin ~ FALSE,
TRUE ~ TRUE
))
portfolio_total
}
create_ald_flag <- function(portfolio, comp_fin_data, debt_fin_data) {
portfolio_eq <- portfolio %>% filter(asset_type == "Equity")
portfolio_cb <- portfolio %>% filter(asset_type == "Bonds")
portfolio_other <- portfolio %>% filter(!asset_type %in% c("Equity", "Bonds"))
portfolio_eq <- check_for_ald(portfolio_eq, "Equity", comp_fin_data)
portfolio_cb <- check_for_ald(portfolio_cb, "Bonds", debt_fin_data)
if (data_check(portfolio_other)) {
portfolio_other <- portfolio_other %>% mutate(
has_asset_level_data = NA,
sectors_with_assets = NA,
has_ald_in_fin_sector = NA
)
} else {
portfolio_other <- portfolio_other %>% add_column("has_asset_level_data", "sectors_with_assets", "has_ald_in_fin_sector")
}
portfolio <- rbind(portfolio_eq, portfolio_cb, portfolio_other)
}
check_for_ald <- function(portfolio_subset, portfolio_type, relevant_fin_data) {
if (data_check(portfolio_subset)) {
initial_port_value <- sum(portfolio_subset$value_usd, na.rm = T)
if (portfolio_type == "Equity") {
joining_id <- "company_id"
} else if (portfolio_type == "Bonds") {
joining_id <- "corporate_bond_ticker"
}
ald_markers <- relevant_fin_data %>%
select(all_of(joining_id), has_asset_level_data, sectors_with_assets) %>%
distinct()
portfolio_subset <- left_join(portfolio_subset, ald_markers, by = joining_id)
portfolio_subset <- portfolio_subset %>%
rowwise() %>%
mutate(has_ald_in_fin_sector = if_else(grepl(financial_sector, sectors_with_assets), TRUE, FALSE)) %>%
ungroup()
if (sum(portfolio_subset$value_usd, na.rm = T) != initial_port_value) {
stop("Merge over company id changes portfolio value")
}
} else {
portfolio_subset <- portfolio_subset %>% add_column("has_asset_level_data", "sectors_with_assets", "has_ald_in_fin_sector")
}
return(portfolio_subset)
}
add_revenue_split <- function(has_revenue, portfolio, revenue_data) {
if (has_revenue) {
revenue_data_min <- revenue_data %>%
filter(!is.na(company_id)) %>%
select(-company_name, -equity_ticker, -corporate_bond_ticker, -bloomberg_id)
initial_portfolio_value <- sum(portfolio$value_usd, na.rm = T)
port_rev <- left_join(portfolio, revenue_data_min, by = "company_id", all.x = T)
# Fill in gaps where possible
port_rev <- port_rev %>%
mutate(
has_revenue_data = if_else(is.na(has_revenue_data), FALSE, has_revenue_data),
tot_rev = if_else(is.na(tot_rev), 1, tot_rev),
revenue_sector = if_else(is.na(revenue_sector), "Other", revenue_sector),
value_usd = value_usd * tot_rev
) %>%
rename(financial_sector = revenue_sector)
if (sum(port_rev$value_usd, na.rm = T) != initial_portfolio_value) {
stop("Revenue data causing duplications")
}
} else {
port_rev <- portfolio %>%
mutate(
has_revenue_data = FALSE,
financial_sector = security_mapped_sector
)
}
return(port_rev)
}
calculate_number_of_shares <- function(portfolio) {
portfolio <- portfolio %>%
mutate(number_of_shares = ifelse(is.na(number_of_shares) & asset_type == "Equity", value_usd / unit_share_price, number_of_shares))
return(portfolio)
}
create_id_columns <- function(portfolio, portfolio_type) {
if (portfolio_type == "Equity") {
portfolio <- portfolio %>%
rename(id = bloomberg_id) %>%
mutate(
id_name = "bloomberg_id",
id = as.character(id)
)
}
if (portfolio_type == "Bonds") {
portfolio <- portfolio %>%
rename(id = corporate_bond_ticker) %>%
mutate(
id_name = "corporate_bond_ticker",
id = as.character(id)
)
}
return(portfolio)
}
# FINAL SCRIPTS
get_and_clean_total_fund_list_data <- function() {
total_fund_list <- read_rda("data/total_fund_list.rds")
total_fund_list
}
get_and_clean_currency_data <- function() {
currencies <- readRDS("data/currencies.rds")
currencies <- set_currency_timestamp(currencies)
if (all(currencies$exchange_rate) == 1){print("currency data temporary. todo: update")}
currencies
}
get_and_clean_fund_data <- function() {
fund_data <- NA
# Fund Data
if (file.exists(paste0(analysis_inputs_path, "/fund_data_", financial_timestamp, ".rda"))) {
fund_data <- readRDS(paste0(analysis_inputs_path, "/fund_data_", financial_timestamp, ".rda"))
} else if (file.exists(paste0(analysis_inputs_path, "/fund_data_2018Q4.rda"))) {
fund_data <- readRDS(paste0(analysis_inputs_path, "/fund_data_2018Q4.rda"))
print("Old Fund Data being used. Replace FundsData2018Q4 or check name of file.")
} else if (file.exists(paste0(analysis_inputs_path, "/SFC_26052020_funds.csv"))) {
fund_data <- read_csv(paste0(analysis_inputs_path, "/SFC_26052020_funds.csv"))
print("2020Q2 SFC fund data being used")
} else {
if (!data_check(fund_data)) {
warning("No fund data available")
}
}
if (data_check(fund_data)) {
fund_data <- fund_data %>% janitor::clean_names()
fund_data <- fund_data %>% filter(!is.na(holding_isin) & holding_isin != "")
fund_data$holding_isin <- as.character(fund_data$holding_isin)
fund_data$fund_isin <- as.character(fund_data$fund_isin)
fund_data <- normalise_fund_data(fund_data)
}
return(fund_data)
}
get_and_clean_fin_data <- function(fund_data) {
# Financial Data
fin_data_raw <- read_rda(paste0(analysis_inputs_path, "/security_financial_data.rda")) %>% as_tibble()
# remove unclear duplicates from raw financial data. This should be moved to DataStore.
rm_duplicates <- read_csv("non_distinct_isins.csv")
rm_duplicates <- rm_duplicates %>%
distinct(isin) %>%
pull(isin)
fin_data_raw <- fin_data_raw %>%
filter(!(isin %in% rm_duplicates))
if (!unique(fin_data_raw$financial_timestamp) == financial_timestamp) {
print("Financial timestamp not equal")
}
overrides <- read_csv("data/fin_sector_overrides.csv",
col_types = "ccdc"
)
sector_bridge <- read_csv("data/sector_bridge.csv", col_types = "cccccccc")
fin_data <- fin_data_raw
fin_data <- fin_data %>% filter(!is.na(isin))
fin_data <- map_security_sectors(fin_data, sector_bridge)
# Adds in the manual sector classification overrides
fin_data <- override_sector_classification(fin_data, overrides)
# Checks that only eq, cb, funds and others are in the fin_data
fin_data <- check_asset_types(fin_data)
# Checks for other mapped sectors not within the sector lists
fin_data <- check_fin_mapped_sectors(fin_data)
# TODO: find alternative here, calling in data from company financial data
# Cleans and normalises the mapped_to_assets flag
# fin_data <- check_mapped_assets_flag(fin_data)
# Limits the Bonds category to corporate bonds only
fin_data <- convert_corporate_bonds(fin_data)
# Checks whether the bond is sovereign or not
fin_data <- identify_sb(fin_data)
# Checks to ensure all finds are classified as such
fin_data <- classify_all_funds(fin_data)
fin_data <- add_bics_sector(fin_data)
# Select relevant columns
fin_data <- fin_data %>%
select(
company_id, company_name, bloomberg_id, corporate_bond_ticker,
country_of_domicile,
isin,
unit_share_price, exchange_rate_usd,
asset_type, security_type,
security_mapped_sector, security_icb_subsector, security_bics_subgroup, bics_sector, # bclass4,
maturity_date, coupon_value, amount_issued, current_shares_outstanding_all_classes, unit_share_price,
sector_override, sector_boe, subsector_boe, sector_dnb, sector_ipr, subsector_ipr,
is_sb
) %>%
distinct()
### TEST
if (nrow(fin_data) > nrow(fin_data_raw)) {
stop("Additional rows added to fin data")
}
return(fin_data)
}
add_bics_sector <- function(fin_data) {
bics_bridge <- read_csv("data/bics_bridge.csv")
fin_data <- left_join(fin_data, bics_bridge, by = c("security_bics_subgroup" = "bics_subgroup"))
}
get_and_clean_revenue_data <- function() {
revenue_data <- data.frame()
if (has_revenue) {
revenue_data <- read_rda(paste0(analysis_inputs_path, "/revenue_data_member_ticker.rda"))
# col_types = "dcdcclcd")
revenue_data <- revenue_data %>%
filter(tot_rev > 0) %>%
rename(revenue_sector = sector) %>%
ungroup()
}
return(revenue_data)
}
get_and_clean_company_fin_data <- function() {
comp_fin_data_raw <- read_rds(paste0(analysis_inputs_path, "/consolidated_financial_data.rda"))
comp_fin_data_raw <- comp_fin_data_raw %>% select(
company_id, company_name, bloomberg_id, country_of_domicile, corporate_bond_ticker, bics_subgroup, bics_sector,
icb_subgroup, financial_sector, has_asset_level_data, has_assets_in_matched_sector, sectors_with_assets, current_shares_outstanding_all_classes,
market_cap, bond_debt_out, financial_timestamp
)
sector_bridge <- read_csv("data/sector_bridge.csv", col_types = "cccccccc")
comp_fin_data <- map_comp_sectors(comp_fin_data_raw, sector_bridge)
return(comp_fin_data)
}
get_and_clean_debt_fin_data <- function() {
debt_fin_data_raw <- read_rds(paste0(analysis_inputs_path, "/debt_financial_data.rda"))
return(debt_fin_data_raw)
}
process_raw_portfolio <- function(portfolio_raw,
fin_data,
fund_data,
currencies,
grouping_variables,
total_fund_list = NULL) {
portfolio <- clean_colnames_portfolio_input_file(portfolio_raw)
portfolio <- clear_portfolio_input_blanks(portfolio)
portfolio <- add_meta_portfolio(portfolio, inc_meta_portfolio)
start_port_rows <- nrow(portfolio)
portfolio <- add_holding_id(portfolio)
portfolio <- check_missing_cols(portfolio, grouping_variables)
portfolio <- clean_portfolio_col_types(portfolio, grouping_variables)
portfolio <- convert_currencies(portfolio, currencies)
cols_portfolio <- colnames(portfolio)
cols_of_funds <- c("direct_holding", "fund_isin", "original_value_usd")
# Add financial data
# Merges in the clean data and calculates the marketvalue and number of shares
portfolio <- add_fin_data(portfolio, fin_data)
if (nrow(portfolio) != start_port_rows) {
stop("Portfolio lines changing unexpectedly")
}
portfolio <- calculate_value_usd_with_fin_data(portfolio)
portfolio <- calculate_number_of_shares(portfolio)
original_value_usd <- sum(portfolio$value_usd, na.rm = T)
# correct Funds classification by comparing isin to the list of all known funds isins
if (!is.null(total_fund_list)) {
portfolio <- portfolio %>%
mutate(asset_type = ifelse(
is.element(isin, total_fund_list$fund_isin), "Funds", asset_type
))
}
# identify fund in the portfolio
fund_portfolio <- identify_fund_portfolio(portfolio)
if (data_check(fund_data)) {
# Creates the fund_portfolio to match the original portfolio
fund_portfolio <- calculate_fund_portfolio(fund_portfolio, fund_data, cols_portfolio, cols_of_funds)
# Merges in the bbg data to the fund portfolio
fund_portfolio <- add_fin_data(fund_portfolio, fin_data)
# add fund_portfolio and check that the total value is the same
portfolio_total <- add_fund_portfolio(portfolio, fund_portfolio, cols_of_funds)
} else {
portfolio_total <- as_tibble(portfolio)
portfolio_total$direct_holding <- TRUE
}
portfolio_total <- clean_unmatched_holdings(portfolio_total)
if (!all.equal(sum(portfolio_total$value_usd, na.rm = TRUE), original_value_usd, tolerance = 1e-3)) {
stop("Fund Portfolio introducing errors in total value")
}
### TODO
# summarise fund results
# identify missing funds and isins
###
return(portfolio_total)
}
get_fund_coverage <- function(portfolio_raw,
fin_data,
fund_data,
currencies,
grouping_variables) {
portfolio <- clean_colnames_portfolio_input_file(portfolio_raw)
portfolio <- clear_portfolio_input_blanks(portfolio)
portfolio <- add_meta_portfolio(portfolio, inc_meta_portfolio)
start_port_rows <- nrow(portfolio)
portfolio <- add_holding_id(portfolio)
portfolio <- check_missing_cols(portfolio, grouping_variables)
portfolio <- clean_portfolio_col_types(portfolio, grouping_variables)
portfolio <- convert_currencies(portfolio, currencies)
cols_portfolio <- colnames(portfolio)
cols_of_funds <- c("direct_holding", "fund_isin", "original_value_usd")
# Add financial data
# Merges in the clean data and calculates the marketvalue and number of shares
portfolio <- add_fin_data(portfolio, fin_data)
if (nrow(portfolio) != start_port_rows) {
stop("Portfolio lines changing unexpectedly")
}
portfolio <- calculate_value_usd_with_fin_data(portfolio)
portfolio <- calculate_number_of_shares(portfolio)
original_value_usd <- sum(portfolio$value_usd, na.rm = TRUE)
# identify funds in the portfolio
fund_portfolio <- identify_fund_portfolio(portfolio)
# the raw portfolio will be compared with the merged portfolio
fund_portfolio_raw <- fund_portfolio
if (data_check(fund_data)) {
# Creates the fund_portfolio to match the original portfolio
fund_portfolio <- calculate_fund_portfolio(fund_portfolio, fund_data, cols_portfolio, cols_of_funds)
# Merges in the bbg data to the fund portfolio
fund_portfolio <- add_fin_data(fund_portfolio, fin_data)
# add fund_portfolio and check that the total value is the same
portfolio_total <- add_fund_portfolio(portfolio, fund_portfolio, cols_of_funds)
} else {
portfolio_total <- as_tibble(portfolio)
portfolio_total$direct_holding <- TRUE
portfolio_total$fund_isin <- NA
}
fund_portfolio_total <- portfolio_total %>% filter(!is.na(fund_isin))
fund_portfolio_total_mapped_value_usd <- fund_portfolio_total %>%
group_by(holding_id) %>%
summarize(total_mapped_value_usd = sum(value_usd), .groups = "drop")
fund_portfolio_missing_value_usd <- fund_portfolio_total %>%
filter(nchar(isin)!=12) %>%
group_by(holding_id) %>%
summarize(missing_value_usd = sum(value_usd), .groups = "drop")
fund_portfolio_funds_in_funds_not_mapped_value_usd <- fund_portfolio_total %>%
filter(nchar(isin)==12 & asset_type == "Funds") %>%
group_by(holding_id) %>%
summarize(funds_in_funds_not_mapped = sum(value_usd), .groups = "drop")
fund_portfolio <- fund_portfolio_raw
fund_portfolio <- left_join(fund_portfolio, fund_portfolio_total_mapped_value_usd, by = "holding_id")
fund_portfolio$total_mapped_value_usd[is.na(fund_portfolio$total_mapped_value_usd)] <- 0
fund_portfolio <- left_join(fund_portfolio, fund_portfolio_missing_value_usd, by = "holding_id")
fund_portfolio$missing_value_usd[is.na(fund_portfolio$missing_value_usd)] <- 0
fund_portfolio <- left_join(fund_portfolio, fund_portfolio_funds_in_funds_not_mapped_value_usd, by = "holding_id")
fund_portfolio$funds_in_funds_not_mapped[is.na(fund_portfolio$funds_in_funds_not_mapped)] <- 0
fund_portfolio <- fund_portfolio %>% mutate(effective_coverage = (total_mapped_value_usd - missing_value_usd - funds_in_funds_not_mapped) / value_usd)
fund_portfolio <- fund_portfolio %>% mutate(fund_data_file_coverage = (total_mapped_value_usd) / value_usd)
fund_portfolio <- fund_portfolio %>% mutate(lipper_data_coverage = (total_mapped_value_usd - missing_value_usd) / total_mapped_value_usd)
fund_portfolio <- fund_portfolio %>% mutate(lost_coverage_fif = 1 - (total_mapped_value_usd - missing_value_usd - funds_in_funds_not_mapped) / (total_mapped_value_usd-missing_value_usd))
return(fund_portfolio)
}
summarize_fund_coverage <- function(fund_portfolio) {
fund_portfolio %>% select(investor_name,
portfolio_name,
isin,
value_usd,
company_name,
effective_coverage)
return(fund_portfolio)
}
list_unknown_funds_in_funds <- function(portfolio_total) {
fund_portfolio_total <- portfolio_total %>% filter(!is.na(fund_isin))
table_of_funds_in_funds_not_mapped <- fund_portfolio_total %>%
filter(nchar(isin)==12 & asset_type == "Funds") %>%
mutate(direct_holding="FALSE") %>%
select(investor_name,
portfolio_name,
isin,
value_usd,
company_name,
fund_isin) %>% rename(parent_fund_isin = fund_isin)
return(table_of_funds_in_funds_not_mapped)
}
add_portfolio_flags <- function(portfolio) {
### FLAGS/Exclusions
portfolio <- check_isin_format(portfolio)
portfolio <- check_missing_currency(portfolio)
portfolio <- check_valid_input_value(portfolio)
portfolio <- check_bloomberg_data(portfolio)
portfolio <- add_flags(portfolio)
portfolio <- overall_validity_flag(portfolio)
return(portfolio)
}
portfolio_summary <- function(portfolio_total) {
overview_data <- portfolio_total %>%
ungroup() %>%
group_by(!!!rlang::syms(grouping_variables), asset_type, financial_sector, valid_input) %>%
mutate(valid_value_usd = sum(value_usd, na.rm = T)) %>%
ungroup() %>%
group_by(!!!rlang::syms(grouping_variables), asset_type, valid_input) %>%
mutate(asset_value_usd = sum(value_usd, na.rm = T)) %>%
ungroup() %>%
group_by(!!!rlang::syms(grouping_variables), valid_input) %>%
mutate(portfolio_value_usd = sum(value_usd, na.rm = T)) %>%
ungroup() %>%
select(
!!!rlang::syms(grouping_variables), asset_type, financial_sector, valid_input,
valid_value_usd, asset_value_usd, portfolio_value_usd
) %>%
distinct()
return(overview_data)
}
create_merged_portfolio <- function(eq_portfolio, cb_portfolio) {
portfolio_ <- rbind(eq_portfolio, cb_portfolio)
portfolio_ <- portfolio_ %>% filter(!is.na(investor_name))
if (!data_check(portfolio_)) {
warning("No portfolio data")
}
return(portfolio_)
}
create_portfolio_subset <- function(portfolio, portfolio_type) {
if (portfolio_type %in% unique(portfolio$asset_type)) {
portfolio_subset <- portfolio %>%
ungroup() %>%
filter(asset_type == portfolio_type)
portfolio_subset <- create_id_columns(portfolio_subset, portfolio_type)
portfolio_subset <- portfolio_subset %>%
select(
all_of(grouping_variables), holding_id, value_usd, number_of_shares,
company_id, company_name, id, id_name, country_of_domicile, unit_share_price, current_shares_outstanding_all_classes,
financial_sector, has_ald_in_fin_sector, bics_sector
)
} else {
write_log(paste0("No ", portfolio_type, " in portfolio"))
portfolio_subset <- NA
}
return(portfolio_subset)
}
create_audit_chart <- function(audit_file, proc_input_path) {
## GRAPH VALUES
base_size <- 12
base_family <- "sans"
font_color <- "#3D3D3C"
# flag.Numbers.Colors <- brewer.pal(n = 8, name = "RdBu")
### red - "#B2182B" "#D6604D" "#F4A582" "#FDDBC7" "#D1E5F0" "#92C5DE" "#4393C3" "#2166AC" - blue
flag_numbers <- audit_file %>%
group_by(flag) %>%
summarise(n = n(), .groups = "drop_last") %>%
mutate(perc_n = n / sum(n))
flag_numbers_colors <- c(
"Included in analysis" = "#207ddb",
"Holding not in Bloomberg database" = "#AAA9AD",
"Missing currency information" = "#848689",
"Negative or missing input value" = "#5B676D",
"Invalid or missing ISIN" = "#2A3439"
)
flag_categories <- names(flag_numbers_colors)
flag_numbers$flag <- factor(flag_numbers$flag,
levels = flag_categories
)
y_intercept <- sum(flag_numbers$perc_n[flag_numbers$flag != "Included in analysis"])
nrows <- length(unique(flag_numbers$flag))
nrow_Legend <- ifelse(nrows <= 2, 3.2,
ifelse(nrows == 3 | nrows == 4, 3.7,
ifelse(nrows == 5, 3.8,
ifelse(nrows == 6, 4.0,
ifelse(nrows == 7, 4, 4.6)
)
)
)
)
## Chart
Chart_1 <- ggplot(flag_numbers, aes(x = "", y = perc_n, fill = flag)) +
geom_col() +
coord_flip() +
scale_fill_manual(values = flag_numbers_colors) +
scale_y_continuous(
labels = percent_format(accuracy = 1), expand = c(0, 0.2),
# limits = c(0, 1),
breaks = c(0, 0.25, 0.5, 0.75, 1)
) +
scale_x_discrete(expand = c(0, 0, 0, 0.9)) +
ylab("By count of ISINs") +
theme_minimal() +
theme(
axis.text.x = element_text(family = base_family, size = base_size, colour = font_color),
axis.title = element_text(family = base_family, size = base_size, colour = font_color, hjust = 0.18),
legend.key.height = unit(0.5, "cm"),
legend.key.width = unit(0.5, "cm"),
legend.position = "bottom",
legend.text = element_text(
family = base_family, size = base_size, colour = font_color,
margin = margin(l = 5, r = 10, unit = "pt")
),
legend.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.background = element_rect(color = "NA", fill = "NA"),
plot.margin = unit(c(1, 0.1, 0.1, 0.1), "cm")
) +
guides(fill = guide_legend(reverse = TRUE, ncol = 1)) +
geom_hline(aes(yintercept = y_intercept), color = "#3D3D3C", size = 1, show.legend = FALSE) +
geom_text(aes(label = "Not included\nin analysis\n\n", x = 1.5, y_intercept - 0.02),
color = "#2e2e2e", family = base_family, size = base_size / 2.5, hjust = 1
) +
geom_text(aes(label = "Included\nin analysis\n\n", x = 1.5, y_intercept + 0.02),
color = "#207ddb", family = base_family, size = base_size / 2.5, hjust = 0
)
ggsave(paste0(proc_input_path, "/AuditChart.png"), Chart_1, height = nrow_Legend, width = 8)
}
create_audit_file <- function(portfolio_total) {
# portfolio_total <- left_join(portfolio_total, comp_fin_data %>% select(company_id, sectors_with_assets, bics_sector), by = "company_id")
#
# portfolio_total <- portfolio_total %>% rowwise() %>%
# mutate(has_assets = ifelse(is.na(sectors_with_assets), TRUE, FALSE),
# has_assets_in_fin_sector = grepl(pattern = financial_sector, x = sectors_with_assets)
# )
audit_file <- portfolio_total %>%
select(
all_of(grouping_variables), holding_id, isin, value_usd, company_name, asset_type, has_revenue_data, valid_input,
direct_holding, financial_sector, bics_sector, sectors_with_assets, has_ald_in_fin_sector, flag
)
if (has_revenue == FALSE) {
audit_file <- audit_file %>% select(-has_revenue_data)
}
return(audit_file)
}
clean_unmatched_holdings <- function(portfolio) {
portfolio %>%
mutate(asset_type = ifelse(is.na(security_mapped_sector), "Unclassifiable", asset_type)) %>%
mutate(security_mapped_sector = ifelse(is.na(security_mapped_sector), "Unclassifiable", security_mapped_sector))
}
### Emissions work
calculate_average_portfolio_emissions <- function(portfolio_total,
comp_fin_data,
average_sector_intensity) {
min_portfolio <- portfolio_total %>%
select(
investor_name,
portfolio_name,
financial_sector,
security_mapped_sector,
company_id,
value_usd,
asset_type
) %>%
filter(asset_type %in% c("Equity", "Bonds")) %>%
left_join(
select(comp_fin_data, company_id, bics_sector),
by = "company_id"
) %>%
group_by(
investor_name,
portfolio_name,
asset_type,
financial_sector,
bics_sector
) %>%
summarise(value_usd = sum(value_usd, na.rm = T), .groups = "drop") %>%
left_join(
select(
average_sector_intensity,
bics_sector,
asset_type,
median_intensity,
mean_intensity
),
by = c("bics_sector", "asset_type")
)
# Create averages where bics sectors were missing
average_other_sectors <- min_portfolio %>%
group_by(asset_type, financial_sector) %>%
summarise(
mean_intensity = mean(mean_intensity, na.rm = T),
.groups = "drop"
)
min_portfolio <- min_portfolio %>%
left_join(
average_other_sectors,
by = c("asset_type", "financial_sector")
) %>%
mutate(
mean_intensity = ifelse(
is.na(mean_intensity.x),
mean_intensity.y,
mean_intensity.x
)
) %>%
select(-mean_intensity.x, -mean_intensity.y) %>%
mutate(weighted_sector_emissions = value_usd * mean_intensity)
min_portfolio <- min_portfolio %>%
mutate(
sector = ifelse(
financial_sector != "Other",
financial_sector,
bics_sector
),
sector = ifelse(
is.na(sector),
"Other",
sector
)
) %>%
group_by(investor_name, portfolio_name, asset_type,sector) %>%
summarise(
weighted_sector_emissions = sum(weighted_sector_emissions, na.rm = T),
.groups = "drop"
)
return(min_portfolio)
}
get_average_emission_data <- function(inc_emission_factors) {
average_sector_intensity <- data.frame()
if (inc_emission_factors) {
average_sector_intensity <- read_rda(file.path(analysis_inputs_path, "average_sector_intensity.rda"))
}
return(average_sector_intensity)
}
get_company_emission_data <- function(inc_emission_factors) {
company_emissions <- data.frame()
if (inc_emission_factors) {
company_emissions <- read_rda(file.path(analysis_inputs_path, "company_emissions.rda"))
}
return(company_emissions)
}
prepare_portfolio_emissions <- function(
audit_file,
fin_data,
comp_fin_data,
average_sector_intensity,
company_emissions) {
audit_file <- audit_file %>%
janitor::clean_names(case = "snake")
# prepare sector view
company_bics_sector <- comp_fin_data %>%
distinct(
company_id,
bics_sector,
bics_subgroup,
financial_sector
) %>%
filter(!is.na(company_id))
# create audit view
audit_file_view <- audit_file %>%
distinct(
holding_id,
portfolio_name,
investor_name,
isin,
value_usd
)
# connect audit to company ids
audit_file_view <- fin_data %>%
distinct(
isin,
company_id,
company_name,
asset_type
) %>%
inner_join(
audit_file_view,
by = "isin"
)
# connect to consolidated financial data
audit_file_view <- comp_fin_data %>%
distinct(
company_id,
market_cap,
bond_debt_out
) %>%
inner_join(
audit_file_view,
by = "company_id"
)
# first try connecting at the company level
audit_company_emissions <- audit_file_view %>%
inner_join(
company_emissions,
# consolidated_company_emissions,
by = c(
"company_id",
"company_name"
)
)
# specify source
audit_company_emissions <- audit_company_emissions %>%
mutate(estimation_source = "Company data")
# fix sectors
# audit_company_emissions <- audit_company_emissions %>%
# mutate(ald_sector = ifelse(ald_sector %in% c("Cement", "Steel"), "Cement&Steel", ald_sector))
#
# save output
# save(
# audit_company_emissions,
# file = path(here(), "EF", "Output", "sample_audit_company_emissions", ext = "rda")
# )
# create clean view
audit_company_emissions <- audit_company_emissions %>%
distinct(
investor_name,
portfolio_name,
holding_id,
isin,
value_usd,
company_id,
company_name,
asset_type,
ald_sector,
bics_sector,
bics_subgroup,
financial_sector,
emissions,
estimation_source
)
# then try connecting at the sector level
audit_file_sector <- audit_file_view %>%
anti_join(
audit_company_emissions,
by = "company_id"
)
audit_file_sector <- audit_file_sector %>%
inner_join(
company_bics_sector,
by = "company_id"
)
audit_sector_emissions <- audit_file_sector %>%
inner_join(
average_sector_intensity,
by = c(
"bics_sector",
"asset_type"
)
)
# calculate absolute emissions
audit_sector_emissions <- audit_sector_emissions %>%
mutate(
emissions = case_when(
asset_type == "Bonds" ~ bond_debt_out * mean_intensity,
asset_type == "Equity" ~ market_cap * mean_intensity
)
)
# flagging issues
audit_sector_emissions <- audit_sector_emissions %>%
mutate(
issue = case_when(
is.na(bond_debt_out) & asset_type == "Bonds" ~ "Missing debt outstanding",
is.na(market_cap) & asset_type == "Equity" ~ "Missing market capitilization"
)
)
# specify source
audit_sector_emissions <- audit_sector_emissions %>%
mutate(estimation_source = "Sector average")
# create clean view
audit_sector_emissions <- audit_sector_emissions %>%
distinct(
investor_name,
portfolio_name,
holding_id,
isin,
value_usd,
company_id,
company_name,
asset_type,
bics_sector,
bics_subgroup,
financial_sector,
emissions,
estimation_source,
issue
)
# joining both sources together
bind_rows(
audit_sector_emissions,
audit_company_emissions
)
}
calculate_portfolio_emissions <- function(
inc_emission_factors,
audit_file,
fin_data,
comp_fin_data,
average_sector_intensity,
company_emissions) {
audit_sector_emissions <- data.frame()
if (inc_emission_factors) {
audit_emissions <- prepare_portfolio_emissions(
audit_file,
fin_data,
comp_fin_data,
average_sector_intensity,
company_emissions
)
# calculate holding weight
audit_emissions <- audit_emissions %>%
group_by(
portfolio_name,
investor_name
) %>%
mutate(weighting = value_usd / sum(value_usd, na.rm = TRUE))
# weight emissions by holding weight
audit_emissions <- audit_emissions %>%
mutate(weighted_emissions = weighting * emissions)
audit_emissions <- add_other_to_sector_classifications(audit_emissions)
# sum weighted emissions
audit_sector_emissions <- audit_emissions %>%
group_by(
portfolio_name,
investor_name,
asset_type,
sector
) %>%
summarise(
value_usd = sum(value_usd, na.rm = TRUE),
weighted_sector_emissions = sum(weighted_emissions, na.rm = TRUE),
.groups = "drop_last"
)
}
audit_sector_emissions
}
add_other_to_sector_classifications <- function(audit) {
# fix sector classifications
audit <- audit %>%
mutate(ald_sector = ifelse(financial_sector != "Other" & is.na(ald_sector), financial_sector, ald_sector))
# create final sector grouping
audit <- audit %>%
mutate(sector = ifelse(!is.na(ald_sector), ald_sector, bics_sector))
# modify sector names
audit <- audit %>%
mutate(sector = ifelse(sector %in% c("Industrials", "Energy", "Utilities", "Materials"), paste0("Other ", sector), sector))
audit
}
pw_calculations <- function(eq_portfolio, cb_portfolio){
port_all <- data.frame()
if(data_check(eq_portfolio)){
port_all <- bind_rows(port_all, eq_portfolio)
}
if(data_check(cb_portfolio)){
port_all <- bind_rows(port_all, cb_portfolio)
}
if(data_check(port_all)){
port_all <- port_all %>% select(!!!rlang::syms(grouping_variables),company_id, value_usd)
port_all <- calculate_port_weight(port_all, grouping_variables)
pw <- port_all %>%
group_by(!!!rlang::syms(grouping_variables), company_id) %>%
summarise(port_weight = sum(port_weight, na.rm = T), .groups = "drop") %>%
select(company_id, port_weight) %>%
rename(portfolio_weight = port_weight)
}else{
pw <- data.frame(company_id = "No companies in portfolio", portfolio_weight = "0")
}
return(pw)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.