The relevant issue is this, and the code that CJ showed today is below.
library(dplyr)
library(dbplyr)
library(readr)
library(fs)
library(tictoc)
# Connection functions
connect_datastore <-
function(data_quarter = '_2019q4') {
require(DBI)
require(RPostgres)
if (is.null(getOption('2dii_datastore_pwd'))) {
set_datastore_pwd()
}
DBI::dbConnect(
drv = RPostgres::Postgres(),
dbname = 'twodii',
host = 'twodii-gocd.westeurope.cloudapp.azure.com',
user = 'twodii-reader',
password = getOption('2dii_datastore_pwd'),
options = paste0('-c search_path=', data_quarter, ',public')
)
}
set_datastore_pwd <-
function() {
require(rstudioapi)
options('2dii_datastore_pwd' = rstudioapi::askForPassword('Database password'))
}
# Access
dropbox_path <- fs::path('~/Dropbox (2° Investing)')
project_path_rds <- fs::path('PortCheck/00_Data/07_AnalysisInputs/2019Q4_250220')
fin_data_rds <- read_rds(fs::path(dropbox_path, project_path_rds, 'security_financial_data.rda'))
fin_data_rds
dropbox_path <- fs::path('~/Dropbox (2° Investing)')
project_path_csv <- fs::path('PortCheck/00_Data/06_DataStore/2019Q4_export_04232020/2019Q4')
fin_data_csv <- read_csv(fs::path(dropbox_path, project_path_csv, 'security_financial_data.csv'))
fin_data_csv
datastore <- connect_datastore(data_quarter = '_2019q4')
fin_data_db <- tbl(datastore, 'security_financial_data')
fin_data_db
# Advantages:
# 1. Doesn't depend on the name of the Dropbox folder
# 2. Don't have to figure out the proper filepath
# 3. Don't have to worry about cross-platform filepath formatting
# 4. Don't have to be connected to Dropbox at all
# 5. Don't have to worry about column types (read_csv)
# 6. Explicit paths make packaging the code for other uses very difficult
# Disadvantage:
# 1. Does require internet access (potentially worked around by implementing
# a caching system)
# Speed
tic()
fin_data_rds <- read_rds(fs::path(dropbox_path, project_path_rds, 'security_financial_data.rda'))
fin_data_rds
toc()
tic()
fin_data_csv <- read_csv(fs::path(dropbox_path, project_path_csv, 'security_financial_data.csv'))
fin_data_csv
toc()
tic()
fin_data_db <- tbl(datastore, 'security_financial_data')
fin_data_db
toc()
tic()
fin_data_db <- tbl(datastore, 'security_financial_data')
fin_data_db %>% collect()
toc()
tic()
fin_data_db <- tbl(datastore, 'security_financial_data')
fin_data_db %>%
select(company_id, bloomberg_id, company_name, isin, security_bics_subgroup) %>%
collect()
toc()
# Disadvantage:
# 1. Has to download the data over the internet, but... if you specify only
# the parts of the data you need, it can still be pretty fast, even faster
# than reading in the whole local file under certain circumstances
# PACTA analysis
# get_and_clean_fin_data()
source('simulate_portcheck_code.R')
tic()
fin_data_rds <- read_rds(fs::path(dropbox_path, project_path_rds, 'security_financial_data.rda'))
clean_fin_data(fin_data_rds)
toc()
tic()
fin_data_csv <- read_csv(fs::path(dropbox_path, project_path_csv, 'security_financial_data.csv'))
clean_fin_data(fin_data_csv)
toc()
tic()
fin_data_db <- tbl(datastore, 'security_financial_data')
clean_fin_data(fin_data_db)
toc()
tic()
fin_data_db <- tbl(datastore, 'security_financial_data')
clean_fin_data(fin_data_db) %>% collect()
toc()
# Advanced usage
fin_data_db <- tbl(datastore, 'security_financial_data')
bics_sub_lookup <- fin_data_db %>% select(isin, security_bics_subgroup)
company_name_lookup <- fin_data_db %>% select(isin, company_name)
coupon_value_lookup <- fin_data_db %>% select(isin, coupon_value)
bics_sub_lookup
company_name_lookup
coupon_value_lookup
bics_sub_lookup %>% collect()
company_name_lookup %>% collect()
coupon_value_lookup %>% collect()
isins <- fin_data_db %>% select(isin) %>% collect() %>% filter(!is.na(isin))
isins <- isins[sample(1:nrow(isins), 20), ]
isins
isins %>% left_join(bics_sub_lookup, copy = TRUE) %>% print(n = 20)
isins %>% left_join(company_name_lookup, copy = TRUE) %>% print(n = 20)
isins %>% left_join(coupon_value_lookup, copy = TRUE) %>% print(n = 20)
isins %>% left_join(fin_data_db %>% select(isin, company_name), copy = TRUE) %>% print(n = 20)
# disconnet!!!
dbDisconnect(datastore)
library(readr)
library(dplyr)
overrides <-
read_csv("https://raw.githubusercontent.com/2DegreesInvesting/PACTA_analysis/master/data/fin_sector_overrides.csv", col_types = "ccdc") %>%
mutate_at(vars(company_name, corporate_bond_ticker,fin_sector_override), list(as.character)) %>%
mutate(sector_override = TRUE)
overrides_cbt <-
overrides %>%
filter(corporate_bond_ticker != "" , !is.na(corporate_bond_ticker)) %>%
select(corporate_bond_ticker, fin_sector_override, sector_override) %>%
distinct()
overrides_bbg <-
overrides %>%
filter(is.na(corporate_bond_ticker)|corporate_bond_ticker == "")%>%
select(bloomberg_id, fin_sector_override, sector_override) %>%
distinct()
sector_bridge <- read_csv("https://raw.githubusercontent.com/2DegreesInvesting/PACTA_analysis/master/data/sector_bridge.csv", col_types = "ccc")
cb_groups <- c("Convertible bonds", "Corporate Bonds", "Corporate inflation linked Bonds")
sb_groups <- c("Sovereign Debt","Sovereign Agency Debt", "Government inflation linked Bonds", "Sovereign","Sovereign Agency", "Sovereigns")
clean_fin_data <-
function(input_data) {
input_data %>%
left_join(sector_bridge %>% filter(source == "BICS") %>% select(-source),
by = c("security_bics_subgroup" = "industry_classification"),
copy = TRUE) %>%
filter(!is.na(sector)) %>%
select(-security_mapped_sector) %>%
rename(security_mapped_sector = sector) %>%
left_join(overrides_cbt, by = "corporate_bond_ticker", copy = TRUE) %>%
left_join(overrides_bbg, by = "bloomberg_id", copy = TRUE) %>%
mutate(sector_override = sector_override.x,
sector_override = if_else(sector_override.y != ""&!is.na(sector_override.y), sector_override.y, sector_override),
fin_sector_override = fin_sector_override.x,
fin_sector_override = if_else(!is.na(fin_sector_override.y)&fin_sector_override.y != "", fin_sector_override.y, fin_sector_override),
sector_override = if_else(is.na(sector_override),FALSE,TRUE)) %>%
select(-sector_override.x, -sector_override.y, -fin_sector_override.x, -fin_sector_override.y) %>%
mutate(security_mapped_sector = if_else(sector_override, fin_sector_override, security_mapped_sector)) %>%
select(-fin_sector_override) %>%
mutate(asset_type = if_else(asset_type == "Other", "Others", asset_type),
asset_type = if_else(is.na(asset_type), "Others", asset_type)) %>%
mutate(asset_type = paste0(toupper(substr(asset_type,1,1)),tolower(substr(asset_type,2,nchar(asset_type))))) %>%
mutate(security_mapped_sector = case_when(security_mapped_sector == "Others" ~ "Other",
security_mapped_sector == "OIl&Gas" ~ "Oil&Gas",
TRUE ~ security_mapped_sector)) %>%
mutate(asset_type = if_else(security_type %in% cb_groups,"Bonds",asset_type)) %>%
mutate(is_sb = case_when(security_type %in% sb_groups ~ TRUE,
security_bics_subgroup %in% sb_groups ~ TRUE,
TRUE ~ FALSE)) %>%
mutate(asset_type = case_when(grepl("Fund", security_type) ~ "Funds" ,
grepl("ETF", security_type) ~ "Funds",
grepl("Fund", security_bclass4) ~ "Funds" ,
grepl("ETF", security_bclass4) ~ "Funds",
grepl("Fund", security_icb_subsector) ~ "Funds" ,
grepl("ETF", security_icb_subsector) ~ "Funds",
TRUE ~ asset_type)) %>%
select(
company_id, company_name,bloomberg_id,corporate_bond_ticker,
country_of_domicile,
isin,
unit_share_price, exchange_rate_usd,
asset_type, security_type,
security_mapped_sector, security_icb_subsector, security_bics_subgroup,
maturity_date, coupon_value, amount_issued, current_shares_outstanding_all_classes, unit_share_price,
sector_override,
is_sb
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.