#' IShares class constructor
#'
#' The \code{IShares} class is a "virtual" class in the sense that it is never
#' instantiated directly; it defines the interface shared by its subclasses.
#' For instance, the getter methods are implemented at the \code{IShares}
#' class level.
#'
#' @param summary_link link from which the IShares summary data (in JSON format)
#' is downloaded. This is the data summarizing all the ETFs of the IShares provider
#' @param tickers_to_keep character vector containing the tickers to keep. One might
#' want to keep just a subset of tickers due to storage constraints
#'
#' @return a new \code{IShares} object
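#'
#' @examples
#' \dontrun{
#' # A usage sketch; the summary link below is a placeholder, not a verified
#' # IShares endpoint -- substitute the JSON summary URL for your region:
#' obj <- IShares(
#'   summary_link = "https://www.ishares.com/<region>/product-screener.json",
#'   tickers_to_keep = c("IVV", "AGG")
#' )
#' get_summary_data(obj)
#' }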
IShares <- function(summary_link, tickers_to_keep) {
stopifnot(is.character(summary_link))
new_IShares(
summary_link = summary_link,
tickers_to_keep = tickers_to_keep
)
} # IShares
new_IShares <- function(summary_link,
tickers_to_keep,
...,
class = character()) {
res_parsed <- download_summary_data(summary_link = summary_link)
summary_data <- parse_summary_data(res_parsed = res_parsed,
tickers_to_keep = tickers_to_keep)
new_ETF(
summary_link = summary_link,
summary_data = summary_data,
...,
class = c(class, "IShares")
)
} # new_IShares
#' Download summary data
#'
#' Downloads the JSON summary data from the IShares website. For instance, this data
#' is rendered at \url{https://www.ishares.com/uk/individual/en/products/etf-investments}
#'
#' @inheritParams httr::content
#' @inheritParams IShares
#'
#' @return the JSON data downloaded from \code{summary_link}, in the format specified by
#' \code{as}
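#'
#' @examples
#' \dontrun{
#' # Hedged sketch: requires network access, and the summary link below is a
#' # placeholder for the real IShares JSON endpoint:
#' res_parsed <- download_summary_data(
#'   summary_link = "https://www.ishares.com/<region>/product-screener.json",
#'   as = "parsed"
#' )
#' str(res_parsed, max.level = 2)
#' }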
download_summary_data <- function(summary_link, as = "parsed") {
futile.logger::flog.info(
glue::glue("Downloading IShares summary data from {summary_link} and returning it as {dplyr::if_else(as == \"parsed\", \"list\", \"text\")}")
)
res <- httr::GET(url = summary_link)
res_parsed <- httr::content(res, as = as)
return(res_parsed)
} # download_summary_data
#' Parse IShares list
#'
#' Parses the raw JSON data (usually returned as nested lists) into a clean dataframe
#' of character data. We decided to return character data to avoid data type issues
#' during the parsing. When the JSON data is downloaded using \code{as = "parsed"},
#' the parsing is done using the functions of the tidyr package
#' (\url{https://tidyr.tidyverse.org/index.html})
#'
#' @param res_parsed a list returned by the \code{\link[httr]{content}} function. The list
#' has the following attributes: \itemize{
#' \item \code{code}
#' \item \code{message}
#' \item \code{status}
#' \item \code{data}
#' }
#' The most important attribute is \code{data}, which contains the actual IShares data.
#' In particular, it's a list with the following attributes: \itemize{
#' \item \code{tableData} a list with two attributes: \code{columns} and
#' \code{data}. The first one stores the column names and the second the actual data
#' \item \code{config}.
#' }
#' @param tickers_to_keep character vector containing the tickers to keep. One might
#' want to keep just a subset of tickers due to storage constraints
#'
#' @return a dataframe of characters. Each row contains summary data for one ETF
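#'
#' @examples
#' \dontrun{
#' # A hypothetical, minimal skeleton of `res_parsed` (real payloads carry many
#' # more columns and rows; the column names below are illustrative):
#' res_parsed <- list(data = list(tableData = list(
#'   columns = list(list(name = "localExchangeTicker"),
#'                  list(name = "navAmountAsOf"),
#'                  list(name = "totalNetAssets")),
#'   data = list(list("IVV", list(d = "2023-01-31"), 123456.78))
#' )))
#' parse_summary_data(res_parsed, tickers_to_keep = character())
#' }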
parse_summary_data <- function(res_parsed, tickers_to_keep) {
assertthat::assert_that(is.list(res_parsed))
futile.logger::flog.info("parsing summary data")
## -------------------------------------------------------------------------
# 1) COLNAMES PARSING
## -------------------------------------------------------------------------
columns_df <- tibble::tibble(a = res_parsed$data$tableData$columns) %>%
tidyr::unnest_wider(a) %>%
    dplyr::mutate(col_id = dplyr::row_number()) %>%
dplyr::select(col_id, col_name = name)
## -------------------------------------------------------------------------
# 2) DATA PARSING
## -------------------------------------------------------------------------
out <- res_parsed$data$tableData$data %>%
purrr::map(~ {
column_data_df <- tibble::tibble(etf_data_list = .x) %>%
        dplyr::mutate(col_id = dplyr::row_number())
column_data_df_list <- column_data_df %>%
dplyr::filter(purrr::map_lgl(etf_data_list, is.list)) %>%
purrr::quietly(tidyr::unnest_wider)(etf_data_list) %>%
purrr::pluck("result") %>%
dplyr::select(col_id, value = d)
column_data_df_chr <- column_data_df %>%
dplyr::filter(purrr::map_lgl(etf_data_list, is.character)) %>%
dplyr::mutate(etf_data_list = purrr::map_chr(etf_data_list, ~
parse_chr_data(unlist(.x)))) %>%
dplyr::select(col_id, value = etf_data_list)
column_data_df_num <- column_data_df %>%
dplyr::filter(purrr::map_lgl(etf_data_list, is.numeric)) %>%
dplyr::mutate(value = unlist(etf_data_list)) %>%
dplyr::select(col_id, value)
list(column_data_df_chr, column_data_df_list, column_data_df_num) %>%
purrr::map_dfc(~ {
.x %>%
dplyr::left_join(columns_df,
by = "col_id") %>%
dplyr::select(-col_id) %>%
tidyr::pivot_wider(names_from = col_name, values_from = value)
})
})
out <- out %>%
purrr::reduce(dplyr::bind_rows)
## -------------------------------------------------------------------------
# 3) SUMMARY DATA SUBSET
## -------------------------------------------------------------------------
# since the column `localExchangeTicker` will be used extensively in the code
# we want to make sure it doesn't contain white spaces
if ("localExchangeTicker" %in% colnames(out)) {
out$localExchangeTicker <- out$localExchangeTicker %>%
stringr::str_trim()
} else {
futile.logger::flog.warn("the column `localExchangeTicker` doesn't exist in summary data!")
stop("the column `localExchangeTicker` doesn't exist in summary data!")
}
if (!purrr::is_empty(tickers_to_keep)) {
out <- out %>% dplyr::filter(localExchangeTicker %in% tickers_to_keep)
}
return(out)
} # parse_summary_data
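# Helper: maps the literal "-" placeholder used in the IShares data to NA.
# For example, parse_chr_data(c("-", "IVV")) returns c(NA, "IVV").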
parse_chr_data <- function(s) {
dplyr::case_when(
s == "-" ~ NA_character_,
TRUE ~ as.character(s)
)
} # parse_chr_data
#' Download IShares constituents
#'
#' Downloads the data regarding ETF constituents from the IShares website. Constituent data
#' is published on each ETF web page as a csv file and we read it directly from there.
#' \code{summary_data} contains the link pointing to the page where the csv is
#' published
#'
#' @param summary_data summary data dataframe as returned by \code{\link{parse_summary_data}}
#' @param download_csv if \code{TRUE} the constituents csv files will be saved in
#' a local folder called `csv_files`
#' @param url_fixed_number a country-specific code found in the url
#' @param region IShares geographical region (e.g. US)
#'
#' @return a named list. Names are the \code{localExchangeTicker} values found in \code{summary_data}
#' and the elements are the csv files found on each ETF web page, in melted format.
#' By melted format we mean the output of the function \code{\link[readr]{melt_csv}}
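#'
#' @examples
#' \dontrun{
#' # Sketch of a typical call, with `summary_data` as returned by
#' # `parse_summary_data()`. `url_fixed_number` is the numeric code embedded in
#' # the csv download link of the ETF product pages (inspect one page to find
#' # it for your region); the value below is a placeholder:
#' constituents <- download_etf_constituents(
#'   summary_data = summary_data,
#'   download_csv = FALSE,
#'   url_fixed_number = "0000000000000",
#'   region = "US"
#' )
#' names(constituents)
#' }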
download_etf_constituents <- function(summary_data, download_csv = TRUE, url_fixed_number, region) {
# futile.logger::flog.info("downloading US ETF constituents")
assertthat::assert_that(is.data.frame(summary_data))
assertthat::assert_that(is.logical(download_csv))
## -------------------------------------------------------------------------
# 1) CREATE THE URL
## -------------------------------------------------------------------------
  # for an example, go to a single ETF page on the IShares website and copy the
  # address of the csv file in the constituents section
summary_data <- summary_data %>%
dplyr::mutate(
effective_productPageUrl = glue::glue("https://www.ishares.com{productPageUrl}/{url_fixed_number}.ajax?fileType=csv&fileName={localExchangeTicker}_holdings&dataType=fund")
)
## -------------------------------------------------------------------------
# 2) DOWNLOAD THE CSV
## -------------------------------------------------------------------------
if (download_csv) {
dir.create("./csv_files", showWarnings = FALSE)
summary_data[["effective_productPageUrl"]] %>%
purrr::iwalk( ~ {
ticker <- summary_data[["localExchangeTicker"]][[.y]]
      if (!is.null(ticker) && !is.na(ticker)) {
futile.logger::flog.info(glue::glue("downloading constituents for {ticker} in csv format"))
try({
download.file(
url = .x,
destfile = glue::glue("./csv_files/{ticker}_{region}.csv")
)
})
}
})
}
## -------------------------------------------------------------------------
# 3) DOWNLOAD MELTED DATA
## -------------------------------------------------------------------------
  # keep only funds with a ticker, the others are index funds
localExchangeTicker <-
summary_data[["localExchangeTicker"]][!is.na(summary_data[["localExchangeTicker"]])]
effective_productPageUrl <-
summary_data[["effective_productPageUrl"]][!is.na(summary_data[["localExchangeTicker"]])]
constituents_list <- purrr::map2(
effective_productPageUrl,
localExchangeTicker,
~ {
      if (!is.null(.y) && !is.na(.y)) {
futile.logger::flog.info(glue::glue("downloading constituents for {.y} in melted format"))
tryCatch({
readr::melt_csv(file = .x,
trim_ws = TRUE,
na = c("", "NA", "-", " "))
},
error = function(e) {
futile.logger::flog.error(e)
NULL
})
} else {
NULL
}
    }) %>%
    purrr::set_names(localExchangeTicker)
  return(constituents_list)
} # download_etf_constituents
#' Classify constituent data
#'
#' The constituent data is downloaded in a melted format so as not to incur
#' reading errors. After that we have to understand the structure of the csv files.
#' For this reason we assume there exists a fixed set of templates. This function assigns
#' templates to constituent data: for instance, if a dataframe is in template_1
#' format (for \code{region = "US"}) then \code{classify_constituent_data} will assign
#' to it the string \code{parse_template_1_US}, which is the name of the function used
#' for parsing the dataframe
#'
#' @param melted_constituents_list list of dataframes with the constituent csv file
#' in melted format downloaded using the \code{readr::melt_csv} function
#' @param n_template number of template data models available
#' @inheritParams download_etf_constituents
#'
#' @return a named list with the names of the parsing functions for each constituent
#' dataframe. If no template matches the data, \code{NULL} is returned instead
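#'
#' @examples
#' \dontrun{
#' # Sketch: the classifier looks up predicate functions by name via `get()`,
#' # so functions such as `is_template_1_US()` (and the matching
#' # `parse_template_1_US()`) are assumed to be defined elsewhere in the package:
#' template_classification <- classify_constituent_data(
#'   melted_constituents_list,
#'   region = "US",
#'   n_template = 3
#' )
#' }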
classify_constituent_data <- function(melted_constituents_list, region = "US", n_template = 3) {
assertthat::assert_that(is.list(melted_constituents_list))
assertthat::assert_that(is.character(region))
assertthat::assert_that(is.numeric(n_template))
futile.logger::flog.info(glue::glue("classifing {region} ETF constituents using {n_template} template"))
out <- melted_constituents_list %>%
purrr::imap( ~ {
futile.logger::flog.info(glue::glue("classifying {.y}"))
tryCatch({
out <- NULL
for (i in 1:n_template) {
if (get(glue::glue("is_template_{i}_{region}"))(.x)) {
out <- glue::glue("parse_template_{i}_{region}")
break
}
}
out
},
error = function(e) {
futile.logger::flog.error(glue::glue("{.y} : {e}"))
return(NULL)
})
    })
  return(out)
} # classify_constituent_data
#' Parse constituent data
#'
#' Calls the parsing function for each melted constituents dataframe. The tickers included
#' in the output are the names of the input list \code{melted_constituents_list}.
#' That is, all those ETFs for which the \code{localExchangeTicker} is not
#' \code{NA} in \code{summary_data}.
#'
#' @param template_classification named-list returned by the function \link{classify_constituent_data}
#' with the names of the function used to parse each melted dataframe
#' @inheritParams classify_constituent_data
#'
#' @return a list of dataframes. The dataframes are in a wide format and parsed as
#' far as possible. List names are local tickers
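#'
#' @examples
#' \dontrun{
#' # Typical pipeline sketch, chaining classification and parsing; the inputs
#' # come from `download_etf_constituents()` and `classify_constituent_data()`:
#' template_classification <- classify_constituent_data(melted_constituents_list)
#' constituents <- parse_etf_constituents(
#'   melted_constituents_list,
#'   template_classification
#' )
#' }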
parse_etf_constituents <- function(melted_constituents_list,
template_classification) {
assertthat::assert_that(all(
is.list(melted_constituents_list),
is.list(template_classification)
))
## -------------------------------------------------------------------------
# 1) CALL ETF PARSERS
## -------------------------------------------------------------------------
out <- vector("list", length(melted_constituents_list))
for (i in seq_along(melted_constituents_list)) {
if (!is.null(melted_constituents_list[[i]])) {
futile.logger::flog.info(glue::glue("parsing {names(melted_constituents_list)[[i]]} using {template_classification[[i]]}"))
out[[i]] <- tryCatch({
get(template_classification[[i]])(melted_constituents_list[[i]])
},
error = function(e) {
          futile.logger::flog.error(e)
          NULL
        })
} else {
futile.logger::flog.info(glue::glue("not parsing {names(melted_constituents_list)[[i]]} because it's NULL"))
}
}
## -------------------------------------------------------------------------
# 2) SET LIST NAMES
## -------------------------------------------------------------------------
names(out) <- names(melted_constituents_list)
## -------------------------------------------------------------------------
# 3) SIGNAL ETF WITH NO CONSTITUENTS
## -------------------------------------------------------------------------
is_null_lgl <- melted_constituents_list %>% purrr::map_lgl(is.null)
if (length(is_null_lgl) > 0) {
out[is_null_lgl] %>%
purrr::iwalk( ~ {
futile.logger::flog.warn(glue::glue("etf {.y} doesn't have constituents data"))
})
}
# out <- out[!is_null_lgl]
## -------------------------------------------------------------------------
# 4) PARSE COLUMN NAMES
## -------------------------------------------------------------------------
out <- out %>% purrr::modify_if(purrr::negate(is.null), rename_col)
return(out)
} # parse_etf_constituents
#' Parse column names
#'
#' Most of the column names contain special characters or spaces. This function
#' gives them a uniform format by removing spaces and special characters
#'
#' @param df a dataframe
#'
#' @return a dataframe with parsed column names
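#'
#' @examples
#' \dontrun{
#' # Hypothetical raw column names of the kind found in the constituent csv files:
#' rename_col(tibble::tibble(`Mkt. Value` = 100, `Asset Class` = "Equity"))
#' # column names become `mkt_value` and `asset_class`
#' }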
rename_col <- function(df) {
df %>%
dplyr::rename_all( ~ {
.x %>%
stringr::str_replace_all(pattern = "[:punct:]", "") %>%
stringr::str_trim() %>%
stringr::str_replace_all(pattern = " ", "_") %>%
stringr::str_to_lower()
})
} # rename_col
#' Convert data to mongo format
#'
#' The data is saved in mongo using the aod and ticker as keys; the data itself is
#' a list where the first field is the row from \code{summary_data} and the second the
#' \code{constituents} dataframe
#'
#' @param obj a \code{IShares} object or one of its subclasses
#'
#' @return a named list. Elements are list with the following fields
#' \itemize{
#' \item \code{aod} nav as of date
#' \item \code{ticker} local exchange ticker plus region
#' \item \code{data} a list with summary and constituents data
#' }
#'
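#' @examples
#' \dontrun{
#' # Sketch, assuming `obj` is a fully constructed IShares subclass object and
#' # "IVV" is one of its tickers (hypothetical here):
#' mongo_data <- to_mongo_data_format(obj)
#' mongo_data[["IVV"]]$aod
#' mongo_data[["IVV"]]$data$constituents
#' }
#'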
#' @export
to_mongo_data_format <- function(obj) {
UseMethod("to_mongo_data_format")
} # to_mongo_data_format
#' @export
to_mongo_data_format.IShares <- function(obj) {
  constituents_list <- get_constituents(obj, ticker = "all")
  tickers <- names(constituents_list)
summary_data <- get_summary_data(obj) %>% dplyr::filter(localExchangeTicker %in% tickers)
out <- vector("list", length(tickers))
for (i in seq_along(tickers)) {
futile.logger::flog.info(glue::glue("converting to mongo format {tickers[[i]]}"))
summary_data_slice <- summary_data %>% dplyr::filter(localExchangeTicker == tickers[[i]])
assertthat::assert_that(nrow(summary_data_slice) == 1)
## ---------------------------------------------------------------------
## 1) GET AOD
## ---------------------------------------------------------------------
if ("navAmountAsOf" %in% colnames(summary_data_slice)) {
aod_nav <- summary_data_slice[["navAmountAsOf"]]
} else {
# futile.logger::flog.error(glue::glue("`summary_data` has no column `navAmountAsOf`!"))
stop(glue::glue("`summary_data` has no column `navAmountAsOf`!"))
}
if ("aod" %in% colnames(constituents_list[[i]])) {
aod_constituents <- unique(constituents_list[[i]][["aod"]])
} else {
# futile.logger::flog.error(glue::glue("constituents dataframe has no column `aod`!"))
stop(glue::glue("constituents dataframe has no column `aod`!"))
}
if (aod_nav == aod_constituents) {
aod <- aod_nav
} else {
# futile.logger::flog.error(glue::glue("aod_nav = {aod_nav} != aod_constituents = {aod_constituents}"))
stop(glue::glue("aod_nav = {aod_nav} != aod_constituents = {aod_constituents}"))
}
if (is.null(constituents_list[[i]])) {
futile.logger::flog.warn(glue::glue("{tickers[[i]]} has no constituents, exporting only summary data"))
}
## ---------------------------------------------------------------------
## 2) EXPORTING TO MONGO FORMAT
## ---------------------------------------------------------------------
out[[i]] <- list(
      aod = aod,
ticker = glue::glue("{tickers[[i]]}_{get_region(obj)}"),
data = list(summary_data = summary_data_slice,
constituents = constituents_list[[i]])
)
}
out <- out %>%
purrr::set_names(tickers) %>%
purrr::discard(is.null)
dplyr::setdiff(tickers, names(out)) %>%
purrr::walk( ~ {
futile.logger::flog.info(glue::glue("{.x} not expored to mongo format"))
})
return(out)
} # to_mongo_data_format
#' @export
get_summary_data <- function(obj) {
UseMethod("get_summary_data")
} # get_summary_data
#' @export
get_summary_data.IShares <- function(obj) {
assertthat::has_name(obj, "summary_data")
obj[["summary_data"]]
}
#' @export
get_constituents <- function(obj, ticker) {
UseMethod("get_constituents")
} # get_constituents
#' @export
get_constituents.IShares <- function(obj, ticker = "all") {
assertthat::has_name(obj, "constituents_list")
if (ticker == "all") {
obj$constituents_list
} else {
    assertthat::assert_that(assertthat::has_name(obj$constituents_list, ticker))
obj$constituents_list[ticker]
}
} # get_constituents
#' @export
get_melted_constituents_list <- function(obj, ticker) {
UseMethod("get_melted_constituents_list")
} # get_melted_constituents_list
#' @export
get_melted_constituents_list.IShares <- function(obj, ticker = "all") {
  assertthat::assert_that(assertthat::has_name(obj, "melted_constituents_list"))
if (ticker == "all") {
obj$melted_constituents_list
} else {
    assertthat::assert_that(assertthat::has_name(obj$melted_constituents_list, ticker))
obj$melted_constituents_list[ticker]
}
} # get_melted_constituents_list.IShares
#' @export
get_region <- function(obj) {
UseMethod("get_region")
} # get_region
#' @export
get_region.IShares <- function(obj) {
  assertthat::assert_that(assertthat::has_name(obj, "region"))
obj[["region"]]
} # get_region
#' @export
get_constituents_aod <- function(obj) {
UseMethod("get_constituents_aod")
} # get_constituents_aod
#' @export
get_constituents_aod.IShares <- function(obj) {
  assertthat::assert_that(assertthat::has_name(obj, "constituents_list"))
obj$constituents_list %>%
purrr::imap( ~ unique(.x[["aod"]]))
} # get_constituents_aod.IShares
#' @export
get_nav_aod <- function(obj) {
UseMethod("get_nav_aod")
} # get_nav_aod
#' @export
get_nav_aod.IShares <- function(obj) {
  assertthat::assert_that(assertthat::has_name(obj, "summary_data"))
assertthat::assert_that("navAmountAsOf" %in% colnames(obj$summary_data))
out <- unique(obj$summary_data$navAmountAsOf)
assertthat::assert_that(length(out) == 1)
return(out)
} # get_nav_aod.IShares
#' @export
to_csv <- function(obj, output_folder) {
UseMethod("to_csv")
} # to_csv
#' Export to csv format
#'
#' \code{to_csv} exports to csv format the dataframe \code{summary_data} and each
#' dataframe in \code{constituents_list}. The name of each constituents file is the
#' ticker plus the region (e.g. \code{IVV_US_constituents.csv})
#'
#' @param obj \code{IShares} object or one of its subclasses
#' @param output_folder folder where the csv files are to be saved
#'
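#' @examples
#' \dontrun{
#' # Sketch, assuming `obj` is a fully constructed IShares subclass object and
#' # the output folder already exists:
#' to_csv(obj, output_folder = "csv_exports")
#' }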
#' @export
to_csv.IShares <- function(obj, output_folder) {
## -------------------------------------------------------------------------
# 1) EXPORT SUMMARY DATA
## -------------------------------------------------------------------------
summary_data <- get_summary_data(obj)
if (!purrr::is_empty(summary_data)) {
futile.logger::flog.info(glue::glue("exporting summary data for object of class {class(obj)[[1]]}, output_folder = {output_folder}"))
    readr::write_csv(summary_data, file = glue::glue("{output_folder}/{get_region(obj)}_summary_data.csv"))
}
## -------------------------------------------------------------------------
# 2) EXPORT CONSTITUENTS
## -------------------------------------------------------------------------
get_constituents(obj, ticker = "all") %>%
purrr::iwalk( ~ {
if (!is.null(.x)) {
futile.logger::flog.info(glue::glue("exporting {.y} constituents to csv, output_folder = {output_folder}"))
        readr::write_csv(.x, file = glue::glue("{output_folder}/{.y}_{get_region(obj)}_constituents.csv"))
}
})
} # to_csv